Spaces:
Build error
Build error
| import gradio as gr | |
| from TTS.api import TTS | |
| import os | |
| import tempfile | |
| import sounddevice as sd | |
| from scipy.io.wavfile import write | |
| from concurrent.futures import ThreadPoolExecutor | |
# Agree to Coqui's terms of service through the environment.
os.environ["COQUI_TOS_AGREED"] = "1"
# Limit OpenMP to 2 CPU threads (adjust based on your CPU cores).
# NOTE(review): this is assigned after `TTS` has already been imported above;
# confirm the OpenMP runtime still picks the value up at this point.
os.environ["OMP_NUM_THREADS"] = "2"

# Load the XTTS-v2 model once at start-up and keep it on the CPU.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
tts.to("cpu")
# Languages supported by the model: display name -> XTTS-v2 language code.
# The first eleven entries keep their original order so the dropdown layout
# is unchanged; the remaining codes from the XTTS-v2 language list follow.
LANGUAGES = {
    "English": "en",
    "Spanish": "es",
    "German": "de",
    "French": "fr",
    "Italian": "it",
    "Hindi": "hi",
    "Russian": "ru",
    "Turkish": "tr",
    "Japanese": "ja",
    "Korean": "ko",
    "Hungarian": "hu",
    "Portuguese": "pt",
    "Polish": "pl",
    "Dutch": "nl",
    "Czech": "cs",
    "Arabic": "ar",
    "Chinese": "zh-cn",
}
# Function to generate voice
def generate_voice(text, speaker_audio, language):
    """Synthesize ``text`` in the voice of ``speaker_audio``.

    Args:
        text: The text to speak.
        speaker_audio: Path to the reference recording of the voice to clone.
        language: Display name looked up in ``LANGUAGES``; names that are not
            in the table fall back to English (``"en"``).

    Returns:
        Path of the generated ``.wav`` file.

    Raises:
        ValueError: If ``text`` is empty/whitespace or ``speaker_audio`` is
            missing.
    """
    # Fail fast with a clear message instead of erroring deep inside the model.
    if not text or not text.strip():
        raise ValueError("Please enter some text to synthesize.")
    if not speaker_audio:
        raise ValueError("Please provide a speaker audio sample to clone.")

    # tempfile.mktemp() only hands back a name, leaving a window in which
    # another process can claim the path.  Create the file atomically instead
    # and keep it (delete=False) so it can be served after we return.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_audio,
            file_path=output_path,
            language=LANGUAGES.get(language, "en"),
            # NOTE(review): confirm that tts_to_file honors (or even accepts)
            # `sample_rate` for XTTS-v2; kept as in the original call.
            sample_rate=44100,
        )
    except Exception:
        # Do not leave an empty placeholder file behind when synthesis fails.
        try:
            os.remove(output_path)
        except OSError:
            pass
        raise
    return output_path
# Function to record audio from the mic
def record_audio(duration=10, filename="mic_input.wav"):
    """Record mono audio with ``sounddevice`` and save it as a WAV file.

    Args:
        duration: Length of the recording in seconds.
        filename: Path the WAV file is written to.

    Returns:
        ``filename``, unchanged, once the recording has been written.
    """
    sample_rate = 44100  # samples per second

    print("Recording...")
    frame_count = int(duration * sample_rate)
    recording = sd.rec(frame_count, samplerate=sample_rate, channels=1)
    sd.wait()  # block until the recording has finished

    write(filename, sample_rate, recording)
    print(f"Recording saved as {filename}")
    return filename
# Gradio interface
# NOTE(review): the original indentation was lost; the widgets placed inside
# the Row below are an assumption -- confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ Voice Cloning with Coqui XTTS-v2")

    # Input widgets: text, reference voice, target language, mic trigger.
    with gr.Row():
        text_input = gr.Textbox(
            label="Enter Text",
            placeholder="Type the text you want to synthesize...",
        )
        speaker_audio_input = gr.Audio(
            label="Upload Speaker Audio (WAV)",
            type="filepath",
        )
        language_dropdown = gr.Dropdown(
            label="Select Output Language",
            choices=list(LANGUAGES),
            value="English",
        )
        mic_button = gr.Button("Record from Mic")

    # Output widget and the button that triggers synthesis.
    output_audio = gr.Audio(label="Generated Voice", type="filepath")
    generate_button = gr.Button("Generate Voice")

    # A 10-second recording made by record_audio() becomes the speaker sample.
    mic_button.click(
        fn=lambda: record_audio(duration=10),
        inputs=[],
        outputs=speaker_audio_input,
    )

    # Clone the voice: (text, speaker sample, language) -> generated WAV path.
    generate_button.click(
        fn=generate_voice,
        inputs=[text_input, speaker_audio_input, language_dropdown],
        outputs=output_audio,
    )

# Launch the app
demo.launch(server_name="0.0.0.0", server_port=7860, share=True)