Spaces:
Build error
Build error
Aseem Gupta
committed on
Commit
·
22fdf85
1
Parent(s):
c879c20
test8 test7 worked but limited
Browse files- app.py +50 -7
- requirements.txt +3 -0
app.py
CHANGED
|
@@ -1,24 +1,55 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from TTS.api import TTS
|
| 3 |
import os
|
|
|
|
|
|
|
|
|
|
| 4 |
|
|
|
|
| 5 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
| 6 |
|
| 7 |
-
# Load the model
|
| 8 |
-
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")
|
| 9 |
tts.to("cpu")
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
# Function to generate voice
|
| 12 |
-
def generate_voice(text, speaker_audio):
|
| 13 |
-
output_path = "
|
|
|
|
| 14 |
tts.tts_to_file(
|
| 15 |
text=text,
|
| 16 |
speaker_wav=speaker_audio,
|
| 17 |
file_path=output_path,
|
| 18 |
-
language="en"
|
| 19 |
)
|
| 20 |
return output_path
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# Gradio interface
|
| 23 |
with gr.Blocks() as demo:
|
| 24 |
gr.Markdown("# 🗣️ Voice Cloning with Coqui XTTS-v2")
|
|
@@ -26,16 +57,28 @@ with gr.Blocks() as demo:
|
|
| 26 |
with gr.Row():
|
| 27 |
text_input = gr.Textbox(label="Enter Text", placeholder="Type the text you want to synthesize...")
|
| 28 |
speaker_audio_input = gr.Audio(label="Upload Speaker Audio (WAV)", type="filepath")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
output_audio = gr.Audio(label="Generated Voice", type="filepath")
|
| 31 |
|
| 32 |
generate_button = gr.Button("Generate Voice")
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
generate_button.click(
|
| 35 |
fn=generate_voice,
|
| 36 |
-
inputs=[text_input, speaker_audio_input],
|
| 37 |
outputs=output_audio
|
| 38 |
)
|
| 39 |
|
| 40 |
# Launch the app
|
| 41 |
-
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from TTS.api import TTS
|
| 3 |
import os
|
| 4 |
+
import tempfile
|
| 5 |
+
import sounddevice as sd
|
| 6 |
+
from scipy.io.wavfile import write
|
| 7 |
|
| 8 |
+
# Agree to Coqui's terms of service non-interactively so the model download
# does not block waiting for a stdin prompt.
os.environ["COQUI_TOS_AGREED"] = "1"

# Load the multilingual XTTS-v2 voice-cloning model for CPU inference.
# gpu=False plus the explicit .to("cpu") keeps the model off any accelerator.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
tts.to("cpu")
|
| 14 |
|
| 15 |
+
# Languages supported by XTTS-v2, keyed by display name -> ISO 639-1 code
# (per the Coqui XTTS-v2 model card). Fix: the original dict listed
# "Spanish": "es" twice; duplicate removed and the remaining documented
# XTTS-v2 languages added (backward-compatible: existing keys unchanged).
LANGUAGES = {
    "English": "en",
    "Spanish": "es",
    "German": "de",
    "French": "fr",
    "Italian": "it",
    "Portuguese": "pt",
    "Polish": "pl",
    "Turkish": "tr",
    "Russian": "ru",
    "Dutch": "nl",
    "Czech": "cs",
    "Arabic": "ar",
    "Chinese": "zh-cn",
    "Japanese": "ja",
    "Hindi": "hi",
    "Korean": "ko",
    "Hungarian": "hu",
}
|
| 30 |
+
|
| 31 |
# Function to generate voice
def generate_voice(text, speaker_audio, language):
    """Synthesize *text* in the voice cloned from *speaker_audio*.

    Args:
        text: The text to speak.
        speaker_audio: Path to a WAV file of the reference speaker.
        language: Display name of the output language (a LANGUAGES key);
            unknown names fall back to English.

    Returns:
        Path to the generated WAV file.
    """
    # mkstemp instead of the deprecated, race-prone tempfile.mktemp:
    # the file is created atomically; close the fd so tts_to_file can
    # reopen and write the path itself.
    fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_audio,
        file_path=output_path,
        language=LANGUAGES.get(language, "en"),
    )
    return output_path
|
| 42 |
|
| 43 |
+
# Function to record audio from the microphone
def record_audio(duration=10, filename="mic_input.wav"):
    """Record *duration* seconds of mono audio and save it as a WAV file.

    Args:
        duration: Recording length in seconds.
        filename: Output WAV path.

    Returns:
        The path the recording was saved to.

    NOTE(review): this records from the *server's* microphone, not the
    visitor's browser — on a hosted Space there is usually no input device.
    """
    fs = 44100  # sample rate (Hz)
    print("Recording...")
    # dtype="int16" produces a standard PCM WAV; sounddevice's float64
    # default would make scipy write a 64-bit-float WAV many decoders reject.
    audio_data = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype="int16")
    sd.wait()  # block until the recording is finished
    write(filename, fs, audio_data)
    # Fix: the original f-string had lost its placeholder and printed a literal.
    print(f"Recording saved as {filename}")
    return filename
|
| 52 |
+
|
| 53 |
# Gradio interface: text + reference-speaker inputs, language selector,
# and a button that runs generate_voice and plays the result.
with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ Voice Cloning with Coqui XTTS-v2")

    # Inputs: text to speak, reference speaker clip, and output language.
    # NOTE(review): indentation of the dropdown/mic button inside the Row is
    # reconstructed from a whitespace-mangled diff — confirm against the repo.
    with gr.Row():
        text_input = gr.Textbox(label="Enter Text", placeholder="Type the text you want to synthesize...")
        speaker_audio_input = gr.Audio(label="Upload Speaker Audio (WAV)", type="filepath")
        language_dropdown = gr.Dropdown(
            label="Select Output Language",
            choices=list(LANGUAGES.keys()),
            value="English"
        )
        mic_button = gr.Button("Record from Mic")

    output_audio = gr.Audio(label="Generated Voice", type="filepath")

    generate_button = gr.Button("Generate Voice")

    # Server-side mic capture; the recorded file path is fed into the
    # speaker-audio component. NOTE(review): this records on the server,
    # not in the visitor's browser — confirm this is intended.
    mic_button.click(
        fn=lambda: record_audio(duration=10),
        inputs=[],
        outputs=speaker_audio_input,
    )

    generate_button.click(
        fn=generate_voice,
        inputs=[text_input, speaker_audio_input, language_dropdown],
        outputs=output_audio
    )

# Launch the app
# NOTE(review): share=True is ignored on hosted Spaces; 0.0.0.0:7860 is the
# standard Spaces binding.
demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
|
requirements.txt
CHANGED
|
@@ -2,3 +2,6 @@ gradio
|
|
| 2 |
torch
|
| 3 |
TTS
|
| 4 |
soundfile
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
torch
|
| 3 |
TTS
|
| 4 |
soundfile
|
| 5 |
+
coqui-tts
|
| 6 |
+
sounddevice
|
| 7 |
+
scipy
|