Spaces:

AI-trainer1
/

text_to_speech_test1

Build error

App Files Community

Aseem Gupta committed on Jan 13, 2025

Commit

a416ccf

1 Parent(s): c636952

test2

Browse files

Files changed (2) hide show

app.py +29 -41
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,49 +1,37 @@
 import gradio as gr
-import torch
-from transformers import pipeline
-from langdetect import detect
-# Load the Coqui XTTS model
-tts = pipeline("text-to-speech", model="coqui/XTTS-v2", device=0 if torch.cuda.is_available() else -1)
-# Helper function to clone voice and generate speech
-def clone_and_generate(audio, text_prompt, language):
-    if audio is None or text_prompt.strip() == "":
-        return "Please provide both audio input and text prompt.", None
-    # Check if language is supported
-    supported_languages = {"english": "en", "hindi": "hi"}
-    if language not in supported_languages:
-        return f"Language {language} not supported yet.", None
-    # Convert text to the target language (if needed)
-    if detect(text_prompt) != supported_languages[language]:
-        # For now, we assume text is already in the desired language
-        pass
-    # Generate speech
-    try:
-        result = tts(text=text_prompt, speaker=audio)
-        return "Speech generated successfully!", result["audio"]
-    except Exception as e:
-        return f"Error: {str(e)}", None
 # Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("## 🎤 Voice Cloning & Text-to-Speech with Language Translation")
     with gr.Row():
-        with gr.Column():
-            audio_input = gr.Audio(source="microphone", type="filepath", label="🎙️ Record or Upload Voice")
-            text_input = gr.Textbox(label="📝 Enter Text to Generate Speech")
-            language_input = gr.Dropdown(choices=["english", "hindi"], value="english", label="🌐 Select Language")
-        with gr.Column():
-            output_message = gr.Textbox(label="📢 Status")
-            output_audio = gr.Audio(label="🔊 Generated Speech")
-    generate_button = gr.Button("🚀 Generate Speech")
-    generate_button.click(clone_and_generate, inputs=[audio_input, text_input, language_input], outputs=[output_message, output_audio])
-# Launch the app
 demo.launch()

 import gradio as gr
+from TTS.api import TTS
+# Load the XTTS-v2 model
+tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
+# Define the function for voice cloning
+def generate_voice(text, speaker_audio):
+    output_path = "output.wav"
+    tts.tts_to_file(
+        text=text,
+        speaker_wav=speaker_audio.name,
+        file_path=output_path,
+        language="en"
+    )
+    return output_path
 # Gradio Interface
 with gr.Blocks() as demo:
+    gr.Markdown("# 🗣️ Voice Cloning with Coqui XTTS-v2")
     with gr.Row():
+        text_input = gr.Textbox(label="Enter Text", placeholder="Type the text you want to synthesize...")
+        speaker_audio_input = gr.Audio(label="Upload Speaker Audio (WAV)", type="file")
+    output_audio = gr.Audio(label="Generated Voice", type="filepath")
+    generate_button = gr.Button("Generate Voice")
+    generate_button.click(
+        fn=generate_voice,
+        inputs=[text_input, speaker_audio_input],
+        outputs=output_audio
+    )
+# Launch the Gradio app
 demo.launch()

requirements.txt CHANGED Viewed

@@ -2,3 +2,4 @@ gradio
 torch
 transformers
 langdetect

 torch
 transformers
 langdetect
+TTS