Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import soundfile as sf | |
| import tempfile | |
| from voxcpm import VoxCPM | |
| # =============================== | |
| # Load VoxCPM model once | |
| # =============================== | |
# Checkpoint identifier handed to VoxCPM.from_pretrained.
_MODEL_ID = "openbmb/VoxCPM-0.5B"

# Created once at import time; tts_generate reads this module-level object
# on every call instead of loading the model again.
model = VoxCPM.from_pretrained(_MODEL_ID)
| # =============================== | |
| # TTS function | |
| # =============================== | |
def tts_generate(text, cfg_value, inference_steps, normalize, denoise):
    """Synthesize speech for ``text`` and return the path of a WAV file.

    Args:
        text: Text to synthesize. Must contain at least one
            non-whitespace character.
        cfg_value: Forwarded to ``model.generate`` as ``cfg_value``.
        inference_steps: Forwarded to ``model.generate`` as
            ``inference_timesteps``. Coerced to ``int`` because a UI slider
            may deliver the value as a float.
        normalize: Forwarded to ``model.generate`` as ``normalize``.
        denoise: Forwarded to ``model.generate`` as ``denoise``.

    Returns:
        Path (``str``) of a temporary ``.wav`` file holding the generated
        audio. The file is created with ``delete=False`` and is not removed
        by this function.

    Raises:
        gr.Error: If ``text`` is empty, or if generation or writing the
            file fails. Raising (rather than returning the message) keeps
            the error text from being handed to the audio output component
            as if it were a file path.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    try:
        wav = model.generate(
            text=text,
            cfg_value=cfg_value,
            inference_timesteps=int(inference_steps),
            normalize=normalize,
            denoise=denoise,
        )
        # Only the unique file name is needed. Close the handle right away
        # so the descriptor is not leaked and so the file can be reopened
        # for writing on platforms that forbid a second open (Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
            wav_path = tmp_wav.name
        # NOTE(review): 16000 Hz is the rate the original code wrote with;
        # presumably it matches the model's output rate - confirm.
        sf.write(wav_path, wav, 16000)
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user while
        # keeping the original exception chained for the server log.
        raise gr.Error(f"Error: {e}") from e
    return wav_path
| # =============================== | |
| # Gradio Interface | |
| # =============================== | |
# The input components are passed positionally to tts_generate, so their
# order must stay in step with its parameters:
# (text, cfg_value, inference_steps, normalize, denoise).
app = gr.Interface(
    fn=tts_generate,
    inputs=[
        gr.Textbox(label="Enter text", value="Hello Hugging Face!", lines=3),
        gr.Slider(0.5, 5.0, value=2.0, step=0.1, label="CFG Value"),
        gr.Slider(5, 20, value=8, step=1, label="Inference timesteps"),
        gr.Checkbox(value=True, label="Enable Normalization"),
        gr.Checkbox(value=True, label="Enable Denoise"),
    ],
    # type="filepath": the callback returns the path of the generated file.
    outputs=gr.Audio(type="filepath", label="Generated Audio"),
    # The leading emoji had been mis-decoded into "ποΈ"; restored here.
    title="🎙️ VoxCPM Text-to-Speech",
    description="Generate expressive speech with VoxCPM TTS. Adjust CFG for text fidelity and inference timesteps for speed vs quality.",
)
| # =============================== | |
| # Run the app | |
| # =============================== | |
def _main():
    """Start the Gradio server for the module-level ``app``."""
    app.launch()


# Launch only when executed as a script, not when the module is imported.
if __name__ == "__main__":
    _main()