"""Gradio demo app: text-to-speech with the VoxCPM model."""

import tempfile

import gradio as gr
import soundfile as sf
from voxcpm import VoxCPM

# ===============================
# Load VoxCPM model once
# ===============================
# Loaded at import time so every request reuses the same model instance.
model = VoxCPM.from_pretrained("openbmb/VoxCPM-0.5B")

# Sample rate (Hz) used when writing generated audio to disk.
# NOTE(review): presumably matches the model's output rate — confirm against
# the VoxCPM documentation for the checkpoint loaded above.
SAMPLE_RATE = 16000


# ===============================
# TTS function
# ===============================
def tts_generate(text, cfg_value, inference_steps, normalize, denoise):
    """Synthesize *text* with VoxCPM and return the path to a WAV file.

    Args:
        text: The text to speak.
        cfg_value: Forwarded to ``model.generate`` as ``cfg_value``.
        inference_steps: Forwarded to ``model.generate`` as
            ``inference_timesteps``.
        normalize: Forwarded to ``model.generate`` as ``normalize``.
        denoise: Forwarded to ``model.generate`` as ``denoise``.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio. The
        file is created with ``delete=False`` so it survives until the UI has
        served it; it is not removed by this function.

    Raises:
        gr.Error: If *text* is empty/blank, or if generation or writing the
            file fails. Gradio shows the message to the user instead of
            trying to load an error string as an audio file path.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    try:
        wav = model.generate(
            text=text,
            cfg_value=cfg_value,
            inference_timesteps=inference_steps,
            normalize=normalize,
            denoise=denoise,
        )
        # Reserve a unique path, then close the handle before writing:
        # leaving it open leaks a file descriptor per request and prevents
        # soundfile from reopening the path on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
            wav_path = tmp_wav.name
        sf.write(wav_path, wav, SAMPLE_RATE)
    except Exception as e:
        # UI boundary: convert any backend failure into a user-visible error.
        raise gr.Error(f"Speech generation failed: {e}") from e

    return wav_path


# ===============================
# Gradio Interface
# ===============================
# Input order must match the positional parameters of tts_generate.
app = gr.Interface(
    fn=tts_generate,
    inputs=[
        gr.Textbox(label="Enter text", value="Hello Hugging Face!", lines=3),
        gr.Slider(0.5, 5.0, value=2.0, step=0.1, label="CFG Value"),
        gr.Slider(5, 20, value=8, step=1, label="Inference timesteps"),
        gr.Checkbox(value=True, label="Enable Normalization"),
        gr.Checkbox(value=True, label="Enable Denoise"),
    ],
    outputs=gr.Audio(type="filepath", label="Generated Audio"),
    title="🎙️ VoxCPM Text-to-Speech",
    description="Generate expressive speech with VoxCPM TTS. Adjust CFG for text fidelity and inference timesteps for speed vs quality.",
)

# ===============================
# Run the app
# ===============================
if __name__ == "__main__":
    app.launch()