Spaces:
Sleeping
Sleeping
File size: 1,674 Bytes
005dd5c 50d895f 005dd5c 50d895f c4a8974 50d895f 005dd5c 50d895f 005dd5c 50d895f 8827fc3 005dd5c 50d895f 005dd5c 50d895f 005dd5c 8827fc3 5ca5e0d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import gradio as gr
import soundfile as sf
import tempfile
from voxcpm import VoxCPM
# ===============================
# Load VoxCPM model once
# ===============================
# Created at module import so that every call to tts_generate reuses this
# single instance instead of building a new model per request.
# NOTE(review): from_pretrained presumably fetches/caches the
# "openbmb/VoxCPM-0.5B" checkpoint on first use — confirm against voxcpm docs.
model = VoxCPM.from_pretrained("openbmb/VoxCPM-0.5B")
# ===============================
# TTS function
# ===============================
def tts_generate(text, cfg_value, inference_steps, normalize, denoise):
    """Synthesize speech for ``text`` and return the path of the WAV file.

    Args:
        text: Text to speak, as entered in the UI textbox.
        cfg_value: Forwarded to ``model.generate`` as ``cfg_value``.
        inference_steps: Forwarded to ``model.generate`` as
            ``inference_timesteps``.
        normalize: Forwarded to ``model.generate`` as ``normalize``.
        denoise: Forwarded to ``model.generate`` as ``denoise``.

    Returns:
        Path of a temporary ``.wav`` file written with a 16000 Hz sample
        rate. The file is created with ``delete=False`` and is not removed
        by this function.

    Raises:
        gr.Error: If ``text`` is blank, or if generation or writing the
            file fails. The output component is a file-path Audio widget,
            so returning an error *string* would be treated as a path to a
            file that does not exist; raising lets Gradio show the message.
    """
    if not text or not text.strip():
        raise gr.Error("Error: please enter some text to synthesize.")

    try:
        wav = model.generate(
            text=text,
            cfg_value=cfg_value,
            inference_timesteps=inference_steps,
            normalize=normalize,
            denoise=denoise,
        )
        # Reserve a unique path, then close our handle before soundfile
        # opens the same path: avoids leaking a file descriptor per request
        # and the "file in use" failure on platforms that forbid reopening.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
            wav_path = tmp_wav.name
        sf.write(wav_path, wav, 16000)
    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing
        # the worker, keeping the original cause chained for the logs.
        raise gr.Error(f"Error: {e}") from e
    return wav_path
# ===============================
# Gradio Interface
# ===============================
# Build the web UI. The input components are listed in the same order as the
# parameters of tts_generate (text, cfg_value, inference_steps, normalize,
# denoise); the single output is an Audio player fed by a file path.
app = gr.Interface(
    fn=tts_generate,
    inputs=[
        gr.Textbox(label="Enter text", value="Hello Hugging Face!", lines=3),
        gr.Slider(0.5, 5.0, value=2.0, step=0.1, label="CFG Value"),
        gr.Slider(5, 20, value=8, step=1, label="Inference timesteps"),
        gr.Checkbox(value=True, label="Enable Normalization"),
        gr.Checkbox(value=True, label="Enable Denoise"),
    ],
    outputs=gr.Audio(type="filepath", label="Generated Audio"),
    # The title's leading emoji had been mis-decoded into stray Thai
    # characters; restored to the studio-microphone emoji.
    title="🎙️ VoxCPM Text-to-Speech",
    description="Generate expressive speech with VoxCPM TTS. Adjust CFG for text fidelity and inference timesteps for speed vs quality.",
)
# ===============================
# Script entry point
# ===============================
# Start the Gradio server only when this file is executed directly;
# importing the module builds `app` without launching it.
if __name__ == "__main__":
    app.launch()
|