Ahmad-01 committed on
Commit
8827fc3
·
verified ·
1 Parent(s): 66e7adc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -53
app.py CHANGED
@@ -1,67 +1,23 @@
1
  import gradio as gr
 
2
  import soundfile as sf
3
  import tempfile
4
- from voxcpm import VoxCPM
5
- from modelscope import snapshot_download
6
-
7
- # ===============================
8
- # Pre-download models to cache
9
- # ===============================
10
- snapshot_download('iic/speech_zipenhancer_ans_multiloss_16k_base', cache_dir="./models")
11
- snapshot_download('iic/SenseVoiceSmall', cache_dir="./models")
12
 
13
- # ===============================
14
- # Load VoxCPM model (only once)
15
- # ===============================
16
  model = VoxCPM.from_pretrained("openbmb/VoxCPM-0.5B")
17
 
18
- # ===============================
19
- # TTS function
20
- # ===============================
21
- def tts_generate(text, cfg_value, inference_steps, normalize, denoise, fast_mode):
22
- # Fast mode: reduce quality but speed up inference
23
- if fast_mode:
24
- cfg_value = 1.5
25
- inference_steps = 6
26
- normalize = False
27
- denoise = False
28
-
29
- wav = model.generate(
30
- text=text,
31
- cfg_value=cfg_value,
32
- inference_timesteps=inference_steps,
33
- normalize=normalize,
34
- denoise=denoise
35
- )
36
-
37
  tmp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
38
  sf.write(tmp_wav.name, wav, 16000)
39
  return tmp_wav.name
40
 
41
- # ===============================
42
- # Gradio UI
43
- # ===============================
44
- tts_app = gr.Interface(
45
  fn=tts_generate,
46
- inputs=[
47
- gr.Textbox(label="Enter text", value="Hello, this is a test of VoxCPM!", lines=3),
48
- gr.Slider(0.5, 5.0, value=2.0, step=0.1, label="CFG Value"),
49
- gr.Slider(5, 50, value=10, step=1, label="Inference timesteps"),
50
- gr.Checkbox(value=True, label="Enable Normalization"),
51
- gr.Checkbox(value=True, label="Enable Denoise"),
52
- gr.Checkbox(value=False, label="Enable Fast Mode (lower quality, faster)"),
53
- ],
54
- outputs=gr.Audio(type="filepath", label="Generated Audio"),
55
- title="🎙️ VoxCPM Text-to-Speech Generator",
56
- description=(
57
- "Generate expressive speech from text using VoxCPM TTS. "
58
- "Adjust CFG for text accuracy vs naturalness, and inference timesteps for speed vs quality. "
59
- "Use 'Fast Mode' for quick previews."
60
- )
61
  )
62
 
63
- # ===============================
64
- # Launch App
65
- # ===============================
66
  if __name__ == "__main__":
67
- tts_app.launch()
 
1
  import gradio as gr
2
+ from voxcpm import VoxCPM
3
  import soundfile as sf
4
  import tempfile
 
 
 
 
 
 
 
 
5
 
6
+ # Load model once
 
 
7
  model = VoxCPM.from_pretrained("openbmb/VoxCPM-0.5B")
8
 
9
+ def tts_generate(text):
10
+ wav = model.generate(text=text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  tmp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
12
  sf.write(tmp_wav.name, wav, 16000)
13
  return tmp_wav.name
14
 
15
+ app = gr.Interface(
 
 
 
16
  fn=tts_generate,
17
+ inputs=gr.Textbox(label="Enter text", value="Hello Hugging Face!"),
18
+ outputs=gr.Audio(type="filepath"),
19
+ title="VoxCPM TTS Test"
 
 
 
 
 
 
 
 
 
 
 
 
20
  )
21
 
 
 
 
22
  if __name__ == "__main__":
23
+ app.launch()