tuf601121 committed on
Commit
271a505
·
verified ·
1 Parent(s): 3e68d9b

Upload 5 files

Browse files
Files changed (5) hide show
  1. README.md +10 -0
  2. app.py +87 -0
  3. gitattributes.txt +35 -0
  4. requirements.txt +6 -0
  5. speakers.json +71 -0
README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ title: 12lab
4
+ sdk: gradio
5
+ emoji: 🚀
6
+ colorFrom: yellow
7
+ colorTo: red
8
+ pinned: false
9
+ short_description: nothing
10
+ ---
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import gradio as gr
4
+ import soundfile as sf
5
+ import numpy as np
6
+ from pathlib import Path
7
+ import json
8
+ import traceback
9
+
10
# Token MUST be added via HuggingFace Space Secrets.
# The token is only checked for presence here; it is not passed explicitly to
# any from_pretrained() call below.
# NOTE(review): presumably the Hub client reads HF_TOKEN from the environment
# on its own -- confirm before relying on it for gated models.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    print("WARNING: HF_TOKEN missing. Add it in Space → Settings → Variables & Secrets.")

MODEL_ID = "ai4bharat/indic-parler-tts"

try:
    from parler_tts import ParlerTTSForConditionalGeneration
    from transformers import AutoTokenizer
except Exception as e:
    # Chain the original error so the real import failure stays in the traceback.
    raise RuntimeError("Missing required libraries. Install dependencies from requirements.txt. Error: " + str(e)) from e

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

print("Loading model…")
model = ParlerTTSForConditionalGeneration.from_pretrained(MODEL_ID).to(device)
# Tokenizer for the text that will be spoken (the "prompt").
text_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Tokenizer for the voice description. It is looked up from the model's text
# encoder config; if that lookup or download fails, the prompt tokenizer is
# reused as a best-effort fallback.
try:
    desc_encoder_name = model.config.text_encoder._name_or_path
    desc_tokenizer = AutoTokenizer.from_pretrained(desc_encoder_name)
except Exception:
    # Keep the fallback, but leave a trace of why it was needed.
    traceback.print_exc()
    desc_tokenizer = text_tokenizer

# Sample rate used when writing WAV files; 22050 is used if the config has none.
sampling_rate = getattr(model.config, "sampling_rate", 22050)

# Speaker names come from speakers.json next to this script, when present.
sp_file = Path(__file__).parent / "speakers.json"
if sp_file.exists():
    # Use a context manager so the file handle is closed deterministically.
    with open(sp_file, "r", encoding="utf-8") as fh:
        # An empty list would break the SPEAKERS[0] default used by the UI.
        SPEAKERS = json.load(fh) or ["Default"]
else:
    SPEAKERS = ["Default"]
42
+
43
def synthesize(text, speaker, emotion="Neutral"):
    """Generate speech for *text* and return the path of the written WAV file.

    Args:
        text: The sentence to speak. ``None``, empty, or whitespace-only input
            produces no audio.
        speaker: Speaker name inserted into the voice description.
        emotion: Tone label inserted into the voice description.

    Returns:
        Path (str) of a newly written WAV file, or ``None`` when there is no
        text to synthesize.
    """
    import tempfile

    # Guard against None as well as blank strings; None has no .strip().
    if not text or not text.strip():
        return None

    desc = f"{speaker}'s voice. Tone: {emotion}. Natural, clear speech, close mic."
    try:
        desc_ids = desc_tokenizer(desc, return_tensors="pt").to(device)
        text_ids = text_tokenizer(text, return_tensors="pt").to(device)
    except Exception:
        # Best-effort fallback: retry without moving the tensors to `device`,
        # but log the original failure instead of swallowing it silently.
        traceback.print_exc()
        desc_ids = desc_tokenizer(desc, return_tensors="pt")
        text_ids = text_tokenizer(text, return_tensors="pt")

    with torch.no_grad():
        try:
            audio = model.generate(
                input_ids=desc_ids.input_ids,
                attention_mask=desc_ids.attention_mask,
                prompt_input_ids=text_ids.input_ids,
                prompt_attention_mask=text_ids.attention_mask,
                max_length=20000,
            )
        except Exception:
            # Log the real generation error before trying the fallback call.
            traceback.print_exc()
            # NOTE(review): it is unclear whether generate() accepts the
            # `description=` / `text=` keywords -- confirm against the
            # Parler-TTS API; if not, this fallback only masks the error above.
            audio = model.generate(description=desc, text=text)

    arr = audio.cpu().numpy().squeeze()
    if np.issubdtype(arr.dtype, np.integer):
        # Scale integer PCM into floats by the dtype's maximum value. The
        # right-hand side is evaluated while `arr` still has its integer dtype.
        arr = arr.astype("float32") / np.iinfo(arr.dtype).max

    # Use a unique temp file per request. A name derived from hash(text) alone
    # collides when the same text is synthesized with a different speaker or
    # emotion, so requests could overwrite each other's audio.
    with tempfile.NamedTemporaryFile(prefix="out_", suffix=".wav", delete=False) as tmp:
        out_path = tmp.name
    sf.write(out_path, arr, sampling_rate)
    return out_path
74
+
75
# Assemble the web UI: a text box, speaker and emotion selectors, a trigger
# button, and an audio player that receives the file path from synthesize().
with gr.Blocks() as demo:
    gr.Markdown("# Indic Parler-TTS (69 Speakers)")

    txt = gr.Textbox(
        label="Text",
        value="नमस्ते, यह एक परीक्षण वाक्य है।",
    )
    sp = gr.Dropdown(
        choices=SPEAKERS,
        value=SPEAKERS[0],
        label="Speaker",
    )
    emotion_choices = ["Neutral", "Happy", "Sad", "Angry", "Narration"]
    emo = gr.Dropdown(
        choices=emotion_choices,
        value="Neutral",
        label="Emotion",
    )
    btn = gr.Button("Generate")
    out = gr.Audio()

    # Wire the button: (text, speaker, emotion) -> synthesized audio file.
    btn.click(synthesize, inputs=[txt, sp, emo], outputs=out)

# Start the server only when run as a script: all interfaces, port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
gitattributes.txt ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ parler-tts @ git+https://github.com/huggingface/parler-tts.git
4
+ soundfile
5
+ numpy
6
+ gradio
speakers.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "Aakash",
3
+ "Aditi",
4
+ "Amit",
5
+ "Amrita",
6
+ "Anjali",
7
+ "Anu",
8
+ "Arjun",
9
+ "Aryan",
10
+ "Asha",
11
+ "Bhanu",
12
+ "Bikram",
13
+ "Chetan",
14
+ "Debjani",
15
+ "Deepak",
16
+ "Dinesh",
17
+ "Divya",
18
+ "Divjot",
19
+ "Gauri",
20
+ "Gurpreet",
21
+ "Hardeep",
22
+ "Harish",
23
+ "Isha",
24
+ "Jaya",
25
+ "Jatin",
26
+ "Jon",
27
+ "Kabir",
28
+ "Karan",
29
+ "Kavitha",
30
+ "Kavya",
31
+ "Kiran",
32
+ "Kunal",
33
+ "Laishram",
34
+ "Lalitha",
35
+ "Lea",
36
+ "Maya",
37
+ "Meera",
38
+ "Milan",
39
+ "Mina",
40
+ "Mohit",
41
+ "Nikhil",
42
+ "Neha",
43
+ "Nisha",
44
+ "Poonam",
45
+ "Prakash",
46
+ "Priya",
47
+ "Puneet",
48
+ "Radha",
49
+ "Rakesh",
50
+ "Ranjit",
51
+ "Ravi",
52
+ "Riya",
53
+ "Rohit",
54
+ "Rohini",
55
+ "Sanjay",
56
+ "Saurav",
57
+ "Shalini",
58
+ "Shweta",
59
+ "Sita",
60
+ "Sneha",
61
+ "Suresh",
62
+ "Sunita",
63
+ "Swapna",
64
+ "Tapan",
65
+ "Tarun",
66
+ "Tisha",
67
+ "Varun",
68
+ "Vikas",
69
+ "Vidya",
70
+ "Yash"
71
+ ]