bezzam
/

VibeVoice-1.5B

@@ -1,6 +1,9 @@
 {
   "_attn_implementation_autoset": false,
   "acoustic_tokenizer_config": {
     "bias": true,
     "channels": 1,
     "depths": [
@@ -20,6 +23,7 @@
       5,
       8
     ],
     "ffn_expansion": 4,
     "hidden_act": "gelu",
     "hidden_size": 64,
@@ -55,6 +59,9 @@
   "model_type": "vibevoice",
   "pad_token_id": 151643,
   "semantic_tokenizer_config": {
     "bias": true,
     "channels": 1,
     "depths": [
@@ -74,6 +81,7 @@
       5,
       8
     ],
     "ffn_expansion": 4,
     "hidden_act": "gelu",
     "hidden_size": 128,

 {
   "_attn_implementation_autoset": false,
   "acoustic_tokenizer_config": {
+    "architectures": [
+      "VibeVoiceAcousticTokenizerModel"
+    ],
     "bias": true,
     "channels": 1,
     "depths": [
       5,
       8
     ],
+    "dtype": "bfloat16",
     "ffn_expansion": 4,
     "hidden_act": "gelu",
     "hidden_size": 64,
   "model_type": "vibevoice",
   "pad_token_id": 151643,
   "semantic_tokenizer_config": {
+    "architectures": [
+      "VibeVoiceSemanticTokenizerModel"
+    ],
     "bias": true,
     "channels": 1,
     "depths": [
       5,
       8
     ],
+    "dtype": "bfloat16",
     "ffn_expansion": 4,
     "hidden_act": "gelu",
     "hidden_size": 128,