lvj committed
Commit 3ee3aba · 1 Parent(s): a572e8c

Upload Qwen3ForCausalLM

Files changed (3):
  1. config.json             +159 -39
  2. generation_config.json    +2  -3
  3. pytorch_model.bin         +3  -0
config.json CHANGED
@@ -1,10 +1,9 @@
 {
   "architectures": [
-    "FSDPQwen3ForCausalLM"
+    "Qwen3ForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id": 151643,
   "dtype": "float32",
   "eos_token_id": 151645,
   "head_dim": 128,
@@ -56,9 +55,157 @@
   "num_attention_heads": 32,
   "num_hidden_layers": 36,
   "num_key_value_heads": 8,
+  "pad_token_id": 151645,
   "quantization_config": {
     "include_input_output_embeddings": true,
-    "modules_to_not_convert": [],
+    "modules_to_not_convert": [
+      "model.layers.0.self_attn.q_norm",
+      "model.layers.0.self_attn.k_norm",
+      "model.layers.0.input_layernorm",
+      "model.layers.0.post_attention_layernorm",
+      "model.layers.1.self_attn.q_norm",
+      "model.layers.1.self_attn.k_norm",
+      "model.layers.1.input_layernorm",
+      "model.layers.1.post_attention_layernorm",
+      "model.layers.2.self_attn.q_norm",
+      "model.layers.2.self_attn.k_norm",
+      "model.layers.2.input_layernorm",
+      "model.layers.2.post_attention_layernorm",
+      "model.layers.3.self_attn.q_norm",
+      "model.layers.3.self_attn.k_norm",
+      "model.layers.3.input_layernorm",
+      "model.layers.3.post_attention_layernorm",
+      "model.layers.4.self_attn.q_norm",
+      "model.layers.4.self_attn.k_norm",
+      "model.layers.4.input_layernorm",
+      "model.layers.4.post_attention_layernorm",
+      "model.layers.5.self_attn.q_norm",
+      "model.layers.5.self_attn.k_norm",
+      "model.layers.5.input_layernorm",
+      "model.layers.5.post_attention_layernorm",
+      "model.layers.6.self_attn.q_norm",
+      "model.layers.6.self_attn.k_norm",
+      "model.layers.6.input_layernorm",
+      "model.layers.6.post_attention_layernorm",
+      "model.layers.7.self_attn.q_norm",
+      "model.layers.7.self_attn.k_norm",
+      "model.layers.7.input_layernorm",
+      "model.layers.7.post_attention_layernorm",
+      "model.layers.8.self_attn.q_norm",
+      "model.layers.8.self_attn.k_norm",
+      "model.layers.8.input_layernorm",
+      "model.layers.8.post_attention_layernorm",
+      "model.layers.9.self_attn.q_norm",
+      "model.layers.9.self_attn.k_norm",
+      "model.layers.9.input_layernorm",
+      "model.layers.9.post_attention_layernorm",
+      "model.layers.10.self_attn.q_norm",
+      "model.layers.10.self_attn.k_norm",
+      "model.layers.10.input_layernorm",
+      "model.layers.10.post_attention_layernorm",
+      "model.layers.11.self_attn.q_norm",
+      "model.layers.11.self_attn.k_norm",
+      "model.layers.11.input_layernorm",
+      "model.layers.11.post_attention_layernorm",
+      "model.layers.12.self_attn.q_norm",
+      "model.layers.12.self_attn.k_norm",
+      "model.layers.12.input_layernorm",
+      "model.layers.12.post_attention_layernorm",
+      "model.layers.13.self_attn.q_norm",
+      "model.layers.13.self_attn.k_norm",
+      "model.layers.13.input_layernorm",
+      "model.layers.13.post_attention_layernorm",
+      "model.layers.14.self_attn.q_norm",
+      "model.layers.14.self_attn.k_norm",
+      "model.layers.14.input_layernorm",
+      "model.layers.14.post_attention_layernorm",
+      "model.layers.15.self_attn.q_norm",
+      "model.layers.15.self_attn.k_norm",
+      "model.layers.15.input_layernorm",
+      "model.layers.15.post_attention_layernorm",
+      "model.layers.16.self_attn.q_norm",
+      "model.layers.16.self_attn.k_norm",
+      "model.layers.16.input_layernorm",
+      "model.layers.16.post_attention_layernorm",
+      "model.layers.17.self_attn.q_norm",
+      "model.layers.17.self_attn.k_norm",
+      "model.layers.17.input_layernorm",
+      "model.layers.17.post_attention_layernorm",
+      "model.layers.18.self_attn.q_norm",
+      "model.layers.18.self_attn.k_norm",
+      "model.layers.18.input_layernorm",
+      "model.layers.18.post_attention_layernorm",
+      "model.layers.19.self_attn.q_norm",
+      "model.layers.19.self_attn.k_norm",
+      "model.layers.19.input_layernorm",
+      "model.layers.19.post_attention_layernorm",
+      "model.layers.20.self_attn.q_norm",
+      "model.layers.20.self_attn.k_norm",
+      "model.layers.20.input_layernorm",
+      "model.layers.20.post_attention_layernorm",
+      "model.layers.21.self_attn.q_norm",
+      "model.layers.21.self_attn.k_norm",
+      "model.layers.21.input_layernorm",
+      "model.layers.21.post_attention_layernorm",
+      "model.layers.22.self_attn.q_norm",
+      "model.layers.22.self_attn.k_norm",
+      "model.layers.22.input_layernorm",
+      "model.layers.22.post_attention_layernorm",
+      "model.layers.23.self_attn.q_norm",
+      "model.layers.23.self_attn.k_norm",
+      "model.layers.23.input_layernorm",
+      "model.layers.23.post_attention_layernorm",
+      "model.layers.24.self_attn.q_norm",
+      "model.layers.24.self_attn.k_norm",
+      "model.layers.24.input_layernorm",
+      "model.layers.24.post_attention_layernorm",
+      "model.layers.25.self_attn.q_norm",
+      "model.layers.25.self_attn.k_norm",
+      "model.layers.25.input_layernorm",
+      "model.layers.25.post_attention_layernorm",
+      "model.layers.26.self_attn.q_norm",
+      "model.layers.26.self_attn.k_norm",
+      "model.layers.26.input_layernorm",
+      "model.layers.26.post_attention_layernorm",
+      "model.layers.27.self_attn.q_norm",
+      "model.layers.27.self_attn.k_norm",
+      "model.layers.27.input_layernorm",
+      "model.layers.27.post_attention_layernorm",
+      "model.layers.28.self_attn.q_norm",
+      "model.layers.28.self_attn.k_norm",
+      "model.layers.28.input_layernorm",
+      "model.layers.28.post_attention_layernorm",
+      "model.layers.29.self_attn.q_norm",
+      "model.layers.29.self_attn.k_norm",
+      "model.layers.29.input_layernorm",
+      "model.layers.29.post_attention_layernorm",
+      "model.layers.30.self_attn.q_norm",
+      "model.layers.30.self_attn.k_norm",
+      "model.layers.30.input_layernorm",
+      "model.layers.30.post_attention_layernorm",
+      "model.layers.31.self_attn.q_norm",
+      "model.layers.31.self_attn.k_norm",
+      "model.layers.31.input_layernorm",
+      "model.layers.31.post_attention_layernorm",
+      "model.layers.32.self_attn.q_norm",
+      "model.layers.32.self_attn.k_norm",
+      "model.layers.32.input_layernorm",
+      "model.layers.32.post_attention_layernorm",
+      "model.layers.33.self_attn.q_norm",
+      "model.layers.33.self_attn.k_norm",
+      "model.layers.33.input_layernorm",
+      "model.layers.33.post_attention_layernorm",
+      "model.layers.34.self_attn.q_norm",
+      "model.layers.34.self_attn.k_norm",
+      "model.layers.34.input_layernorm",
+      "model.layers.34.post_attention_layernorm",
+      "model.layers.35.self_attn.q_norm",
+      "model.layers.35.self_attn.k_norm",
+      "model.layers.35.input_layernorm",
+      "model.layers.35.post_attention_layernorm",
+      "model.norm",
+      "lm_head"
+    ],
     "quant_method": "torchao",
     "quant_type": {
       "default": {
@@ -99,14 +246,11 @@
         "_type": "Int8DynamicActivationIntxWeightConfig",
         "_version": 2
       },
-      "lm_head": {
+      "model.embed_tokens": {
         "_data": {
-          "granularity": {
-            "_data": {
-              "axis": 0
-            },
-            "_type": "PerAxis",
-            "_version": 1
+          "act_mapping_type": {
+            "_data": "ASYMMETRIC",
+            "_type": "MappingType"
           },
           "intx_packing_format": {
             "_data": "UNPACKED_TO_INT8",
@@ -117,48 +261,24 @@
             "_type": "QDQLayout",
             "_version": 1
           },
-          "mapping_type": {
-            "_data": "SYMMETRIC",
-            "_type": "MappingType"
-          },
-          "scale_dtype": null,
           "weight_dtype": {
             "_data": "int4",
             "_type": "torch.dtype"
-          }
-        },
-        "_type": "IntxWeightOnlyConfig",
-        "_version": 2
-      },
-      "model.embed_tokens": {
-        "_data": {
-          "granularity": {
+          },
+          "weight_granularity": {
             "_data": {
               "axis": 0
             },
             "_type": "PerAxis",
             "_version": 1
           },
-          "intx_packing_format": {
-            "_data": "UNPACKED_TO_INT8",
-            "_type": "IntxPackingFormat"
-          },
-          "layout": {
-            "_data": {},
-            "_type": "QDQLayout",
-            "_version": 1
-          },
-          "mapping_type": {
+          "weight_mapping_type": {
             "_data": "SYMMETRIC",
             "_type": "MappingType"
           },
-          "scale_dtype": null,
-          "weight_dtype": {
-            "_data": "int4",
-            "_type": "torch.dtype"
-          }
+          "weight_scale_dtype": null
         },
-        "_type": "IntxWeightOnlyConfig",
+        "_type": "Int8DynamicActivationIntxWeightConfig",
         "_version": 2
       }
     }
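
The quantization_config above records a torchao recipe: int8 dynamic activations with int4 weights (symmetric, per-axis scales) for the linear layers, the same recipe applied to model.embed_tokens via include_input_output_embeddings, and every norm layer plus model.norm and lm_head left unquantized. A minimal sketch of how such a setup might be built, assuming a recent torch/torchao/transformers stack; the kwarg names mirror the serialized fields above, and the base checkpoint id is a placeholder, not taken from this commit:

import torch
from torchao.quantization import Int8DynamicActivationIntxWeightConfig, ModuleFqnToConfig
from torchao.quantization.granularity import PerAxis
from torchao.quantization.quant_primitives import MappingType
from transformers import AutoModelForCausalLM, TorchAoConfig

# int8 dynamic activations + int4 weights, symmetric per-output-channel
# weight scales, mirroring the "default" and "model.embed_tokens" entries
# under "quant_type" in config.json.
recipe = Int8DynamicActivationIntxWeightConfig(
    weight_dtype=torch.int4,
    weight_granularity=PerAxis(0),
    weight_mapping_type=MappingType.SYMMETRIC,
    act_mapping_type=MappingType.ASYMMETRIC,
)

quant_config = TorchAoConfig(
    quant_type=ModuleFqnToConfig({"_default": recipe, "model.embed_tokens": recipe}),
    include_input_output_embeddings=True,   # also quantize the embedding table
    modules_to_not_convert=["model.norm", "lm_head"],  # plus every *_norm module, as in the diff
)

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-8B",        # placeholder base model (36 layers, 32 heads, 8 KV heads)
    dtype=torch.float32,
    quantization_config=quant_config,
)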
generation_config.json CHANGED
@@ -1,11 +1,10 @@
 {
-  "bos_token_id": 151643,
   "do_sample": true,
   "eos_token_id": [
     151645,
-    151643
+    151645
   ],
-  "pad_token_id": 151643,
+  "pad_token_id": 151645,
   "temperature": 0.6,
   "top_k": 20,
   "top_p": 0.95,
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12dfdb01ec3178c45b8bc36dcfeee40e41c66daf213160b091631c426dcbf4d6
+size 4419374407
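
The three added lines are a Git LFS pointer, not the weights themselves; the actual ~4.4 GB file is addressed by its sha256. A minimal sketch for checking a downloaded pytorch_model.bin against the pointer's oid and size:

import hashlib

def verify_lfs_object(path, expected_sha256, expected_size):
    # Stream the file so a multi-GB checkpoint never sits fully in memory.
    h = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            size += len(chunk)
    return size == expected_size and h.hexdigest() == expected_sha256

ok = verify_lfs_object(
    "pytorch_model.bin",
    "12dfdb01ec3178c45b8bc36dcfeee40e41c66daf213160b091631c426dcbf4d6",
    4419374407,
)
print("pointer matches file:", ok)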