Spaces:

xiaowang7777
/

fnlp-moss-moon-003-sft-int8

Runtime error

xiaowang7777 committed on Apr 24, 2023

Commit

0c0f97c

1 Parent(s): 51ac4d5

push

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,21 +5,26 @@ from models.modeling_moss import MossForCausalLM
 from models.tokenization_moss import MossTokenizer
 from models.configuration_moss import MossConfig
 from accelerate import init_empty_weights, load_checkpoint_and_dispatch
 # nstruct_pipeline_3b = pipeline(model="fnlp/moss-moon-003-sft-int4", torch_dtype=torch.float, trust_remote_code=True,
 #                                device_map="auto")
 model_path = "fnlp/moss-moon-003-sft-int8"
-config = MossConfig.from_pretrained(model_path)
-tokenizer = MossTokenizer.from_pretrained(model_path)
-with init_empty_weights():
-  raw_model = MossForCausalLM._from_config(config, torch_dtype=torch.float)
-raw_model.tie_weights()
-model = load_checkpoint_and_dispatch(
-  raw_model, checkpoint=model_path, device_map="balanced_low_0", no_split_module_classes=["MossBlock"], dtype=torch.float,
-  offload_folder="offload_folder"
-)
 def generate(query, temperature, top_p, top_k, max_new_tokens):

 from models.tokenization_moss import MossTokenizer
 from models.configuration_moss import MossConfig
 from accelerate import init_empty_weights, load_checkpoint_and_dispatch
+from transformers import AutoTokenizer, AutoModelForCausalLM
 # nstruct_pipeline_3b = pipeline(model="fnlp/moss-moon-003-sft-int4", torch_dtype=torch.float, trust_remote_code=True,
 #                                device_map="auto")
 model_path = "fnlp/moss-moon-003-sft-int8"
+# config = MossConfig.from_pretrained(model_path)
+# tokenizer = MossTokenizer.from_pretrained(model_path)
+#
+# with init_empty_weights():
+#   raw_model = MossForCausalLM._from_config(config, torch_dtype=torch.float)
+# raw_model.tie_weights()
+# model = load_checkpoint_and_dispatch(
+#   raw_model, checkpoint=model_path, device_map="balanced_low_0", no_split_module_classes=["MossBlock"], dtype=torch.float,
+#   offload_folder="offload_folder"
+# )
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True).float()
+model = model.eval()
 def generate(query, temperature, top_p, top_k, max_new_tokens):