fix bugs
app.py CHANGED
@@ -13,8 +13,8 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer
 
 model_id = "pszemraj/flan-t5-large-instruct-dolly_hhrlhf"
 torch_device = "cuda" if torch.cuda.is_available() else "cpu"
-logging.info("Running on device
-logging.info("CPU threads
+logging.info(f"Running on device:\t {torch_device}")
+logging.info(f"CPU threads:\t {torch.get_num_threads()}")
 
 
 if torch_device == "cuda":
@@ -23,6 +23,11 @@ if torch_device == "cuda":
     )
 else:
     model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
+try:
+    model = torch.compile(model)
+except Exception as e:
+    logging.error(f"Unable to compile model:\t{e}")
+
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 
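The block added here guards torch.compile with try/except: torch.compile only exists in PyTorch 2.0+, so on an older runtime (or an unsupported backend) the call raises and the Space keeps the uncompiled eager model instead of failing at startup. A minimal self-contained sketch of the same pattern, assuming a PyTorch/transformers environment like the one above:

import logging

import torch
from transformers import AutoModelForSeq2SeqLM

model_id = "pszemraj/flan-t5-large-instruct-dolly_hhrlhf"
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
try:
    # torch.compile was introduced in PyTorch 2.0; on older versions the
    # attribute does not exist and this raises, keeping the eager model.
    model = torch.compile(model)
except Exception as e:
    logging.error(f"Unable to compile model:\t{e}")
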
@@ -123,6 +128,14 @@ with gr.Blocks() as demo:
                 interactive=True,
                 label="Top-k",
             )
+            temperature = gr.Slider(
+                minimum=0.1,
+                maximum=5.0,
+                value=0.8,
+                step=0.1,
+                interactive=True,
+                label="Temperature",
+            )
             repetition_penalty = gr.Slider(
                 minimum=0.9,
                 maximum=2.5,
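
The new Temperature slider mirrors the existing Top-k and repetition-penalty controls. The callback that consumes these sliders is outside this diff, so the wiring below is a hypothetical sketch: generate_text and max_new_tokens are illustrative names, not taken from app.py, while model, tokenizer, and torch_device are the globals defined earlier in the file.

def generate_text(prompt, top_k, temperature, repetition_penalty):
    # Temperature rescales the logits before sampling, so it only takes
    # effect when do_sample=True.
    inputs = tokenizer(prompt, return_tensors="pt").to(torch_device)
    outputs = model.generate(
        **inputs,
        do_sample=True,
        top_k=int(top_k),
        temperature=float(temperature),
        repetition_penalty=float(repetition_penalty),
        max_new_tokens=256,  # illustrative cap, not set in this diff
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)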