import os

import torch
from dotenv import load_dotenv
from transformers import pipeline

from model_config import MODEL_NAME

# Load environment variables (including HF_TOKEN) from a local .env file.
load_dotenv()

HF_TOKEN = os.getenv("HF_TOKEN")

# Build the text-generation pipeline once at module load so every call
# to generate_response() reuses the same loaded model.
generator = pipeline(
    "text-generation",
    model=MODEL_NAME,
    # Half precision on GPU to cut memory use; fall back to fp32 on CPU.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
    # Gated models (e.g. the bigcode family) require an access token.
    token=HF_TOKEN if "bigcode" in MODEL_NAME else None,
)


def generate_response(prompt: str) -> str:
    """
    Generate a response from the model based on the input prompt.

    Returns the raw model output as a string.
    """
    outputs = generator(
        prompt,
        max_new_tokens=200,
        num_return_sequences=1,
        temperature=0.3,  # low temperature keeps completions fairly deterministic
        top_p=0.9,
        do_sample=True,
    )
    return outputs[0]["generated_text"]
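

# Minimal usage sketch: run the module directly to smoke-test the pipeline.
# The prompt below is illustrative only and not part of the original module;
# note that the returned text echoes the prompt, as is the transformers
# text-generation default.
if __name__ == "__main__":
    sample_prompt = "Write a Python function that reverses a string."
    print(generate_response(sample_prompt))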