Spaces:
Runtime error
Runtime error
Update llm.py
Browse files
llm.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
import
|
|
|
|
| 2 |
from datetime import datetime
|
| 3 |
-
from transformers import pipeline
|
| 4 |
|
| 5 |
# Viral Content Generator Agent
|
| 6 |
PREFIX = """You are an Advanced Viral Content Generator with Self-Research and Self-Improvement Capabilities
|
|
@@ -43,27 +43,67 @@ def run_gpt(
|
|
| 43 |
purpose,
|
| 44 |
**prompt_kwargs,
|
| 45 |
):
|
| 46 |
-
"""
|
| 47 |
-
|
| 48 |
content = PREFIX.format(
|
| 49 |
date_time_str=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
| 50 |
purpose=purpose,
|
| 51 |
) + prompt_template.format(**prompt_kwargs)
|
| 52 |
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
num_return_sequences=1,
|
| 59 |
-
)[0]['generated_text']
|
| 60 |
|
| 61 |
-
|
| 62 |
|
| 63 |
-
# Manually truncate at the first stop token
|
| 64 |
-
if stop_tokens:
|
| 65 |
-
for token in stop_tokens:
|
| 66 |
-
if token in response:
|
| 67 |
-
response = response.split(token)[0]
|
| 68 |
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import subprocess
|
| 3 |
from datetime import datetime
|
|
|
|
| 4 |
|
| 5 |
# Viral Content Generator Agent
|
| 6 |
PREFIX = """You are an Advanced Viral Content Generator with Self-Research and Self-Improvement Capabilities
|
|
|
|
| 43 |
purpose,
|
| 44 |
**prompt_kwargs,
|
| 45 |
):
|
| 46 |
+
"""Generate a response using a local Ollama model (gpt-oss by default)."""
|
| 47 |
+
|
| 48 |
content = PREFIX.format(
|
| 49 |
date_time_str=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
| 50 |
purpose=purpose,
|
| 51 |
) + prompt_template.format(**prompt_kwargs)
|
| 52 |
|
| 53 |
+
models = _preferred_models()
|
| 54 |
+
last_error = None
|
| 55 |
+
for model in models:
|
| 56 |
+
try:
|
| 57 |
+
raw_response = _call_ollama(model, content, max_tokens)
|
| 58 |
+
response = _strip_prompt(content, raw_response)
|
| 59 |
+
if stop_tokens:
|
| 60 |
+
for token in stop_tokens:
|
| 61 |
+
if token in response:
|
| 62 |
+
response = response.split(token)[0]
|
| 63 |
+
break
|
| 64 |
+
return response.strip()
|
| 65 |
+
except Exception as exc: # pylint: disable=broad-except
|
| 66 |
+
last_error = exc
|
| 67 |
+
print(f"[LLM]: Failed with model '{model}': {exc}")
|
| 68 |
+
|
| 69 |
+
raise RuntimeError(f"All Ollama model attempts failed: {last_error}")
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def _preferred_models():
|
| 73 |
+
env_model = os.getenv("OLLAMA_MODEL")
|
| 74 |
+
candidates = [env_model, "gpt-oss", "qwen3"]
|
| 75 |
+
return [model for model in candidates if model]
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def _call_ollama(model: str, prompt: str, max_tokens: int) -> str:
|
| 79 |
+
"""Invoke the local Ollama runtime with the provided prompt."""
|
| 80 |
+
max_predict = max(64, int(max_tokens or 0))
|
| 81 |
+
cmd = [
|
| 82 |
+
"ollama",
|
| 83 |
+
"run",
|
| 84 |
+
"--num-predict",
|
| 85 |
+
str(max_predict),
|
| 86 |
+
model,
|
| 87 |
+
]
|
| 88 |
+
try:
|
| 89 |
+
completed = subprocess.run(
|
| 90 |
+
cmd,
|
| 91 |
+
input=prompt.encode("utf-8"),
|
| 92 |
+
capture_output=True,
|
| 93 |
+
check=False,
|
| 94 |
+
)
|
| 95 |
+
except FileNotFoundError as exc:
|
| 96 |
+
raise RuntimeError("Ollama executable not found. Please install Ollama or adjust PATH.") from exc
|
| 97 |
|
| 98 |
+
if completed.returncode != 0:
|
| 99 |
+
stderr = completed.stderr.decode("utf-8", errors="ignore")
|
| 100 |
+
raise RuntimeError(stderr.strip() or f"ollama run {model} failed with code {completed.returncode}")
|
|
|
|
|
|
|
| 101 |
|
| 102 |
+
return completed.stdout.decode("utf-8", errors="ignore")
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
+
def _strip_prompt(prompt: str, raw_response: str) -> str:
|
| 106 |
+
"""Some models echo the prompt; remove it if present."""
|
| 107 |
+
if raw_response.startswith(prompt):
|
| 108 |
+
return raw_response[len(prompt):]
|
| 109 |
+
return raw_response
|