Spaces:
Running
Running
File size: 1,339 Bytes
65e3bf9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# model.py
"""
Model wrapper using Hugging Face pipelines.
We use a ready-made sentiment-analysis pipeline so we don't train here.
"""
from transformers import pipeline
from typing import List, Dict
import math
from tqdm import tqdm
# Default checkpoint: a small DistilBERT model already fine-tuned for
# sentiment classification, so it works out of the box without training.
DEFAULT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"

# Build the pipeline once, at import time, so the functions below share a
# single instance (Hugging Face caches the downloaded model locally).
sentiment_pipeline = pipeline(task="sentiment-analysis", model=DEFAULT_MODEL)
def analyze_text(text: str) -> Dict:
    """
    Analyze a single text string and return its sentiment label and score.

    Args:
        text: The text to classify. Only the first 1000 characters are used.

    Returns:
        The first result dict produced by the pipeline, e.g.
        {"label": "POSITIVE"/"NEGATIVE", "score": float}.
        If ``text`` is not a string, or is empty / whitespace-only, the
        model is not called and {"label": "NEUTRAL", "score": 0.0} is
        returned instead.
    """
    if not isinstance(text, str) or not text.strip():
        return {"label": "NEUTRAL", "score": 0.0}

    # The character cut keeps tokenization cheap for very long input, but it
    # does not bound the token count (e.g. long runs of punctuation or CJK
    # text produce roughly one token per character). truncation=True makes
    # the tokenizer clip to the model's maximum length instead of failing.
    out = sentiment_pipeline(text[:1000], truncation=True)

    # The pipeline returns a list with one dict per input.
    return out[0]
def analyze_batch(texts: List[str], batch_size: int = 16) -> List[Dict]:
    """
    Analyze a list of texts, sending them to the model in chunks.

    Items are handled the same way as in ``analyze_text``: anything that is
    not a string, or is empty / whitespace-only, is not sent to the model and
    yields {"label": "NEUTRAL", "score": 0.0}; usable texts are cut to their
    first 1000 characters before classification.

    Args:
        texts: The texts to classify.
        batch_size: How many usable texts are passed to the pipeline per
            call. Must be at least 1.

    Returns:
        One result dict per input item, in the same order as ``texts``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A zero step makes range() fail with an unrelated message, and a
        # negative step silently yields no chunks (every result dropped).
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Start with a neutral placeholder in every slot; slots belonging to
    # usable texts are overwritten with the model output below, so the
    # result stays aligned with the input positions.
    results: List[Dict] = [{"label": "NEUTRAL", "score": 0.0} for _ in texts]

    usable = [
        (position, text[:1000])  # cut very long input for speed
        for position, text in enumerate(texts)
        if isinstance(text, str) and text.strip()
    ]
    if not usable:
        # Nothing to classify: skip the model and the progress bar entirely.
        return results

    for start in tqdm(range(0, len(usable), batch_size), desc="Running model"):
        chunk = usable[start:start + batch_size]
        # truncation=True clips inputs that still exceed the model's maximum
        # token length after the character cut, instead of raising.
        outputs = sentiment_pipeline(
            [text for _, text in chunk],
            truncation=True,
        )
        for (position, _), output in zip(chunk, outputs):
            results[position] = output

    return results
|