# model.py
"""
Model wrapper using Hugging Face pipelines.
We use a ready-made sentiment-analysis pipeline so we don't train here.
"""
from typing import Dict, List

from tqdm import tqdm
from transformers import pipeline

# A well-known, small model fine-tuned for sentiment analysis (works out of the box).
DEFAULT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"

# Initialize the pipeline once at import time; the model is downloaded and cached locally by Hugging Face.
sentiment_pipeline = pipeline("sentiment-analysis", model=DEFAULT_MODEL)


def analyze_text(text: str) -> Dict:
    """
    Analyze a single text string and return its label and score.

    Returns: {"label": "POSITIVE" or "NEGATIVE", "score": float}
    """
    if not isinstance(text, str) or text.strip() == "":
        return {"label": "NEUTRAL", "score": 0.0}
    # Cut very long input for speed; truncation=True also caps it at the model's max sequence length.
    out = sentiment_pipeline(text[:1000], truncation=True)
    # The pipeline returns a list with one dict per input.
    return out[0]


def analyze_batch(texts: List[str], batch_size: int = 16) -> List[Dict]:
    """
    Analyze a list of texts in batches to avoid memory spikes.
    """
    results = []
    n = len(texts)
    for i in tqdm(range(0, n, batch_size), desc="Running model"):
        batch = texts[i:i + batch_size]
        # truncation=True keeps overly long texts within the model's maximum sequence length.
        res = sentiment_pipeline(batch, truncation=True)
        results.extend(res)
    return results
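

# A minimal usage sketch, not part of the original module: running this file directly
# exercises both helpers on a couple of hypothetical example strings.
if __name__ == "__main__":
    # Single text -> one dict with "label" and "score".
    print(analyze_text("I really enjoyed this movie!"))
    # List of texts -> one dict per input, processed in batches.
    print(analyze_batch(["Great service and friendly staff.", "Terrible experience, would not recommend."], batch_size=2))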