```python
# model.py
"""
Model wrapper using Hugging Face pipelines.
We use a ready-made sentiment-analysis pipeline so we don't train here.
"""
from typing import Dict, List

from tqdm import tqdm
from transformers import pipeline

# Choose a well-known small fine-tuned model for sentiment (works out of the box)
DEFAULT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"

# Initialize the pipeline once at import time (Hugging Face caches the weights locally)
sentiment_pipeline = pipeline("sentiment-analysis", model=DEFAULT_MODEL)


def analyze_text(text: str) -> Dict:
    """
    Analyze a single text string and return label + score.

    Returns: {"label": "POSITIVE"/"NEGATIVE", "score": float}
    """
    if not isinstance(text, str) or text.strip() == "":
        return {"label": "NEUTRAL", "score": 0.0}
    out = sentiment_pipeline(text[:1000])  # truncate very long input for speed
    # The pipeline returns a list with one dict per input
    return out[0]


def analyze_batch(texts: List[str], batch_size: int = 16) -> List[Dict]:
    """
    Analyze a list of texts in batches (to avoid memory spikes).
    """
    results = []
    n = len(texts)
    for i in tqdm(range(0, n, batch_size), desc="Running model"):
        batch = texts[i:i + batch_size]
        res = sentiment_pipeline(batch)
        results.extend(res)
    return results
```
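For a quick local sanity check before wiring this into the Space, you can import the two helpers and print their output. The sketch below is illustrative only: the script name and example strings are mine, not part of the original project, and it assumes `model.py` sits in the same directory.

```python
# quick_check.py -- illustrative local test, not part of the original repo
from model import analyze_text, analyze_batch

# Single input: returns one dict like {"label": "POSITIVE", "score": 0.99}
print(analyze_text("The deployment went smoothly and the app feels fast."))

# Batch input: returns one dict per text, processed 16 at a time by default
reviews = [
    "Great documentation, easy to follow.",
    "The build kept failing and the logs were unhelpful.",
]
for review, result in zip(reviews, analyze_batch(reviews)):
    print(f"{result['label']:>8}  {result['score']:.3f}  {review}")
```

The first call will download the model weights if they are not already cached, so expect it to be slow once and fast afterwards.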