# model.py
"""

Model wrapper using Hugging Face pipelines.

We use a ready-made sentiment-analysis pipeline so we don't train here.

"""

from typing import Dict, List

from tqdm import tqdm
from transformers import pipeline

# A well-known small model fine-tuned for sentiment analysis (works out of the box)
DEFAULT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"

# Initialize the pipeline once at import time (weights are downloaded on first use and cached locally)
sentiment_pipeline = pipeline("sentiment-analysis", model=DEFAULT_MODEL)
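
# Optional tweak (an assumption, not required for the examples below): if a GPU is
# available, passing device=0 to pipeline(...) above runs inference on it and is much faster.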

def analyze_text(text: str) -> Dict:
    """

    Analyze a single text string and return label + score.

    Returns: {"label": "POSITIVE"/"NEGATIVE", "score": float}

    """
    if not isinstance(text, str) or text.strip() == "":
        return {"label": "NEUTRAL", "score": 0.0}
    # Cut very long input for speed; truncation=True also caps it at the
    # model's maximum sequence length so oversized inputs don't error out.
    out = sentiment_pipeline(text[:1000], truncation=True)
    # The pipeline returns a list with one dict per input.
    return out[0]

def analyze_batch(texts: List[str], batch_size: int = 16) -> List[Dict]:
    """

    Analyze a list of texts in batches (to avoid memory spikes).

    """
    results = []
    n = len(texts)
    for i in tqdm(range(0, n, batch_size), desc="Running model"):
        batch = texts[i:i + batch_size]
        # truncation=True keeps long texts within the model's max sequence length
        res = sentiment_pipeline(batch, truncation=True)
        results.extend(res)
    return results
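
# Minimal usage sketch. The sample strings are made-up inputs and the exact
# scores will vary; this only illustrates the shape of the output.
if __name__ == "__main__":
    samples = [
        "I absolutely loved this movie, the acting was superb!",
        "The plot was dull and the pacing dragged on forever.",
    ]
    print(analyze_text(samples[0]))  # a dict like {"label": ..., "score": ...}
    for sample, result in zip(samples, analyze_batch(samples, batch_size=2)):
        print(f"{result['label']:<8} {result['score']:.3f}  {sample}")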