File size: 1,041 Bytes
42b5a1a
0abf359
 
42b5a1a
45a4908
86c1607
 
 
 
0abf359
42b5a1a
 
288828b
45a4908
 
 
 
42b5a1a
0abf359
 
 
 
8ef1932
 
42b5a1a
 
 
45a4908
8ef1932
 
42b5a1a
45a4908
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from transformers import pipeline
import time


# Build both pipelines at import time so each model is loaded a single time
# and reused by every call made through this module.
classifier = pipeline(task="zero-shot-classification", model="valhalla/distilbart-mnli-12-3")
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)

def analyze_article(text, title, link, *, min_score=0.5):
    """Classify and summarize an article and report whether it is relevant.

    The text is scored against three candidate labels with the module-level
    zero-shot ``classifier`` (multi-label mode), and its first 1024
    characters are summarized with the module-level ``summarizer``.

    Args:
        text: Article body. The full text goes to the classifier; only
            ``text[:1024]`` goes to the summarizer.
        title: Article title, copied unchanged into the result.
        link: Article URL, copied unchanged into the result.
        min_score: Minimum classifier score that "analytics" or
            "data science" must reach for the article to count as useful.
            NOTE(review): 0.5 is a starting point, not a tuned value.

    Returns:
        A dict with keys ``"title"``, ``"link"``, ``"summary"``,
        ``"top_label"`` (first label returned by the classifier) and
        ``"relevance"`` (``"Useful"`` or ``"Not useful"``).
    """
    target_labels = ("analytics", "data science")

    started = time.perf_counter()
    result = classifier(
        text,
        candidate_labels=["analytics", "data science", "business insight"],
        multi_label=True,
    )
    print(f"[DEBUG] Classifier Model inference time: {time.perf_counter() - started:.2f}s")

    # Restart the clock: the summarizer line must report the summarizer's own
    # latency, not the classifier's and summarizer's combined.
    started = time.perf_counter()
    summary = summarizer(
        text[:1024], max_length=150, min_length=40, do_sample=False
    )[0]["summary_text"]
    print(f"[DEBUG] Summarizer Model inference time: {time.perf_counter() - started:.2f}s")

    # Gate on the score rather than on rank. With three candidates and only
    # one of them outside target_labels, "is a target label among the top
    # two?" is true for every input, so the rank-based check could never
    # yield "Not useful".
    is_useful = any(
        label in target_labels and score >= min_score
        for label, score in zip(result["labels"], result["scores"])
    )

    return {
        "title": title,
        "link": link,
        "summary": summary,
        "top_label": result["labels"][0],
        "relevance": "Useful" if is_useful else "Not useful",
    }