raghavNCI committed on
Commit
89716e4
·
1 Parent(s): 6a3e0a5

headline generator first trial

Browse files
Files changed (2) hide show
  1. app.py +6 -3
  2. nuse_modules/headlines_generator.py +83 -0
app.py CHANGED
@@ -3,7 +3,8 @@ from routes.category import router # routes.py must be in same folder
3
  from routes.question import askMe
4
  from routes.wa_gateway import wa_router
5
  from dotenv import load_dotenv
6
- from cache_init import fetch_and_cache_articles
 
7
  from fastapi.middleware.cors import CORSMiddleware
8
 
9
 
@@ -23,8 +24,10 @@ app.add_middleware(
23
  )
24
 
25
  @app.on_event("startup")
26
- async def startup_event():
27
- fetch_and_cache_articles()
 
 
28
 
29
  @app.get("/health")
30
  def health_check():
 
3
  from routes.question import askMe
4
  from routes.wa_gateway import wa_router
5
  from dotenv import load_dotenv
6
+ import asyncio
7
+ from nuse_modules.headlines_generator import generate_and_store_headlines
8
  from fastapi.middleware.cors import CORSMiddleware
9
 
10
 
 
24
  )
25
 
26
  @app.on_event("startup")
27
def fetch_and_cache_articles() -> None:
    """Start headline generation in the background when the app starts up.

    ``generate_and_store_headlines`` is handed to the event loop's default
    thread-pool executor, so this hook returns immediately instead of
    waiting for the job to finish.

    NOTE(review): this relies on being invoked while an event loop is
    running, which is how the startup hook is expected to be called.
    """
    import logging

    def _report_failure(future: "asyncio.Future") -> None:
        # Nobody awaits the executor future, so without this callback an
        # exception raised by the job would disappear without any trace.
        if future.cancelled():
            return
        error = future.exception()
        if error is not None:
            logging.getLogger(__name__).error(
                "Headline generation failed", exc_info=error
            )

    # get_running_loop() is the documented way to reach the active loop.
    loop = asyncio.get_running_loop()
    # Run in default thread-pool executor
    job = loop.run_in_executor(None, generate_and_store_headlines)
    job.add_done_callback(_report_failure)
31
 
32
  @app.get("/health")
33
  def health_check():
nuse_modules/headlines_generator.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import datetime as _dt
3
+ import json, os
4
+ from typing import List, Dict
5
+
6
+ from clients.redis_client import redis_client as _r
7
+ from nuse_modules.google_search import search_google_news
8
+ from models_initialization.mistral_registry import mistral_generate
9
+
10
+
11
+ _CATEGORIES = {
12
+ "world": "world news top stories",
13
+ "india": "india top stories",
14
+ "finance": "business finance economy today",
15
+ "sports": "sports headlines today",
16
+ "entertainment": "entertainment celebrity movie tv",
17
+ }
18
+
19
+ _ARTICLES_PER_CAT = 5
20
+ _SUMMARY_TOKENS = 120
21
+ _REDIS_TTL_SECONDS = 24 * 3600
22
+
23
+ def _dedupe_urls(articles: List[dict]) -> List[dict]:
24
+ seen = set()
25
+ out = []
26
+ for art in articles:
27
+ if art["link"] not in seen:
28
+ seen.add(art["link"])
29
+ out.append(art)
30
+ return out
31
+
32
+
33
def _summarise_article(article: dict) -> str:
    """Build a one-sentence summary prompt for *article* and run it.

    The prompt embeds ``article["content"]`` and is passed to
    ``mistral_generate`` with ``_SUMMARY_TOKENS`` as ``max_new_tokens`` and
    a temperature of 0.3. Whatever ``mistral_generate`` returns is returned
    unchanged.
    """
    prompt_parts = [
        "You are a concise news assistant. Summarise the following article ",
        "in one sentence (<=25 words). Omit source and author names.\n\n",
        f"ARTICLE:\n{article['content']}",
    ]
    prompt = "".join(prompt_parts)
    return mistral_generate(
        prompt,
        max_new_tokens=_SUMMARY_TOKENS,
        temperature=0.3,
    )
40
+
41
+
42
+ def _redis_key(date: str, category: str) -> str:
43
+ return f"headlines:{date}:{category}"
44
+
45
+
46
def generate_and_store_headlines(today: str | None = None) -> Dict[str, List[dict]]:
    """Fetch, summarise and cache the top headlines for every category.

    For each entry in ``_CATEGORIES`` this queries ``search_google_news``,
    drops articles with duplicate links, summarises every article that has
    content, and stores the resulting list in Redis as JSON under
    ``headlines:<date>:<category>`` with a ``_REDIS_TTL_SECONDS`` expiry.

    Args:
        today: Date string used in the Redis keys. When omitted (or empty),
            the current UTC date formatted as ``YYYY-MM-DD`` is used.

    Returns:
        Mapping of category name to the list of dicts stored for it. Each
        dict has the keys ``title``, ``url``, ``summary`` and
        ``source_snippet``. Useful for logging / testing.
    """
    # datetime.utcnow() is deprecated; an aware UTC "now" produces the same
    # date string.
    date_str = today or _dt.datetime.now(_dt.timezone.utc).strftime("%Y-%m-%d")
    all_output: Dict[str, List[dict]] = {}

    for cat, query in _CATEGORIES.items():
        print(f"[HEADLINES] {cat.title()} …")

        # 1. Google -> list of {title, link, snippet, content}
        raw_articles = search_google_news([query], num_results=_ARTICLES_PER_CAT)
        raw_articles = _dedupe_urls(raw_articles)

        # 2. Summarise each article
        summaries = []
        for art in raw_articles:
            # Skip if the scraper failed: a missing "content" key is treated
            # the same as empty content instead of raising KeyError.
            if not art.get("content"):
                continue
            summary = _summarise_article(art)
            summaries.append(
                {
                    "title": art["title"],
                    "url": art["link"],
                    "summary": summary,
                    "source_snippet": art["snippet"],
                }
            )

        # 3. Store in Redis under a per-day, per-category key
        redis_key = _redis_key(date_str, cat)
        _r.set(redis_key, json.dumps(summaries), ex=_REDIS_TTL_SECONDS)

        all_output[cat] = summaries
        print(f"  ↳ stored {len(summaries)} items in Redis ({redis_key})")

    return all_output