|
import os |
|
import sys |
|
import json |
|
import requests |
|
import redis |
|
from typing import List, Dict, Optional |
|
from llama_index.core import VectorStoreIndex |
|
from llama_index.core.query_engine import RetrieverQueryEngine |
|
from llama_index.core.schema import Document |
|
from llama_index.core.settings import Settings |
|
|
|
|
|
# Explicitly unset the LlamaIndex LLM; text generation in this module is done
# by `call_mistral` over HTTP instead.
# NOTE(review): presumably this stops LlamaIndex from resolving a default LLM
# on its own — confirm against the installed llama_index version.
Settings.llm = None

# Connection settings and credentials, read once at import time.
REDIS_URL = os.getenv("UPSTASH_REDIS_URL", "redis://localhost:6379")
# NOTE(review): although this is read from a *token* variable, the value is
# used as the Redis key the daily feed is stored under, and it is None when
# the variable is unset — verify this is intended.
REDIS_KEY = os.getenv("UPSTASH_REDIS_TOKEN")
MISTRAL_URL = os.getenv("MISTRAL_URL")
HF_TOKEN = os.getenv("HF_TOKEN")

# Shared Redis client; decode_responses=True makes reads come back as str.
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
|
|
|
|
|
# Topics the daily feed is generated for; each entry is used verbatim as the
# retrieval query and as the topic label in the prompt.
TOPICS = [
    "India news",
    "World news",
    "Tech news",
    "Finance news",
    "Sports news",
]
|
|
|
|
|
# JSON request headers carrying HF_TOKEN as a bearer credential; built once at
# import time from the value read from the environment.
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json",
}
|
|
|
|
|
def build_prompt(content: str, topic: str) -> str:
    """Build the instruction prompt asking the model to summarize *content*.

    The prompt is the fixed Nuse summarizer instructions, followed by a
    ``Topic:`` line and the whitespace-stripped content, all wrapped in
    ``<s>[INST]...[/INST]</s>``.

    Args:
        content: Raw text of the news content to summarize.
        topic: Topic label placed on the ``Topic:`` line.

    Returns:
        The complete prompt string.
    """
    # Adjacent string literals are concatenated as-is, so every fragment must
    # end with its own newline; without it neighbouring sentences and example
    # bullets run together on one line.
    base_instruction = (
        "You are Nuse’s official news summarizer — insightful, punchy, and always on point. 🧠✨\n"
        "Your job is to scan the content below and extract the key news items. For each item, craft a crisp summary (15–20 words), add 1–2 fitting emojis, and make it pop.\n"
        "List each summary on a new line starting with a dash (-) and no numbers. This is how Nuse keeps it clean and scannable.\n"
        "\n"
        "Example format:\n"
        "- India stuns Australia in a last-ball thriller at the World Cup finals 🏏🇮🇳\n (15–20 words)\n"
        "- U.S. imposes sweeping tariffs on Chinese tech giants, rattling global markets 📉🇺🇸\n (15–20 words)\n"
        "- Ceasefire breakthrough: Netanyahu bows to pressure after week-long escalation 💥🕊️\n (15–20 words)\n"
        "\n"
        "If you don't find anything useful, don't return anything for that news item.\n"
        "Be sharp. Be brief. No fluff. No preambles. Just the summaries.\n"
        "Return only the final summary block — no extra commentary, no prompt repetition."
    )
    tail = f"Topic: {topic}\n\n{content.strip()}"
    # The closing "[/INST]</s>" is also the marker `call_mistral` splits on to
    # drop an echoed prompt, so it must stay exactly as is.
    return f"<s>[INST]{base_instruction}\n\n{tail}[/INST]</s>"
|
|
|
|
|
|
|
def _extract_generated_text(data) -> Optional[str]:
    """Pull ``generated_text`` out of a decoded response, or None if absent."""
    # The response is either a non-empty list whose first element is the
    # record, or the record itself.
    record = data[0] if isinstance(data, list) and data else data
    if not isinstance(record, dict):
        return None
    text = record.get("generated_text", "")
    return text if isinstance(text, str) else None


def call_mistral(prompt: str, timeout: float = 20) -> Optional[str]:
    """POST *prompt* to ``MISTRAL_URL`` and return the generated text.

    The prompt is sent as ``{"inputs": prompt}`` with the module-level
    ``HEADERS``. If the returned text still contains the prompt terminator
    ``[/INST]</s>``, only the part after its last occurrence is kept.

    Args:
        prompt: Fully formatted prompt string.
        timeout: Seconds to wait for the HTTP request (default 20).

    Returns:
        The stripped generated text, or ``None`` when the request fails or the
        response does not have the expected shape.
    """
    try:
        response = requests.post(
            MISTRAL_URL,
            headers=HEADERS,
            json={"inputs": prompt},
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()
    except Exception as e:
        # Deliberately broad: this is a best-effort call and callers treat
        # None as "no summary", so a failed request must never propagate.
        print(f"⚠️ Mistral error: {e}")
        return None

    raw_output = _extract_generated_text(data)
    if raw_output is None:
        return None

    marker = "[/INST]</s>"
    if marker in raw_output:
        # Drop the echoed prompt; keep only what follows the last marker.
        return raw_output.split(marker)[-1].strip()
    return raw_output.strip()
|
|
|
|
|
def summarize_topic(docs: List[str], topic: str, max_docs: int = 5) -> List[Dict]:
    """Summarize the first *max_docs* documents of *topic* into feed items.

    Each document is turned into a prompt with ``build_prompt`` and sent
    through ``call_mistral``. Every returned line that starts with a dash is
    stripped of its leading dashes and, if non-empty, becomes one feed item.

    Args:
        docs: Document texts to summarize.
        topic: Topic label used in the prompt and in the search link.
        max_docs: Maximum number of documents to process (default 5).

    Returns:
        A list of dicts with the keys ``summary``, ``image_url`` and
        ``article_link``; empty when nothing could be summarized.
    """
    # Function-scope import so the block does not depend on file-level edits.
    from urllib.parse import quote_plus

    # Accept the ASCII hyphen as well as en/em dashes as bullet markers.
    bullet_marks = ("-", "–", "—")
    bullet_chars = "".join(bullet_marks)

    # Loop-invariant: every item of a topic shares the same search link.
    # quote_plus also escapes characters such as "&" that a plain
    # space-to-plus replacement would leave in the query string.
    article_link = "https://google.com/search?q=" + quote_plus(topic)

    feed = []
    for doc in docs[:max_docs]:
        prompt = build_prompt(doc, topic)
        print("\n📤 Prompt sent to Mistral:\n", prompt[:300], "...\n")
        summary_block = call_mistral(prompt)
        if not summary_block:
            continue

        for line in summary_block.splitlines():
            line = line.strip()
            if not line.startswith(bullet_marks):
                continue
            clean_summary = line.lstrip(bullet_chars).strip()
            if clean_summary:
                feed.append({
                    "summary": clean_summary,
                    "image_url": "https://source.unsplash.com/800x600/?news",
                    "article_link": article_link,
                })

    return feed
|
|
|
|
|
|
|
def generate_and_cache_daily_feed(documents: List[Document]) -> List[Dict]:
    """Build the per-topic daily feed from *documents* and cache it in Redis.

    A vector index is built over the documents. For every entry in ``TOPICS``
    the topic string is used as the query, the content of the returned source
    nodes is summarized with ``summarize_topic``, and the result is stored
    under the topic name with its " news" suffix removed. The whole feed is
    then JSON-encoded and written to Redis under ``REDIS_KEY``.

    Args:
        documents: Documents to index and retrieve from.

    Returns:
        A list of ``{"topic": ..., "feed": [...]}`` dicts, one per topic.
    """
    index = VectorStoreIndex.from_documents(documents)
    query_engine = RetrieverQueryEngine(retriever=index.as_retriever())

    final_feed = []
    for topic in TOPICS:
        print(f"\n🔍 Generating for: {topic}")
        response = query_engine.query(topic)
        # Only the retrieved nodes are used; the synthesized answer is ignored.
        docs = [str(node.get_content()) for node in response.source_nodes]
        final_feed.append({
            "topic": topic.lower().replace(" news", ""),
            "feed": summarize_topic(docs, topic),
        })

    # ensure_ascii=False stores emojis and other non-ASCII text verbatim
    # instead of as \u escapes.
    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    # NOTE(review): REDIS_KEY is read from a *token* environment variable, so
    # this message may print a credential — confirm before shipping logs.
    print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
    return final_feed
|
|
|
|
|
def get_cached_daily_feed():
    """Return the feed cached under ``REDIS_KEY``, or ``[]`` if nothing is stored."""
    payload = redis_client.get(REDIS_KEY)
    if not payload:
        # Missing key (or empty value): nothing has been cached yet.
        return []
    return json.loads(payload)
|
|