import os
import sys
import json
import requests
import redis
from typing import List, Dict, Optional
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.schema import Document
from llama_index.core.settings import Settings
# ✅ Disable implicit LLM usage
Settings.llm = None
# 🔐 Environment variables
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
MISTRAL_URL = os.environ.get("MISTRAL_URL")
HF_TOKEN = os.environ.get("HF_TOKEN")
# ✅ Redis client
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
# 📰 Topics
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
# 📄 Headers for HF endpoint
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json"
}
# 🧠 Build Mistral-style instruction prompt
def build_prompt(content: str, topic: str) -> str:
    base_instruction = (
        "You are Nuse's official news summarizer: insightful, punchy, and always on point. 🧠✨\n"
        "Your job is to scan the content below and extract the key news items. For each item, craft a crisp summary (15–20 words), add 1–2 fitting emojis, and make it pop.\n"
        "List each summary on a new line starting with a dash (-) and no numbers. This is how Nuse keeps it clean and scannable.\n"
        "\n"
        "Example format:\n"
        "- India stuns Australia in a last-ball thriller at the World Cup finals 🏏🇮🇳\n"
        "- U.S. imposes sweeping tariffs on Chinese tech giants, rattling global markets 📉🇺🇸\n"
        "- Ceasefire breakthrough: Netanyahu bows to pressure after week-long escalation 🔥🕊️\n"
        "\n"
        "If you don't find anything useful, don't return anything for that news item.\n"
        "Be sharp. Be brief. No fluff. No preambles. Just the summaries.\n"
        "Return only the final summary block; no extra commentary, no prompt repetition."
    )
    tail = f"Topic: {topic}\n\n{content.strip()}"
    return f"<s>[INST]{base_instruction}\n\n{tail}[/INST]</s>"
# 🔁 Call Mistral via the HF Inference Endpoint
def call_mistral(prompt: str) -> Optional[str]:
    payload = {"inputs": prompt}
    try:
        response = requests.post(MISTRAL_URL, headers=HEADERS, json=payload, timeout=20)
        response.raise_for_status()
        data = response.json()
        # Get the generated text
        if isinstance(data, list) and data:
            raw_output = data[0].get("generated_text", "")
        elif isinstance(data, dict):
            raw_output = data.get("generated_text", "")
        else:
            return None
        # ✅ Extract only the portion after the [/INST]</s> marker
        if "[/INST]</s>" in raw_output:
            return raw_output.split("[/INST]</s>")[-1].strip()
        return raw_output.strip()
    except Exception as e:
        print(f"⚠️ Mistral error: {e}")
        return None
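# Note: HF text-generation endpoints typically return [{"generated_text": "..."}],
# while some deployments return a bare {"generated_text": "..."} dict; call_mistral()
# handles both shapes above before stripping the echoed prompt.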
# ✂️ Summarize top N documents
def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
    feed = []
    for doc in docs[:5]:
        prompt = build_prompt(doc, topic)
        print("\n📤 Prompt sent to Mistral:\n", prompt[:300], "...\n")
        summary_block = call_mistral(prompt)
        if summary_block:
            # Keep only lines that start with "-" or "–" (hyphen or en dash)
            for line in summary_block.splitlines():
                line = line.strip()
                if line.startswith("-") or line.startswith("–"):
                    clean_summary = line.lstrip("-–").strip()
                    if clean_summary:
                        feed.append({
                            "summary": clean_summary,
                            "image_url": "https://source.unsplash.com/800x600/?news",
                            "article_link": "https://google.com/search?q=" + topic.replace(" ", "+")
                        })
    return feed
# ⚡ Generate and cache daily feed
def generate_and_cache_daily_feed(documents: List[Document]):
    index = VectorStoreIndex.from_documents(documents)
    retriever = index.as_retriever()
    query_engine = RetrieverQueryEngine(retriever=retriever)
    final_feed = []
    for topic in TOPICS:
        print(f"\n🔍 Generating for: {topic}")
        response = query_engine.query(topic)
        docs = [str(node.get_content()) for node in response.source_nodes]
        topic_feed = summarize_topic(docs, topic)
        final_feed.append({
            "topic": topic.lower().replace(" news", ""),
            "feed": topic_feed
        })
    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
    return final_feed
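# Illustrative shape of the cached value (example values, not real output):
# [
#   {"topic": "india", "feed": [{"summary": "...", "image_url": "...", "article_link": "..."}, ...]},
#   {"topic": "world", "feed": [...]},
#   ...
# ]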
# 📦 For testing or API access
def get_cached_daily_feed():
    cached = redis_client.get(REDIS_KEY)
    return json.loads(cached) if cached else []
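
# --- Minimal local usage sketch (not part of the original pipeline) ---
# Assumes the environment variables above are set, a Redis instance is reachable,
# and an embedding model is available to VectorStoreIndex. The sample Documents
# are placeholders; in the real Space the documents presumably come from an
# upstream ingestion step.
if __name__ == "__main__":
    sample_docs = [
        Document(text="India clinch the series decider against Australia in Mumbai."),
        Document(text="Global markets slide after fresh tariffs hit Chinese tech firms."),
    ]
    generate_and_cache_daily_feed(sample_docs)
    print(json.dumps(get_cached_daily_feed(), indent=2, ensure_ascii=False))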