|
import os |
|
import sys |
|
import json |
|
import requests |
|
from requests.exceptions import RequestException |
|
|
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) |
|
|
|
import redis |
|
from typing import List, Dict |
|
from llama_index.core import VectorStoreIndex |
|
from llama_index.core.query_engine import RetrieverQueryEngine |
|
from llama_index.core.schema import Document |
|
from llama_index.core.settings import Settings |
|
|
|
|
|
# Disable LlamaIndex's default LLM — the index is used for retrieval only;
# summaries come from the external Mistral endpoint instead.
Settings.llm = None




# Connection/config values pulled from the environment.
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")

# NOTE(review): the Redis *key* is read from UPSTASH_REDIS_TOKEN — that env var
# name suggests an auth token, not a cache key; confirm the intended variable.
# Also may be None when unset, which redis-py will reject at set() time.
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")

# Hugging Face inference endpoint URL hosting Mistral-7B (None if unset).
MISTRAL_URL = os.environ.get("MISTRAL_URL")

# Bearer token for authenticating against the HF endpoint.
HF_TOKEN = os.environ.get("HF_TOKEN")




# decode_responses=True makes redis-py return str instead of bytes.
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)




# Topics queried against the vector index; list order defines feed order.
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
|
|
|
|
|
def build_prompt(content: str, topic: str) -> str:
    """Compose the summarization prompt sent to the Mistral endpoint.

    The instructions ask for a 25-30 word, emoji-decorated summary; the
    topic label and the raw article content are appended after them.
    """
    instructions = (
        "You are a news summarizer. Summarize the following content in 25-30 words. "
        "Make it engaging and informative. Include appropriate emojis. "
    )
    return f"{instructions}Topic: {topic}\n\n{content}"
|
|
|
|
|
# Shared HTTP headers for every Hugging Face endpoint call.
# NOTE(review): if HF_TOKEN is unset this literally becomes "Bearer None" —
# confirm the environment is always configured before deploying.
HEADERS = {

    "Authorization": f"Bearer {HF_TOKEN}",

    "Content-Type": "application/json"

}
|
|
|
def call_mistral(prompt: str, max_new_tokens: int = 128, temperature: float = 0.7) -> str:
    """Query the Hugging Face Inference Endpoint hosting Mistral-7B.

    Sends *prompt* with the given generation parameters and returns the
    stripped generated text. Any request failure, unexpected payload shape,
    or parsing error results in an empty string rather than an exception.
    """
    body = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
        },
    }

    try:
        resp = requests.post(MISTRAL_URL, headers=HEADERS, json=body, timeout=60)
        resp.raise_for_status()
        result = resp.json()

        # HF endpoints answer either with a list of generations or a single dict.
        if isinstance(result, list) and result:
            return result[0].get("generated_text", "").strip()
        if isinstance(result, dict) and "generated_text" in result:
            return result["generated_text"].strip()

    except RequestException as err:
        print("β Mistral HF request failed:", str(err))
        # err.response is None for connection-level failures.
        if err.response is not None:
            print("βͺοΈ Response:", err.response.text[:300])
    except Exception as err:
        # Covers JSON decoding errors and unexpected payload shapes.
        print("β Unexpected error:", str(err))

    return ""
|
|
|
|
|
|
|
def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
    """Summarize up to five documents for *topic* into feed entries.

    Each entry is a dict with a Mistral-generated summary plus a stock image
    URL and a Google-search fallback link. Short/empty documents and failed
    summarizations are skipped silently (with a console note).
    """
    entries: List[Dict] = []

    for idx, text in enumerate(docs[:5]):
        # Ignore blanks and very short snippets — too little to summarize.
        if not text or len(text.strip()) < 200:
            print(f"β οΈ Skipped short/empty doc {idx+1} for '{topic}'\n")
            continue

        print(f"π Doc {idx+1} preview:\n{text[:300]}...\n")
        generated = call_mistral(build_prompt(text, topic))

        # call_mistral returns "" on failure; only keep real summaries.
        if generated:
            entries.append({
                "summary": generated,
                "image_url": "https://source.unsplash.com/800x600/?news",
                "article_link": "https://google.com/search?q=" + topic.replace(" ", "+"),
            })

    return entries
|
|
|
|
|
def generate_and_cache_daily_feed(documents: List[Document]):
    """Build per-topic news feeds from *documents* and cache them in Redis.

    Indexes the documents, retrieves relevant chunks per topic, summarizes
    them via Mistral, and stores the combined feed as a JSON string under
    REDIS_KEY. Returns the feed: a list of {"topic": slug, "feed": [...]}.
    """
    index = VectorStoreIndex.from_documents(documents)
    retriever = index.as_retriever()
    query_engine = RetrieverQueryEngine(retriever=retriever)

    final_feed = []

    for topic in TOPICS:
        print(f"\nπ Generating for: {topic}")
        response = query_engine.query(topic)
        # Summarize the retrieved source chunks themselves — Settings.llm is
        # disabled, so the query response carries no synthesized answer.
        docs = [str(node.get_content()) for node in response.source_nodes]

        topic_feed = summarize_topic(docs, topic)
        final_feed.append({
            # "India news" -> "india", used as the feed's topic slug.
            "topic": topic.lower().replace(" news", ""),
            "feed": topic_feed
        })

    # NOTE(review): REDIS_KEY comes from UPSTASH_REDIS_TOKEN and may be None,
    # which redis-py rejects — confirm the intended env var name.
    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    # Fix: this message was previously a string literal broken across two
    # source lines (a SyntaxError); rejoined into a single print call.
    print(f"β Cached daily feed under key '{REDIS_KEY}'")
    return final_feed
|
|
|
|
|
def get_cached_daily_feed():
    """Return the cached daily feed from Redis, or [] when nothing is cached."""
    raw = redis_client.get(REDIS_KEY)
    if not raw:
        return []
    return json.loads(raw)
|
|