|
import os |
|
import sys |
|
import json |
|
import requests |
|
|
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) |
|
|
|
import redis

from typing import List, Dict, Optional

from llama_index.core import VectorStoreIndex

from llama_index.core.query_engine import RetrieverQueryEngine

from llama_index.core.schema import Document

from llama_index.core.settings import Settings
|
|
|
|
|
Settings.llm = None |
|
|
|
|
|
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379") |
|
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN") |
|
MISTRAL_URL = os.environ.get("MISTRAL_URL") |
|
HF_TOKEN = os.environ.get("HF_TOKEN") |
|
|
|
|
|
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True) |
|
|
|
|
|
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"] |
|
|
|
|
|
def build_prompt(content: str, topic: str) -> str: |
|
return ( |
|
f"You are a news summarizer. Summarize the following content in 25-30 words. " |
|
f"Make it engaging and informative. Include appropriate emojis. Topic: {topic}\n\n{content}" |
|
) |
|
|
|
|
|
def call_mistral(prompt: str) -> str: |
|
if not prompt or len(prompt.strip()) < 50: |
|
print(f"β οΈ Skipping empty or invalid prompt:\n{prompt}\n") |
|
return None |
|
|
|
headers = { |
|
"Authorization": f"Bearer {HF_TOKEN}", |
|
"Content-Type": "application/json" |
|
} |
|
payload = { |
|
"inputs": [{"role": "user", "content": prompt}] |
|
} |
|
|
|
print(f"\nπ€ Prompt sent to Mistral:\n{prompt[:300]}...\n") |
|
|
|
try: |
|
response = requests.post(MISTRAL_URL, headers=headers, json=payload, timeout=20) |
|
response.raise_for_status() |
|
return response.json()["outputs"][0]["content"].strip() |
|
except Exception as e: |
|
print(f"β οΈ Mistral error: {e}") |
|
return None |
|
|
|
|
|
def summarize_topic(docs: List[str], topic: str) -> List[Dict]: |
|
feed = [] |
|
|
|
for i, doc in enumerate(docs[:5]): |
|
if not doc or len(doc.strip()) < 200: |
|
print(f"β οΈ Skipped short/empty doc {i+1} for '{topic}'\n") |
|
continue |
|
|
|
print(f"π Doc {i+1} preview:\n{doc[:300]}...\n") |
|
prompt = build_prompt(doc, topic) |
|
summary = call_mistral(prompt) |
|
|
|
if summary: |
|
feed.append({ |
|
"summary": summary, |
|
"image_url": "https://source.unsplash.com/800x600/?news", |
|
"article_link": "https://google.com/search?q=" + topic.replace(" ", "+") |
|
}) |
|
|
|
return feed |
|
|
|
|
|
def generate_and_cache_daily_feed(documents: List[Document]): |
|
index = VectorStoreIndex.from_documents(documents) |
|
retriever = index.as_retriever() |
|
query_engine = RetrieverQueryEngine(retriever=retriever) |
|
|
|
final_feed = [] |
|
|
|
for topic in TOPICS: |
|
print(f"\nπ Generating for: {topic}") |
|
response = query_engine.query(topic) |
|
docs = [str(node.get_content()) for node in response.source_nodes] |
|
|
|
topic_feed = summarize_topic(docs, topic) |
|
final_feed.append({ |
|
"topic": topic.lower().replace(" news", ""), |
|
"feed": topic_feed |
|
}) |
|
|
|
|
|
redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False)) |
|
print(f"β
Cached daily feed under key '{REDIS_KEY}'") |
|
return final_feed |
|
|
|
|
|
def get_cached_daily_feed(): |
|
cached = redis_client.get(REDIS_KEY) |
|
return json.loads(cached) if cached else [] |
|
|