File size: 3,682 Bytes
69210b9 a092d54 67fbb52 e465159 69210b9 9d73da0 f312f0d 1804706 0e7d7a3 69210b9 0e7d7a3 69210b9 0e7d7a3 69210b9 9d73da0 69210b9 9d73da0 69210b9 9d73da0 69210b9 9d73da0 0e7d7a3 69210b9 9d73da0 69210b9 0e7d7a3 9d73da0 69210b9 9d73da0 69210b9 9d73da0 69210b9 9d73da0 69210b9 9d73da0 69210b9 9d73da0 67fbb52 e465159 69210b9 0e7d7a3 69210b9 0e7d7a3 69210b9 9d73da0 69210b9 9d73da0 69210b9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import json
import os
import sys
from typing import Dict, List, Optional
from urllib.parse import quote_plus

import requests

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

import redis
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.schema import Document
from llama_index.core.settings import Settings
# ✅ Disable OpenAI fallback: no LLM is configured on the LlamaIndex settings.
Settings.llm = None

# Environment variables
REDIS_URL = os.getenv("UPSTASH_REDIS_URL", "redis://localhost:6379")
# NOTE(review): the cache key name is read from the *token* variable; it is
# None when that variable is unset. Confirm this is intended.
REDIS_KEY = os.getenv("UPSTASH_REDIS_TOKEN")
MISTRAL_URL = os.getenv("MISTRAL_URL")  # Hugging Face endpoint
HF_TOKEN = os.getenv("HF_TOKEN")  # Hugging Face token

# ✅ Redis client (decode_responses=True so values come back as str, not bytes)
redis_client = redis.Redis.from_url(
    REDIS_URL,
    decode_responses=True,
)

# 📰 Topics to summarize
TOPICS = [
    "India news",
    "World news",
    "Tech news",
    "Finance news",
    "Sports news",
]
# ✍️ Build Mistral prompt
def build_prompt(content: str, topic: str) -> str:
    """Return the summarization prompt for one article.

    The prompt is a fixed instruction sentence block, followed by the topic
    label, a blank line, and then the raw article ``content``.
    """
    instructions = (
        "You are a news summarizer. "
        "Summarize the following content in 25-30 words. "
        "Make it engaging and informative. "
        "Include appropriate emojis."
    )
    return f"{instructions} Topic: {topic}\n\n{content}"
# 🧠 Send prompt to Mistral
def call_mistral(prompt: str) -> Optional[str]:
    """Send ``prompt`` to the configured Mistral endpoint and return its reply.

    The prompt is posted as a single user message to ``MISTRAL_URL`` and the
    reply is read from ``outputs[0]["content"]`` of the JSON response.

    Args:
        prompt: Full prompt text. Prompts that are empty or shorter than
            50 characters after stripping are skipped.

    Returns:
        The stripped reply text, or ``None`` when the prompt is skipped, the
        endpoint is not configured, the HTTP request fails, or the response
        does not have the expected shape. Failures are logged, never raised.
    """
    if not prompt or len(prompt.strip()) < 50:
        print(f"⚠️ Skipping empty or invalid prompt:\n{prompt}\n")
        return None

    if not MISTRAL_URL:
        # Without this check requests would fail with an opaque "invalid URL" error.
        print("⚠️ Mistral error: MISTRAL_URL is not configured")
        return None

    headers = {"Content-Type": "application/json"}
    if HF_TOKEN:
        # Only authenticate when a token exists; "Bearer None" is never valid.
        headers["Authorization"] = f"Bearer {HF_TOKEN}"

    payload = {
        "inputs": [{"role": "user", "content": prompt}]
    }

    print(f"\n📤 Prompt sent to Mistral:\n{prompt[:300]}...\n")  # show a snippet for debugging

    try:
        response = requests.post(MISTRAL_URL, headers=headers, json=payload, timeout=20)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Covers connection errors, timeouts and non-2xx statuses.
        print(f"⚠️ Mistral error: {exc}")
        return None

    try:
        return response.json()["outputs"][0]["content"].strip()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as exc:
        # Body was not JSON, or did not match the expected outputs[0].content shape.
        print(f"⚠️ Mistral error: unexpected response format: {exc!r}")
        return None
# ✍️ Generate summaries per topic
def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
    """Summarize up to the first five documents retrieved for ``topic``.

    Args:
        docs: Raw document texts. Only the first five are considered; entries
            that are empty or shorter than 200 characters after stripping are
            skipped. ``None`` is treated as an empty list.
        topic: Topic label used in the prompt and in the search link.

    Returns:
        One dict per successfully summarized document, with the keys
        ``summary``, ``image_url`` and ``article_link``. Documents for which
        no summary came back are left out.
    """
    # URL-encode the topic so characters such as '&', '#' or '+' cannot
    # corrupt the query string; spaces still become '+'.
    article_link = "https://google.com/search?q=" + quote_plus(topic)

    feed = []
    for position, doc in enumerate((docs or [])[:5], start=1):
        if not doc or len(doc.strip()) < 200:
            print(f"⚠️ Skipped short/empty doc {position} for '{topic}'\n")
            continue

        print(f"📄 Doc {position} preview:\n{doc[:300]}...\n")
        summary = call_mistral(build_prompt(doc, topic))
        if summary:
            feed.append({
                "summary": summary,
                "image_url": "https://source.unsplash.com/800x600/?news",
                "article_link": article_link,
            })
    return feed
# 🚀 Full pipeline
def generate_and_cache_daily_feed(documents: List[Document]) -> List[Dict]:
    """Build the daily feed for every entry in ``TOPICS`` and cache it in Redis.

    A vector index is built over ``documents``. Each topic is queried against
    it, the content of the returned source nodes is summarized with
    ``summarize_topic``, and the combined feed is stored as JSON under
    ``REDIS_KEY``.

    Args:
        documents: Documents to index.

    Returns:
        A list with one ``{"topic": ..., "feed": [...]}`` dict per topic, where
        the topic label is lower-cased with the " news" suffix removed. The
        feed is returned even when it could not be cached because
        ``REDIS_KEY`` is unset.
    """
    index = VectorStoreIndex.from_documents(documents)
    query_engine = RetrieverQueryEngine(retriever=index.as_retriever())

    final_feed = []
    for topic in TOPICS:
        print(f"\n🔍 Generating for: {topic}")
        response = query_engine.query(topic)
        docs = [str(node.get_content()) for node in response.source_nodes]

        final_feed.append({
            "topic": topic.lower().replace(" news", ""),
            "feed": summarize_topic(docs, topic),
        })

    # 💾 Cache feed to Redis
    if not REDIS_KEY:
        # Writing under a None key would raise after all the summarization
        # work is done; keep the result and report the misconfiguration.
        print("⚠️ Cache key is not configured; skipping Redis cache")
        return final_feed

    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    # The key value comes from a *_TOKEN environment variable, so it is
    # deliberately kept out of the log line.
    print("✅ Cached daily feed in Redis")
    return final_feed
# 📦 For API access
def get_cached_daily_feed() -> List[Dict]:
    """Return the feed cached under ``REDIS_KEY``, or ``[]`` when none exists.

    Returns:
        The JSON-decoded cached value, or an empty list when the cache key is
        not configured or nothing is stored under it.

    Raises:
        json.JSONDecodeError: If the cached value is not valid JSON.
    """
    if not REDIS_KEY:
        # A None key cannot be looked up; treat it the same as an empty cache.
        return []

    cached = redis_client.get(REDIS_KEY)
    if not cached:
        return []
    return json.loads(cached)
|