File size: 5,162 Bytes
69210b9 c86bfda 69210b9 a092d54 67fbb52 e465159 69210b9 6716a7e f312f0d 1804706 0e7d7a3 69210b9 27120a6 6716a7e 69210b9 0e7d7a3 69210b9 6716a7e 69210b9 6716a7e 62a4bec 6716a7e 93ca074 3521e98 93ca074 3521e98 93ca074 22c12ad 93ca074 6716a7e 93ca074 6716a7e 27120a6 69210b9 27120a6 69210b9 27120a6 69210b9 27120a6 69210b9 62a4bec 27120a6 6716a7e 27120a6 62a4bec 27120a6 236d6c7 27120a6 71257bd 6716a7e 69210b9 6716a7e 69210b9 6716a7e 93ca074 69210b9 93ca074 6716a7e 67fbb52 e465159 69210b9 0e7d7a3 69210b9 0e7d7a3 69210b9 6716a7e 69210b9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import os
import sys
import json
import requests
import redis
from typing import List, Dict, Optional
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.schema import Document
from llama_index.core.settings import Settings
# Disable implicit LLM usage: no LLM is configured on the llama_index settings.
Settings.llm = None

# Environment variables. Only REDIS_URL has a fallback; the others are None
# when the variable is unset.
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
# NOTE(review): the cache key is read from UPSTASH_REDIS_TOKEN -- confirm this
# variable is meant to hold a key name rather than a credential.
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
MISTRAL_URL = os.environ.get("MISTRAL_URL")
HF_TOKEN = os.environ.get("HF_TOKEN")

# Redis client shared by the cache read/write helpers in this module.
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# Topics queried against the index, one feed section per entry.
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]

# Headers for the HF endpoint.
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json",
}
# Build the Mistral-style instruction prompt
def build_prompt(content: str, topic: str) -> str:
    """Build the Mistral ``[INST]`` prompt that asks for dash-bulleted summaries.

    Args:
        content: Text to summarize; leading and trailing whitespace is
            stripped before it is embedded in the prompt.
        topic: Topic label written on the ``Topic:`` line ahead of the content.

    Returns:
        The fixed instruction block, a blank line, then the topic line and
        the content, all wrapped in ``<s>[INST]...[/INST]</s>``.
    """
    # Adjacent string literals are concatenated verbatim, so every fragment
    # carries its own trailing "\n"; without it two sentences (or an example
    # bullet and the note before it) are glued together.
    base_instruction = (
        "You are Nuse’s official news summarizer — insightful, punchy, and always on point. 🧠✨\n"
        "Your job is to scan the content below and extract the key news items. For each item, craft a crisp summary (15–20 words), add 1–2 fitting emojis, and make it pop.\n"
        "List each summary on a new line starting with a dash (-) and no numbers. This is how Nuse keeps it clean and scannable.\n"
        "\n"
        "Example format:\n"
        "- India stuns Australia in a last-ball thriller at the World Cup finals 🏏🇮🇳\n (15–20 words)\n"
        "- U.S. imposes sweeping tariffs on Chinese tech giants, rattling global markets 📉🇺🇸\n (15–20 words)\n"
        "- Ceasefire breakthrough: Netanyahu bows to pressure after week-long escalation 💥🕊️\n (15–20 words)\n"
        "\n"
        "If you don't find anything useful, don't return anything for that news item\n"
        "Be sharp. Be brief. No fluff. No preambles. Just the summaries.\n"
        "Return only the final summary block — no extra commentary, no prompt repetition."
    )
    tail = f"Topic: {topic}\n\n{content.strip()}"
    return f"<s>[INST]{base_instruction}\n\n{tail}[/INST]</s>"
# Call Mistral through the HF Inference Endpoint
def call_mistral(prompt: str) -> Optional[str]:
    """POST ``prompt`` to the endpoint at ``MISTRAL_URL`` and return the completion.

    Args:
        prompt: Full prompt text, sent as the ``inputs`` field of the JSON body.

    Returns:
        The ``generated_text`` of the response with everything up to the last
        ``[/INST]</s>`` marker removed and surrounding whitespace stripped.
        ``None`` when ``MISTRAL_URL`` is unset, the request or JSON decoding
        fails, or the response is not a dict / non-empty list of dicts with a
        string ``generated_text``.
    """
    if not MISTRAL_URL:
        print("⚠️ Mistral error: MISTRAL_URL is not set")
        return None

    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }
    payload = {"inputs": prompt}

    # Only the network call and the JSON decode can raise here; keeping the
    # try body this small stops unrelated bugs from being reported as
    # endpoint failures.
    try:
        response = requests.post(MISTRAL_URL, headers=headers, json=payload, timeout=20)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"⚠️ Mistral error: {e}")
        return None

    # The response is either a dict or a list whose first element is a dict.
    record = data[0] if isinstance(data, list) and data else data
    if not isinstance(record, dict):
        print("⚠️ Mistral error: unexpected response payload")
        return None

    raw_output = record.get("generated_text", "")
    if not isinstance(raw_output, str):
        print("⚠️ Mistral error: generated_text is not a string")
        return None

    # Keep only the portion after the last [/INST]</s> marker. rpartition
    # returns the whole string in slot 2 when the marker is absent, so both
    # cases share one expression.
    return raw_output.rpartition("[/INST]</s>")[2].strip()
# Summarize the top N documents
def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
    """Summarize the first five documents of ``topic`` into feed entries.

    For each document a prompt is built with ``build_prompt`` and sent through
    ``call_mistral``. Every line of the reply that starts with a dash
    (hyphen, en dash or em dash) becomes one feed entry.

    Args:
        docs: Document texts; only ``docs[:5]`` are processed.
        topic: Topic label used in the prompt and in the search link.

    Returns:
        A list of dicts with keys ``summary``, ``image_url`` (a fixed
        placeholder URL) and ``article_link`` (a Google search URL for the
        topic). Documents for which ``call_mistral`` returns nothing
        contribute no entries.
    """
    from urllib.parse import quote_plus

    bullet_marks = "-–—"  # hyphen, en dash, em dash
    # The link depends only on the topic, so build it once. quote_plus turns
    # spaces into "+" and also escapes reserved characters such as "&".
    article_link = "https://google.com/search?q=" + quote_plus(topic)

    feed = []
    for doc in docs[:5]:
        prompt = build_prompt(doc, topic)
        print("\n📤 Prompt sent to Mistral:\n", prompt[:300], "...\n")
        summary_block = call_mistral(prompt)
        if not summary_block:
            continue
        for line in summary_block.splitlines():
            line = line.strip()
            if not line.startswith(tuple(bullet_marks)):
                continue
            clean_summary = line.lstrip(bullet_marks).strip()
            if clean_summary:
                feed.append({
                    "summary": clean_summary,
                    "image_url": "https://source.unsplash.com/800x600/?news",
                    "article_link": article_link,
                })
    return feed
# Generate and cache the daily feed
def generate_and_cache_daily_feed(documents: List[Document]):
    """Build the per-topic feed from ``documents`` and store it in Redis.

    A ``VectorStoreIndex`` is built from ``documents`` and queried once per
    entry in ``TOPICS``. The content of each response's source nodes is passed
    to ``summarize_topic``. The combined feed is serialized as JSON and
    written under ``REDIS_KEY``.

    Args:
        documents: Documents to index.

    Returns:
        A list with one dict per topic: ``topic`` (the lower-cased topic with
        " news" removed) and ``feed`` (the entries from ``summarize_topic``).
    """
    index = VectorStoreIndex.from_documents(documents)
    retriever = index.as_retriever()
    query_engine = RetrieverQueryEngine(retriever=retriever)

    final_feed = []
    for topic in TOPICS:
        print(f"\n🔍 Generating for: {topic}")
        response = query_engine.query(topic)
        docs = [str(node.get_content()) for node in response.source_nodes]
        topic_feed = summarize_topic(docs, topic)
        final_feed.append({
            "topic": topic.lower().replace(" news", ""),
            "feed": topic_feed,
        })

    # ensure_ascii=False stores emoji in the summaries as-is instead of as
    # \uXXXX escapes.
    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
    return final_feed
# For testing or API access
def get_cached_daily_feed():
    """Return the feed stored under ``REDIS_KEY``, decoded from JSON.

    Returns:
        The decoded value, or an empty list when nothing (or an empty value)
        is stored under the key.
    """
    payload = redis_client.get(REDIS_KEY)
    if not payload:
        return []
    return json.loads(payload)
|