import os
import sys
import json
import requests
import redis
from typing import List, Dict, Optional
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.schema import Document
from llama_index.core.settings import Settings
# ✅ Disable implicit LLM usage
Settings.llm = None
# 🔐 Environment variables
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
MISTRAL_URL = os.environ.get("MISTRAL_URL")
HF_TOKEN = os.environ.get("HF_TOKEN")
# ✅ Redis client
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
# 📰 Topics
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
# 📄 Headers for HF endpoint
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json"
}
# 🧠 Build Mistral-style instruction prompt
def build_prompt(content: str, topic: str) -> str:
    base_instruction = (
        "You are Nuse's official news summarizer: insightful, punchy, and always on point. 🧠✨\n"
        "Your job is to scan the content below and extract the key news items. For each item, craft a crisp summary (15–20 words), add 1–2 fitting emojis, and make it pop.\n"
        "List each summary on a new line starting with a dash (-) and no numbers. This is how Nuse keeps it clean and scannable.\n"
        "\n"
        "Example format:\n"
        "- India stuns Australia in a last-ball thriller at the World Cup finals 🏏🇮🇳\n"
        "- U.S. imposes sweeping tariffs on Chinese tech giants, rattling global markets 📉🇺🇸\n"
        "- Ceasefire breakthrough: Netanyahu bows to pressure after week-long escalation 🔥🕊️\n"
        "\n"
        "If you don't find anything useful, don't return anything for that news item.\n"
        "Be sharp. Be brief. No fluff. No preambles. Just the summaries.\n"
        "Return only the final summary block; no extra commentary, no prompt repetition."
    )
    tail = f"Topic: {topic}\n\n{content.strip()}"
    return f"<s>[INST]{base_instruction}\n\n{tail}[/INST]</s>"
# 🔁 Call Mistral via the HF Inference Endpoint
def call_mistral(prompt: str) -> Optional[str]:
    payload = {"inputs": prompt}
    try:
        response = requests.post(MISTRAL_URL, headers=HEADERS, json=payload, timeout=20)
        response.raise_for_status()
        data = response.json()
        # Get the generated text
        if isinstance(data, list) and data:
            raw_output = data[0].get("generated_text", "")
        elif isinstance(data, dict):
            raw_output = data.get("generated_text", "")
        else:
            return None
        # ✅ Extract only the portion after the [/INST]</s> marker
        if "[/INST]</s>" in raw_output:
            return raw_output.split("[/INST]</s>")[-1].strip()
        return raw_output.strip()
    except Exception as e:
        print(f"⚠️ Mistral error: {e}")
        return None
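# Note: HF text-generation endpoints typically return [{"generated_text": "..."}],
# while some deployments return a bare {"generated_text": "..."} dict; call_mistral()
# handles both shapes above before stripping the echoed prompt.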
# ✂️ Summarize top N documents
def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
    feed = []
    for doc in docs[:5]:
        prompt = build_prompt(doc, topic)
        print("\n📤 Prompt sent to Mistral:\n", prompt[:300], "...\n")
        summary_block = call_mistral(prompt)
        if summary_block:
            # Keep only lines that start with "-" or "–" (hyphen or en dash)
            for line in summary_block.splitlines():
                line = line.strip()
                if line.startswith("-") or line.startswith("–"):
                    clean_summary = line.lstrip("-–").strip()
                    if clean_summary:
                        feed.append({
                            "summary": clean_summary,
                            "image_url": "https://source.unsplash.com/800x600/?news",
                            "article_link": "https://google.com/search?q=" + topic.replace(" ", "+")
                        })
    return feed
# ⚡ Generate and cache daily feed
def generate_and_cache_daily_feed(documents: List[Document]):
    index = VectorStoreIndex.from_documents(documents)
    retriever = index.as_retriever()
    query_engine = RetrieverQueryEngine(retriever=retriever)
    final_feed = []
    for topic in TOPICS:
        print(f"\n🔍 Generating for: {topic}")
        response = query_engine.query(topic)
        docs = [str(node.get_content()) for node in response.source_nodes]
        topic_feed = summarize_topic(docs, topic)
        final_feed.append({
            "topic": topic.lower().replace(" news", ""),
            "feed": topic_feed
        })
    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
    return final_feed
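# Illustrative shape of the cached value (example values, not real output):
# [
#   {"topic": "india", "feed": [{"summary": "...", "image_url": "...", "article_link": "..."}, ...]},
#   {"topic": "world", "feed": [...]},
#   ...
# ]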
# 📦 For testing or API access
def get_cached_daily_feed():
    cached = redis_client.get(REDIS_KEY)
    return json.loads(cached) if cached else []
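
# --- Minimal local usage sketch (not part of the original pipeline) ---
# Assumes the environment variables above are set, a Redis instance is reachable,
# and an embedding model is available to VectorStoreIndex. The sample Documents
# are placeholders; in the real Space the documents presumably come from an
# upstream ingestion step.
if __name__ == "__main__":
    sample_docs = [
        Document(text="India clinch the series decider against Australia in Mumbai."),
        Document(text="Global markets slide after fresh tariffs hit Chinese tech firms."),
    ]
    generate_and_cache_daily_feed(sample_docs)
    print(json.dumps(get_cached_daily_feed(), indent=2, ensure_ascii=False))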