|
import os |
|
import sys |
|
import json |
|
import requests |
|
from requests.exceptions import RequestException |
|
|
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) |
|
|
|
import redis |
|
from typing import List, Dict |
|
from llama_index.core import VectorStoreIndex |
|
from llama_index.core.query_engine import RetrieverQueryEngine |
|
from llama_index.core.schema import Document |
|
from llama_index.core.settings import Settings |
|
|
|
|
|
# Disable LlamaIndex's default LLM — the index is used for retrieval only;
# summaries come from the external Mistral endpoint instead.
Settings.llm = None




# Connection/config values pulled from the environment.
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")

# NOTE(review): the Redis *key* is read from UPSTASH_REDIS_TOKEN — that env var
# name suggests an auth token, not a cache key; confirm the intended variable.
# Also may be None when unset, which redis-py will reject at set() time.
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")

# Hugging Face inference endpoint URL hosting Mistral-7B (None if unset).
MISTRAL_URL = os.environ.get("MISTRAL_URL")

# Bearer token for authenticating against the HF endpoint.
HF_TOKEN = os.environ.get("HF_TOKEN")




# decode_responses=True makes redis-py return str instead of bytes.
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)




# Topics queried against the vector index; list order defines feed order.
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
|
|
|
|
|
def build_prompt(content: str, topic: str) -> str:
    """Compose the summarization prompt sent to the Mistral endpoint.

    The instructions ask for a 25-30 word, emoji-decorated summary; the
    topic label and the raw article content are appended after them.
    """
    instructions = (
        "You are a news summarizer. Summarize the following content in 25-30 words. "
        "Make it engaging and informative. Include appropriate emojis. "
    )
    return f"{instructions}Topic: {topic}\n\n{content}"
|
|
|
|
|
# Shared HTTP headers for every Hugging Face endpoint call.
# NOTE(review): if HF_TOKEN is unset this literally becomes "Bearer None" —
# confirm the environment is always configured before deploying.
HEADERS = {

    "Authorization": f"Bearer {HF_TOKEN}",

    "Content-Type": "application/json"

}
|
|
|
def call_mistral(prompt: str, max_new_tokens: int = 128, temperature: float = 0.7) -> str:
    """Query the Hugging Face Inference Endpoint hosting Mistral-7B.

    Sends *prompt* with the given generation parameters and returns the
    stripped generated text. Any request failure, unexpected payload shape,
    or parsing error results in an empty string rather than an exception.
    """
    body = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
        },
    }

    try:
        resp = requests.post(MISTRAL_URL, headers=HEADERS, json=body, timeout=60)
        resp.raise_for_status()
        result = resp.json()

        # HF endpoints answer either with a list of generations or a single dict.
        if isinstance(result, list) and result:
            return result[0].get("generated_text", "").strip()
        if isinstance(result, dict) and "generated_text" in result:
            return result["generated_text"].strip()

    except RequestException as err:
        print("β Mistral HF request failed:", str(err))
        # err.response is None for connection-level failures.
        if err.response is not None:
            print("βͺοΈ Response:", err.response.text[:300])
    except Exception as err:
        # Covers JSON decoding errors and unexpected payload shapes.
        print("β Unexpected error:", str(err))

    return ""
|
|
|
|
|
|
|
def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
    """Summarize up to five documents for *topic* into feed entries.

    Each entry is a dict with a Mistral-generated summary plus a stock image
    URL and a Google-search fallback link. Short/empty documents and failed
    summarizations are skipped silently (with a console note).
    """
    entries: List[Dict] = []

    for idx, text in enumerate(docs[:5]):
        # Ignore blanks and very short snippets — too little to summarize.
        if not text or len(text.strip()) < 200:
            print(f"β οΈ Skipped short/empty doc {idx+1} for '{topic}'\n")
            continue

        print(f"π Doc {idx+1} preview:\n{text[:300]}...\n")
        generated = call_mistral(build_prompt(text, topic))

        # call_mistral returns "" on failure; only keep real summaries.
        if generated:
            entries.append({
                "summary": generated,
                "image_url": "https://source.unsplash.com/800x600/?news",
                "article_link": "https://google.com/search?q=" + topic.replace(" ", "+"),
            })

    return entries
|
|
|
|
|
def generate_and_cache_daily_feed(documents: List[Document]):
    """Build per-topic news feeds from *documents* and cache them in Redis.

    Indexes the documents, retrieves relevant chunks per topic, summarizes
    them via Mistral, and stores the combined feed as a JSON string under
    REDIS_KEY. Returns the feed: a list of {"topic": slug, "feed": [...]}.
    """
    index = VectorStoreIndex.from_documents(documents)
    retriever = index.as_retriever()
    query_engine = RetrieverQueryEngine(retriever=retriever)

    final_feed = []

    for topic in TOPICS:
        print(f"\nπ Generating for: {topic}")
        response = query_engine.query(topic)
        # Summarize the retrieved source chunks themselves — Settings.llm is
        # disabled, so the query response carries no synthesized answer.
        docs = [str(node.get_content()) for node in response.source_nodes]

        topic_feed = summarize_topic(docs, topic)
        final_feed.append({
            # "India news" -> "india", used as the feed's topic slug.
            "topic": topic.lower().replace(" news", ""),
            "feed": topic_feed
        })

    # NOTE(review): REDIS_KEY comes from UPSTASH_REDIS_TOKEN and may be None,
    # which redis-py rejects — confirm the intended env var name.
    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    # Fix: this message was previously a string literal broken across two
    # source lines (a SyntaxError); rejoined into a single print call.
    print(f"β Cached daily feed under key '{REDIS_KEY}'")
    return final_feed
|
|
|
|
|
def get_cached_daily_feed():
    """Return the cached daily feed from Redis, or [] when nothing is cached."""
    raw = redis_client.get(REDIS_KEY)
    if not raw:
        return []
    return json.loads(raw)
|
|