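"""Daily news feed generator.

Queries an Upstash vector store for topic-tagged articles, summarizes each topic
with OpenAI chat completions, and caches the resulting feed in Redis for 24 hours.
"""
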
import os
import json
import redis
import numpy as np
from typing import List, Dict
from openai import OpenAI
from components.indexers.news_indexer import get_upstash_vector_store
from llama_index.core.vector_stores.types import VectorStoreQuery, MetadataFilter, MetadataFilters, FilterOperator

# 🔐 Environment variables
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
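# OPENAI_API_KEY must be set for summarization to work; REDIS_URL falls back to a
# local Redis instance when UPSTASH_REDIS_URL is unset.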

# ✅ Redis client
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# 📰 Topics
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
TOPIC_KEYS = [t.lower().replace(" news", "") for t in TOPICS]

# 🧠 Summarization Prompt
BASE_PROMPT = (
    "You are Nuse's editorial summarizer. Read the excerpts below and extract the most important stories. "
    "Return up to 3 punchy headlines, each under 20 words. Each headline should be followed by a short explanation of why the story matters."
)

# 📥 Load documents and metadata
def load_docs_by_topic_with_refs() -> Dict[str, List[Dict]]:
    topic_docs = {key: [] for key in TOPIC_KEYS}
    try:
        vector_store = get_upstash_vector_store()
        for full_topic, topic_key in zip(TOPICS, TOPIC_KEYS):
            filters = MetadataFilters(
                filters=[MetadataFilter(key="topic", value=topic_key, operator=FilterOperator.EQ)]
            )
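            # A random query vector is used as a stand-in so retrieval is driven by the
            # metadata filter rather than true similarity; the 384-dim size is assumed
            # to match the embedding dimension of the index.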
            dummy_vector = np.random.rand(384).tolist()
            query = VectorStoreQuery(query_embedding=dummy_vector, similarity_top_k=50, filters=filters)
            result = vector_store.query(query)
            for node in result.nodes:
                content = node.get_content().strip()
                ref_id = node.node_id or node.id_ or ""
                if content:
                    topic_docs[topic_key].append({"text": content, "ref": ref_id})
    except Exception as e:
        print("❌ [load_docs_by_topic_with_refs Error]", e)
    return topic_docs

# 🧪 Topic summarizer
def summarize_topic(topic_key: str, docs: List[Dict], start_index: int) -> List[Dict]:
    if not docs:
        print(f"⚠️ No docs for topic: {topic_key}")
        return []
    try:
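        # Join all excerpts for the topic and truncate to 12,000 characters to keep
        # the prompt within the model's context window.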
        content = "\n\n---\n\n".join([d["text"] for d in docs])[:12000]
        client = OpenAI(api_key=OPENAI_API_KEY)
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": BASE_PROMPT},
                {"role": "user", "content": content},
            ],
            max_tokens=512,
            temperature=0.7,
        )
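        # Treat each non-empty line of the completion as one summary entry, stripping
        # any leading dash or bullet characters the model may prepend.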
        headlines = response.choices[0].message.content.strip().splitlines()
        result = []
        for line in headlines:
            line = line.strip("-–• ").strip()
            if line:
                ref_id = start_index + len(result)
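                # image_url and article_link are placeholder values (a generic Unsplash
                # image and a Google search link), not links to the underlying articles.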
                result.append({
                    "summary": line,
                    "ref": ref_id,
                    "image_url": "https://source.unsplash.com/800x600/?news",
                    "article_link": f"https://google.com/search?q={topic_key}+news"
                })
        return result
    except Exception as e:
        print(f"❌ [Summarize topic '{topic_key}' Error]", e)
        return []

# 🚀 Generate and cache feed
def generate_and_cache_daily_feed():
    try:
        print("🆕 Generating daily feed...")
        topic_docs = load_docs_by_topic_with_refs()
        feed_map = {}
        global_ref = 1
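        # global_ref hands out sequential reference numbers across topics so every
        # summary in the final feed gets a unique ref id.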
        for topic_key in TOPIC_KEYS:
            try:
                summaries = summarize_topic(topic_key, topic_docs.get(topic_key, []), global_ref)
                feed_map[topic_key] = summaries
                global_ref += len(summaries)
            except Exception as e:
                print(f"❌ [Topic summarization error: {topic_key}]", e)
                feed_map[topic_key] = []

        final_feed = []
        for topic, topic_key in zip(TOPICS, TOPIC_KEYS):
            topic_feed = feed_map.get(topic_key, [])
            final_feed.append({
                "topic": topic,
                "feed": topic_feed
            })

        # Cache to Redis
        try:
            cache_key = "daily_news_feed_cache"
            redis_client.set(cache_key, json.dumps(final_feed, ensure_ascii=False))
            redis_client.expire(cache_key, 86400)
            print(f"✅ Cached feed under key '{cache_key}' with 24-hour expiry.")
        except Exception as e:
            print("❌ [Redis cache error]", e)

        return final_feed
    except Exception as e:
        print("❌ [generate_and_cache_daily_feed Error]", e)
        return []

# 📦 Retrieve from cache
def get_cached_daily_feed():
    try:
        cache_key = "daily_news_feed_cache"
        cached = redis_client.get(cache_key)
        return json.loads(cached) if cached else []
    except Exception as e:
        print("❌ [get_cached_daily_feed Error]", e)
        return []

# 🧪 Run if main
if __name__ == "__main__":
    feed = generate_and_cache_daily_feed()
    print(json.dumps(feed, indent=2, ensure_ascii=False))