import os
import json
import redis
import numpy as np
from typing import List, Dict
from openai import OpenAI
from components.indexers.news_indexer import get_upstash_vector_store
from llama_index.core.vector_stores.types import (
    VectorStoreQuery,
    MetadataFilter,
    MetadataFilters,
    FilterOperator,
)

# 🔐 Environment variables
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# ✅ Redis client
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# 📰 Topics
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
TOPIC_KEYS = [t.lower().replace(" news", "") for t in TOPICS]

# 🧠 Summarization Prompt
BASE_PROMPT = (
    "You are Nuse's editorial summarizer. Read the excerpts below and extract the most important stories. "
    "Return up to 3 punchy headlines, each under 20 words. Each headline should be followed by a short explanation of why the story matters."
)


# 📥 Load documents and metadata
def load_docs_by_topic_with_refs() -> Dict[str, List[Dict]]:
    topic_docs = {key: [] for key in TOPIC_KEYS}
    try:
        vector_store = get_upstash_vector_store()
        for full_topic, topic_key in zip(TOPICS, TOPIC_KEYS):
            filters = MetadataFilters(
                filters=[MetadataFilter(key="topic", value=topic_key, operator=FilterOperator.EQ)]
            )
            dummy_vector = np.random.rand(384).tolist()
            query = VectorStoreQuery(query_embedding=dummy_vector, similarity_top_k=50, filters=filters)
            result = vector_store.query(query)
            for node in result.nodes:
                content = node.get_content().strip()
                ref_id = node.node_id or node.id_ or ""
                if content:
                    topic_docs[topic_key].append({"text": content, "ref": ref_id})
    except Exception as e:
        print("❌ [load_docs_by_topic_with_refs Error]", e)
    return topic_docs


# 🧪 Topic summarizer
def summarize_topic(topic_key: str, docs: List[Dict], start_index: int) -> List[Dict]:
    if not docs:
        print(f"⚠️ No docs for topic: {topic_key}")
        return []
    try:
        content = "\n\n---\n\n".join([d["text"] for d in docs])[:12000]
        client = OpenAI(api_key=OPENAI_API_KEY)
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": BASE_PROMPT},
                {"role": "user", "content": content},
            ],
            max_tokens=512,
            temperature=0.7,
        )
        headlines = response.choices[0].message.content.strip().splitlines()
        result = []
        for i, line in enumerate(headlines):
            line = line.strip("-–• ").strip()
            if line:
                ref_id = start_index + i
                result.append({
                    "summary": line,
                    "ref": ref_id,
                    "image_url": "https://source.unsplash.com/800x600/?news",
                    "article_link": f"https://google.com/search?q={topic_key}+news"
                })
        return result
    except Exception as e:
        print(f"❌ [Summarize topic '{topic_key}' Error]", e)
        return []
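
# Illustrative shapes of the data flowing between the two helpers above (values are
# made up; real refs come from vector-store node IDs and real summaries from the model):
#
#   load_docs_by_topic_with_refs()
#     -> {"india": [{"text": "Parliament passes ...", "ref": "node-abc123"}], "world": [...], ...}
#
#   summarize_topic("india", docs, start_index=1)
#     -> [{"summary": "Parliament passes ...", "ref": 1,
#          "image_url": "https://source.unsplash.com/800x600/?news",
#          "article_link": "https://google.com/search?q=india+news"}]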


# 🚀 Generate and cache feed
def generate_and_cache_daily_feed():
    try:
        print("🆕 Generating daily feed...")
        topic_docs = load_docs_by_topic_with_refs()
        feed_map = {}
        global_ref = 1

        for topic_key in TOPIC_KEYS:
            try:
                summaries = summarize_topic(topic_key, topic_docs.get(topic_key, []), global_ref)
                feed_map[topic_key] = summaries
                global_ref += len(summaries)
            except Exception as e:
                print(f"❌ [Topic summarization error: {topic_key}]", e)
                feed_map[topic_key] = []

        final_feed = []
        for topic, topic_key in zip(TOPICS, TOPIC_KEYS):
            topic_feed = feed_map.get(topic_key, [])
            final_feed.append({
                "topic": topic,
                "feed": topic_feed
            })

        # Cache to Redis
        try:
            cache_key = "daily_news_feed_cache"
            redis_client.set(cache_key, json.dumps(final_feed, ensure_ascii=False))
            redis_client.expire(cache_key, 86400)
            print(f"✅ Cached feed under key '{cache_key}' with 24-hour expiry.")
        except Exception as e:
            print("❌ [Redis cache error]", e)

        return final_feed
    except Exception as e:
        print("❌ [generate_and_cache_daily_feed Error]", e)
        return []


# 📦 Retrieve from cache
def get_cached_daily_feed():
    try:
        cache_key = "daily_news_feed_cache"
        cached = redis_client.get(cache_key)
        return json.loads(cached) if cached else []
    except Exception as e:
        print("❌ [get_cached_daily_feed Error]", e)
        return []


# 🧪 Run if main
if __name__ == "__main__":
    feed = generate_and_cache_daily_feed()
    print(json.dumps(feed, indent=2, ensure_ascii=False))
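
# Shape of the JSON payload cached under "daily_news_feed_cache" (illustrative; headline
# text depends on the model output, and topic order follows TOPICS):
#
#   [
#     {"topic": "India news",
#      "feed": [{"summary": "...", "ref": 1, "image_url": "...", "article_link": "..."}]},
#     {"topic": "World news", "feed": []},
#     ...
#   ]
#
# Downstream consumers can call get_cached_daily_feed() first and fall back to
# generate_and_cache_daily_feed() on a cache miss.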