import os
import json
import redis
import numpy as np
from typing import List, Dict
from openai import OpenAI
from components.indexers.news_indexer import get_upstash_vector_store
from llama_index.core.vector_stores.types import VectorStoreQuery, MetadataFilter, MetadataFilters, FilterOperator

# Environment variables
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Redis client
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
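# decode_responses=True above makes redis-py return str instead of bytes, so the
# cached JSON can be passed straight to json.loads() in get_cached_daily_feed().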

# Topics
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
TOPIC_KEYS = [t.lower().replace(" news", "") for t in TOPICS]

# Summarization prompt
BASE_PROMPT = (
    "You are Nuse's editorial summarizer. Read the excerpts below and extract the most important stories. "
    "Return up to 3 punchy headlines, each under 20 words. Each headline should be followed by a short explanation of why the story matters."
)
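
# Note: summarize_topic() splits the model reply line by line, so every
# non-empty line of the response (headline or explanation) becomes its own
# feed item; e.g. a reply like the sketch below would yield two entries:
#   "RBI holds rates steady amid inflation worries"
#   "Why it matters: borrowing costs stay high for households and businesses"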

# Load documents and metadata
def load_docs_by_topic_with_refs() -> Dict[str, List[Dict]]:
    topic_docs = {key: [] for key in TOPIC_KEYS}
    try:
        vector_store = get_upstash_vector_store()
        for full_topic, topic_key in zip(TOPICS, TOPIC_KEYS):
            filters = MetadataFilters(
                filters=[MetadataFilter(key="topic", value=topic_key, operator=FilterOperator.EQ)]
            )
            # A random query vector only satisfies the vector-store API; the metadata
            # filter does the actual selection. The 384 dimensions must match the
            # embedding size the index was built with.
            dummy_vector = np.random.rand(384).tolist()
            query = VectorStoreQuery(query_embedding=dummy_vector, similarity_top_k=50, filters=filters)
            result = vector_store.query(query)
            for node in result.nodes:
                content = node.get_content().strip()
                ref_id = node.node_id or node.id_ or ""
                if content:
                    topic_docs[topic_key].append({"text": content, "ref": ref_id})
    except Exception as e:
        print("[load_docs_by_topic_with_refs Error]", e)
    return topic_docs

# Topic summarizer
def summarize_topic(topic_key: str, docs: List[Dict], start_index: int) -> List[Dict]:
    if not docs:
        print(f"No docs for topic: {topic_key}")
        return []
    try:
        # Concatenate the excerpts, capped at 12,000 characters to keep the prompt bounded
        content = "\n\n---\n\n".join([d["text"] for d in docs])[:12000]
        client = OpenAI(api_key=OPENAI_API_KEY)
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": BASE_PROMPT},
                {"role": "user", "content": content},
            ],
            max_tokens=512,
            temperature=0.7,
        )
        headlines = response.choices[0].message.content.strip().splitlines()
        result = []
        for i, line in enumerate(headlines):
            line = line.strip("-–• ").strip()
            if line:
                ref_id = start_index + i
                result.append({
                    "summary": line,
                    "ref": ref_id,
                    "image_url": "https://source.unsplash.com/800x600/?news",
                    "article_link": f"https://google.com/search?q={topic_key}+news"
                })
        return result
    except Exception as e:
        print(f"[Summarize topic '{topic_key}' Error]", e)
        return []

# Generate and cache feed
def generate_and_cache_daily_feed():
    try:
        print("Generating daily feed...")
        topic_docs = load_docs_by_topic_with_refs()
        feed_map = {}
        global_ref = 1  # running counter so refs stay unique across topics
        for topic_key in TOPIC_KEYS:
            try:
                summaries = summarize_topic(topic_key, topic_docs.get(topic_key, []), global_ref)
                feed_map[topic_key] = summaries
                global_ref += len(summaries)
            except Exception as e:
                print(f"[Topic summarization error: {topic_key}]", e)
                feed_map[topic_key] = []
        final_feed = []
        for topic, topic_key in zip(TOPICS, TOPIC_KEYS):
            topic_feed = feed_map.get(topic_key, [])
            final_feed.append({
                "topic": topic,
                "feed": topic_feed
            })
        # Cache to Redis with a 24-hour expiry
        try:
            cache_key = "daily_news_feed_cache"
            redis_client.set(cache_key, json.dumps(final_feed, ensure_ascii=False))
            redis_client.expire(cache_key, 86400)
            print(f"Cached feed under key '{cache_key}' with 24-hour expiry.")
        except Exception as e:
            print("[Redis cache error]", e)
        return final_feed
    except Exception as e:
        print("[generate_and_cache_daily_feed Error]", e)
        return []
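
# Illustrative shape of the cached payload, based on what summarize_topic emits
# (values here are placeholders, not real output):
# [
#   {"topic": "India news",
#    "feed": [{"summary": "<headline>", "ref": 1,
#              "image_url": "https://source.unsplash.com/800x600/?news",
#              "article_link": "https://google.com/search?q=india+news"}]},
#   ...
# ]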

# Retrieve from cache
def get_cached_daily_feed():
    try:
        cache_key = "daily_news_feed_cache"
        cached = redis_client.get(cache_key)
        return json.loads(cached) if cached else []
    except Exception as e:
        print("[get_cached_daily_feed Error]", e)
        return []

# Run if main
if __name__ == "__main__":
    feed = generate_and_cache_daily_feed()
    print(json.dumps(feed, indent=2, ensure_ascii=False))