import os
import json
import redis
import numpy as np
from typing import List, Dict
from openai import OpenAI
from components.indexers.news_indexer import get_upstash_vector_store
from llama_index.core.vector_stores.types import VectorStoreQuery, MetadataFilter, MetadataFilters, FilterOperator

# 🔐 Environment variables
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# ✅ Redis client
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# 📰 Topics
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
TOPIC_KEYS = [t.lower().replace(" news", "") for t in TOPICS]
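# e.g. "India news" -> "india"; these keys are what the "topic" metadata filter matches against below.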

# 🧠 Summarization Prompt
BASE_PROMPT = (
    "You are Nuse’s editorial summarizer. Read the excerpts below and extract the most important stories. "
    "Return up to 3 punchy headlines, each under 20 words. Each headline should be followed by a short explanation of why the story matters."
)

# 📥 Load documents and metadata
def load_docs_by_topic_with_refs() -> Dict[str, List[Dict]]:
    topic_docs = {key: [] for key in TOPIC_KEYS}
    try:
        vector_store = get_upstash_vector_store()
        for full_topic, topic_key in zip(TOPICS, TOPIC_KEYS):
            filters = MetadataFilters(
                filters=[MetadataFilter(key="topic", value=topic_key, operator=FilterOperator.EQ)]
            )
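            # A random query embedding is used so retrieval is driven by the metadata filter
            # rather than similarity; 384 is assumed to match the index's embedding dimension.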
            dummy_vector = np.random.rand(384).tolist()
            query = VectorStoreQuery(query_embedding=dummy_vector, similarity_top_k=50, filters=filters)
            result = vector_store.query(query)
            for node in result.nodes:
                content = node.get_content().strip()
                ref_id = node.node_id or node.id_ or ""
                if content:
                    topic_docs[topic_key].append({"text": content, "ref": ref_id})
    except Exception as e:
        print("❌ [load_docs_by_topic_with_refs Error]", e)
    return topic_docs

# 🧪 Topic summarizer
def summarize_topic(topic_key: str, docs: List[Dict], start_index: int) -> List[Dict]:
    if not docs:
        print(f"⚠️ No docs for topic: {topic_key}")
        return []

    try:
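        # Join excerpts with separators and truncate to keep the prompt within a rough size budget.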
        content = "\n\n---\n\n".join([d["text"] for d in docs])[:12000]
        client = OpenAI(api_key=OPENAI_API_KEY)
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": BASE_PROMPT},
                {"role": "user", "content": content},
            ],
            max_tokens=512,
            temperature=0.7,
        )
        headlines = response.choices[0].message.content.strip().splitlines()
        result = []
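        # Each non-empty line of the model output becomes its own feed entry.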
        for i, line in enumerate(headlines):
            line = line.strip("-–‒ ").strip()
            if line:
                ref_id = start_index + i
                result.append({
                    "summary": line,
                    "ref": ref_id,
                    "image_url": "https://source.unsplash.com/800x600/?news",
                    "article_link": f"https://google.com/search?q={topic_key}+news"
                })
        return result
    except Exception as e:
        print(f"❌ [Summarize topic '{topic_key}' Error]", e)
        return []

# 🚀 Generate and cache feed
def generate_and_cache_daily_feed():
    try:
        print("πŸ†• Generating daily feed...")
        topic_docs = load_docs_by_topic_with_refs()
        feed_map = {}
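        # Running counter so summary refs stay unique across all topics.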
        global_ref = 1

        for topic_key in TOPIC_KEYS:
            try:
                summaries = summarize_topic(topic_key, topic_docs.get(topic_key, []), global_ref)
                feed_map[topic_key] = summaries
                global_ref += len(summaries)
            except Exception as e:
                print(f"❌ [Topic summarization error: {topic_key}]", e)
                feed_map[topic_key] = []

        final_feed = []
        for topic, topic_key in zip(TOPICS, TOPIC_KEYS):
            topic_feed = feed_map.get(topic_key, [])
            final_feed.append({
                "topic": topic,
                "feed": topic_feed
            })

        # Cache to Redis
        try:
            cache_key = "daily_news_feed_cache"
            redis_client.set(cache_key, json.dumps(final_feed, ensure_ascii=False), ex=86400)
            print(f"✅ Cached feed under key '{cache_key}' with 24-hour expiry.")
        except Exception as e:
            print("❌ [Redis cache error]", e)

        return final_feed

    except Exception as e:
        print("❌ [generate_and_cache_daily_feed Error]", e)
        return []

# 📦 Retrieve from cache
def get_cached_daily_feed():
    try:
        cache_key = "daily_news_feed_cache"
        cached = redis_client.get(cache_key)
        return json.loads(cached) if cached else []
    except Exception as e:
        print("❌ [get_cached_daily_feed Error]", e)
        return []

# 🧪 Run if main
if __name__ == "__main__":
    feed = generate_and_cache_daily_feed()
    print(json.dumps(feed, indent=2, ensure_ascii=False))