File size: 5,162 Bytes
69210b9
 
 
 
 
c86bfda
69210b9
a092d54
67fbb52
e465159
69210b9
6716a7e
f312f0d
1804706
0e7d7a3
69210b9
 
27120a6
6716a7e
69210b9
0e7d7a3
69210b9
 
6716a7e
69210b9
 
6716a7e
62a4bec
 
 
 
 
6716a7e
 
 
93ca074
3521e98
 
93ca074
 
3521e98
 
 
93ca074
22c12ad
93ca074
 
6716a7e
 
 
 
93ca074
6716a7e
27120a6
 
 
 
 
69210b9
27120a6
69210b9
27120a6
69210b9
27120a6
69210b9
62a4bec
 
27120a6
6716a7e
27120a6
 
 
 
 
62a4bec
27120a6
 
 
 
236d6c7
27120a6
 
 
71257bd
6716a7e
69210b9
 
6716a7e
69210b9
6716a7e
93ca074
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69210b9
 
93ca074
6716a7e
67fbb52
 
e465159
 
69210b9
 
0e7d7a3
69210b9
 
 
 
0e7d7a3
69210b9
 
 
 
 
 
 
 
 
 
6716a7e
69210b9
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import os
import sys
import json
import requests
import redis
from typing import List, Dict, Optional
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.schema import Document
from llama_index.core.settings import Settings

# ✅ Disable implicit LLM usage: the global llama_index Settings.llm is set to
# None when this module is imported; summaries in this file are produced by
# call_mistral() instead.
Settings.llm = None

# Environment variables (all read once, at import time).
# REDIS_URL falls back to a local Redis instance when UPSTASH_REDIS_URL is unset.
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
# NOTE(review): despite the variable name (UPSTASH_REDIS_TOKEN), this value is
# used below as the Redis *key* under which the daily feed is written and read.
# It is None when the variable is unset — confirm this is intended.
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
# Endpoint that call_mistral() POSTs prompts to; None when unset.
MISTRAL_URL = os.environ.get("MISTRAL_URL")
# Bearer token sent in the Authorization header; None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# ✅ Redis client, created at import time from REDIS_URL.
# decode_responses=True asks redis-py to return str values instead of bytes.
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# 📰 Topics: each entry is used as a query against the index and becomes one
# section of the cached daily feed.
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]

# 📄 Headers for HF endpoint
# NOTE(review): call_mistral() builds its own identical headers dict, so this
# constant is not referenced anywhere in the code visible here.
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json"
}

# 🧠 Build Mistral-style instruction prompt
def build_prompt(content: str, topic: str) -> str:
    """Build the Mistral instruct-format prompt for summarizing one document.

    The fixed instruction text is followed by a ``Topic: ...`` line and the
    whitespace-stripped ``content``; the whole thing is wrapped as
    ``<s>[INST]...[/INST]</s>``.

    Args:
        content: Raw document text to summarize.
        topic: Topic label inserted into the prompt (e.g. ``"Tech news"``).

    Returns:
        The complete prompt string.
    """
    # Every literal except the last ends with an explicit "\n": adjacent string
    # literals are concatenated verbatim, so a missing separator glues two
    # sentences (or two example bullets) together.
    # The length hint sits on the example header rather than after each bullet,
    # so every example is exactly one line starting with "- " — the same shape
    # the model is asked to produce.
    base_instruction = (
        "You are Nuse’s official news summarizer — insightful, punchy, and always on point. 🧠✨\n"
        "Your job is to scan the content below and extract the key news items. For each item, craft a crisp summary (15–20 words), add 1–2 fitting emojis, and make it pop.\n"
        "List each summary on a new line starting with a dash (-) and no numbers. This is how Nuse keeps it clean and scannable.\n"
        "\n"
        "Example format (each summary 15–20 words):\n"
        "- India stuns Australia in a last-ball thriller at the World Cup finals 🏏🇮🇳\n"
        "- U.S. imposes sweeping tariffs on Chinese tech giants, rattling global markets 📉🇺🇸\n"
        "- Ceasefire breakthrough: Netanyahu bows to pressure after week-long escalation 🔥🕊️\n"
        "\n"
        "If you don't find anything useful, don't return anything for that news item.\n"
        "Be sharp. Be brief. No fluff. No preambles. Just the summaries.\n"
        "Return only the final summary block — no extra commentary, no prompt repetition."
    )
    tail = f"Topic: {topic}\n\n{content.strip()}"
    # The trailing "[/INST]</s>" is kept as-is: call_mistral() splits the
    # model output on exactly this marker to drop the echoed prompt.
    return f"<s>[INST]{base_instruction}\n\n{tail}[/INST]</s>"


# Call Mistral using HF Inference Endpoint
def call_mistral(prompt: str) -> Optional[str]:
    """POST ``prompt`` to the endpoint at MISTRAL_URL and return the generated text.

    The JSON reply may be either a non-empty list (whose first element is read)
    or a dict; in both cases the ``"generated_text"`` field is used, defaulting
    to an empty string. If the text contains the ``[/INST]</s>`` marker, only
    the part after its last occurrence is kept.

    Args:
        prompt: Full prompt string to send as the ``inputs`` field.

    Returns:
        The stripped generated text, or ``None`` when the reply has any other
        shape or when any exception is raised during the request or parsing
        (the exception is printed, not propagated).
    """
    marker = "[/INST]</s>"
    request_headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json"
    }
    body = {"inputs": prompt}

    try:
        reply = requests.post(MISTRAL_URL, headers=request_headers, json=body, timeout=20)
        reply.raise_for_status()
        parsed = reply.json()

        # Pick the generated text out of whichever reply shape came back.
        if isinstance(parsed, list) and parsed:
            generated = parsed[0].get("generated_text", "")
        elif isinstance(parsed, dict):
            generated = parsed.get("generated_text", "")
        else:
            return None

        # Drop everything up to and including the last prompt marker, if any.
        if marker in generated:
            generated = generated.split(marker)[-1]
        return generated.strip()

    except Exception as e:
        print(f"โš ๏ธ Mistral error: {e}")
        return None

# ✂️ Summarize top N documents
def summarize_topic(docs: List[str], topic: str, max_docs: int = 5) -> List[Dict]:
    """Summarize the first ``max_docs`` documents into feed entries for ``topic``.

    Each document is turned into a prompt with build_prompt() and sent through
    call_mistral(). Every line of the reply that starts with a hyphen or an
    en dash becomes one feed entry; other lines are ignored.

    Args:
        docs: Document texts, in priority order.
        topic: Topic label, used in the prompt and in the search link.
        max_docs: Maximum number of documents to summarize (default 5).

    Returns:
        A list of dicts with keys ``"summary"``, ``"image_url"`` and
        ``"article_link"``. Empty when ``docs`` is empty or no reply contained
        a bullet line.
    """
    # Function-scope import so the block does not depend on the file's import header.
    from urllib.parse import quote_plus

    # Accept both "-" and "–" (en dash) bullets; models emit either.
    bullet_marks = ("-", "–")
    # quote_plus turns spaces into "+" and also escapes characters such as
    # "&" or "+" that would otherwise corrupt the query string.
    article_link = "https://google.com/search?q=" + quote_plus(topic)

    feed = []
    for doc in docs[:max_docs]:
        prompt = build_prompt(doc, topic)
        print("\n📤 Prompt sent to Mistral:\n", prompt[:300], "...\n")
        summary_block = call_mistral(prompt)

        # call_mistral() returns None on failure and may return "" — skip both.
        if not summary_block:
            continue

        for line in summary_block.splitlines():
            line = line.strip()
            if not line.startswith(bullet_marks):
                continue
            clean_summary = line.lstrip("-–").strip()
            if clean_summary:
                feed.append({
                    "summary": clean_summary,
                    "image_url": "https://source.unsplash.com/800x600/?news",
                    "article_link": article_link,
                })

    return feed


# ⚡ Generate and cache daily feed
def generate_and_cache_daily_feed(documents: List[Document]):
    """Build the per-topic feed from ``documents`` and store it in Redis.

    A VectorStoreIndex is built from the documents and queried once per entry
    in TOPICS. The content of each returned source node is stringified and
    passed to summarize_topic(). The combined result is JSON-encoded (with
    ``ensure_ascii=False``) and written under REDIS_KEY.

    Args:
        documents: Documents to index.

    Returns:
        A list with one ``{"topic": ..., "feed": ...}`` dict per topic, where
        ``"topic"`` is the lower-cased topic with ``" news"`` removed.
    """
    engine = RetrieverQueryEngine(
        retriever=VectorStoreIndex.from_documents(documents).as_retriever()
    )

    def _section_for(topic):
        # One feed section: query the index, then summarize what came back.
        print(f"\n๐Ÿ” Generating for: {topic}")
        hits = engine.query(topic).source_nodes
        texts = [str(hit.get_content()) for hit in hits]
        entries = summarize_topic(texts, topic)
        return {
            "topic": topic.lower().replace(" news", ""),
            "feed": entries
        }

    final_feed = [_section_for(topic) for topic in TOPICS]

    serialized = json.dumps(final_feed, ensure_ascii=False)
    redis_client.set(REDIS_KEY, serialized)
    print(f"โœ… Cached daily feed under key '{REDIS_KEY}'")
    return final_feed

# 📦 For testing or API access
def get_cached_daily_feed():
    """Return the feed stored in Redis under REDIS_KEY.

    Returns:
        The JSON-decoded cached value, or an empty list when the key is
        missing or its value is empty.
    """
    raw = redis_client.get(REDIS_KEY)
    if not raw:
        return []
    return json.loads(raw)