File size: 4,184 Bytes
69210b9
 
 
 
62a4bec
69210b9
 
 
 
 
 
a092d54
67fbb52
e465159
69210b9
9d73da0
f312f0d
1804706
0e7d7a3
69210b9
 
0e7d7a3
 
69210b9
0e7d7a3
69210b9
 
9d73da0
69210b9
 
9d73da0
69210b9
 
 
 
 
 
9d73da0
62a4bec
 
 
 
 
 
 
 
 
 
69210b9
62a4bec
 
 
 
 
69210b9
0e7d7a3
69210b9
62a4bec
69210b9
62a4bec
 
 
 
 
 
 
 
 
 
 
 
69210b9
62a4bec
 
 
69210b9
71257bd
9d73da0
69210b9
 
9d73da0
 
 
 
 
 
 
69210b9
 
9d73da0
69210b9
 
 
 
 
 
9d73da0
69210b9
 
9d73da0
67fbb52
 
e465159
 
69210b9
 
0e7d7a3
69210b9
 
 
 
0e7d7a3
69210b9
 
 
 
 
 
9d73da0
69210b9
 
 
 
9d73da0
69210b9
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import os
import sys
import json
import requests
from requests.exceptions import RequestException

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

import redis
from typing import List, Dict
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.schema import Document
from llama_index.core.settings import Settings

# ✅ Disable OpenAI fallback
# Setting the global llama_index LLM to None means no LLM is configured through
# Settings for the query engine built in this module.
Settings.llm = None

# 🔐 Environment variables
# All values are read once, at import time.
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
# NOTE(review): despite the env var name, this value is used as the Redis *key*
# under which the feed is stored and read, and it is printed in a log line after
# caching. It has no default, so it is None when the variable is unset.
# Confirm it is meant to be a key name and not a secret.
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
MISTRAL_URL = os.environ.get("MISTRAL_URL")       # Hugging Face endpoint
HF_TOKEN = os.environ.get("HF_TOKEN")             # Hugging Face token

# ✅ Redis client
# decode_responses=True asks redis-py to return str values instead of bytes.
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# 📰 Topics to summarize
# Each entry is used both as the retrieval query and as the topic label in the prompt.
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]

# ✍️ Build Mistral prompt
def build_prompt(content: str, topic: str) -> str:
    """Compose the summarization prompt for one piece of content.

    Args:
        content: Text to be summarized; appended verbatim after a blank line.
        topic: Topic label inserted after the fixed instructions.

    Returns:
        The fixed instruction sentences, then ``Topic: <topic>``, a blank line,
        and ``content``.
    """
    instructions = (
        "You are a news summarizer. Summarize the following content in 25-30 words. "
        "Make it engaging and informative. Include appropriate emojis."
    )
    return f"{instructions} Topic: {topic}\n\n{content}"

# 🧠 Send prompt to Mistral
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json"
}

def _extract_generated_text(data, prompt: str) -> str:
    """Return the generated text from a decoded response, without an echoed prompt.

    Two response shapes are accepted: a non-empty list whose first item is a
    dict, or a dict itself. Any other shape, or a missing / non-string
    ``generated_text`` value, yields an empty string.
    """
    record = data[0] if isinstance(data, list) and data else data
    if not isinstance(record, dict):
        return ""

    text = record.get("generated_text")
    if not isinstance(text, str):
        return ""

    # Text-generation backends may return the prompt followed by the completion;
    # keep only the completion so the summary does not contain the instructions
    # and the source article.
    if prompt and text.startswith(prompt):
        text = text[len(prompt):]
    return text.strip()


def call_mistral(prompt: str, max_new_tokens: int = 128, temperature: float = 0.7) -> str:
    """
    Call the Hugging Face Inference Endpoint at ``MISTRAL_URL`` and return its text.

    Args:
        prompt: Full prompt text, sent as the ``inputs`` field.
        max_new_tokens: Forwarded in ``parameters`` as ``max_new_tokens``.
        temperature: Forwarded in ``parameters`` as ``temperature``.

    Returns:
        The stripped ``generated_text`` from the response, with a leading copy of
        ``prompt`` removed when the endpoint echoes it. Returns an empty string
        when the request fails, the body cannot be decoded, or the response
        carries no usable text. This function does not raise on those failures.
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature
        }
    }

    # Only the network call and body decoding live in the try block; parsing the
    # decoded data is done afterwards with explicit type checks.
    try:
        response = requests.post(MISTRAL_URL, headers=HEADERS, json=payload, timeout=60)
        response.raise_for_status()
        data = response.json()
    except RequestException as e:
        print("❌ Mistral HF request failed:", str(e))
        if e.response is not None:
            # Truncated so a large error page does not flood the log.
            print("β†ͺ️ Response:", e.response.text[:300])
        return ""
    except Exception as e:
        # Deliberate best-effort: the contract is "empty string on failure", so
        # anything else (e.g. a body that is not valid JSON) is logged, not raised.
        print("❌ Unexpected error:", str(e))
        return ""

    return _extract_generated_text(data, prompt)


# ✂️ Generate summaries per topic
def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
    """
    Summarize up to the first five documents for one topic.

    Args:
        docs: Document texts for the topic. Only the first five are considered;
            entries that are empty, or shorter than 200 characters once
            stripped, are skipped with a log line.
        topic: Topic label. It is passed to the prompt builder and used to build
            the search link attached to every entry.

    Returns:
        A list of dicts with ``summary``, ``image_url`` and ``article_link``
        keys, one per document for which the model returned a non-empty summary.
    """
    # Imported here so the block stays self-contained. quote_plus turns spaces
    # into "+" and also escapes characters such as "&", "#" or "?" that a plain
    # space replacement would leave raw in the query string.
    from urllib.parse import quote_plus

    # The link depends only on the topic, so build it once rather than per document.
    article_link = "https://google.com/search?q=" + quote_plus(topic)

    feed = []

    for i, doc in enumerate(docs[:5]):
        if not doc or len(doc.strip()) < 200:
            print(f"⚠️ Skipped short/empty doc {i+1} for '{topic}'\n")
            continue

        print(f"πŸ“„ Doc {i+1} preview:\n{doc[:300]}...\n")
        summary = call_mistral(build_prompt(doc, topic))

        # An empty summary means the model call failed or returned nothing usable.
        if summary:
            feed.append({
                "summary": summary,
                "image_url": "https://source.unsplash.com/800x600/?news",
                "article_link": article_link,
            })

    return feed

# 🔁 Full pipeline
def generate_and_cache_daily_feed(documents: List[Document]):
    """Build the per-topic feed from ``documents`` and store it in Redis.

    A vector index is built over ``documents`` and queried once per entry in
    ``TOPICS``. The content of each returned source node is handed to
    ``summarize_topic``. The resulting list is serialized to JSON and written
    under ``REDIS_KEY``.

    Args:
        documents: Documents to index.

    Returns:
        A list with one dict per topic: ``topic`` (the lower-cased topic with
        " news" removed) and ``feed`` (the entries from ``summarize_topic``).
    """
    engine = RetrieverQueryEngine(
        retriever=VectorStoreIndex.from_documents(documents).as_retriever()
    )

    feed_by_topic = []
    for topic in TOPICS:
        print(f"\nπŸ” Generating for: {topic}")
        hits = engine.query(topic).source_nodes
        passages = [str(hit.get_content()) for hit in hits]

        entries = summarize_topic(passages, topic)
        label = topic.lower().replace(" news", "")
        feed_by_topic.append({"topic": label, "feed": entries})

    # Persist the whole feed as one JSON string; non-ASCII characters are kept as-is.
    serialized = json.dumps(feed_by_topic, ensure_ascii=False)
    redis_client.set(REDIS_KEY, serialized)
    print(f"βœ… Cached daily feed under key '{REDIS_KEY}'")
    return feed_by_topic

# 📦 For API access
def get_cached_daily_feed():
    """Return the feed stored under ``REDIS_KEY``, or an empty list if none is stored.

    The stored value is decoded from JSON. A missing or empty value yields ``[]``.
    """
    raw = redis_client.get(REDIS_KEY)
    if not raw:
        return []
    return json.loads(raw)