switching to tinyllama
Files changed:
- components/LLMs/Mistral.py (+39 -0)
- components/LLMs/TinyLLama.py (+34 -0)
- components/LLMs/__init__.py (+0 -0)
- components/generators/daily_feed.py (+24 -59)
- requirements.txt (+1 -0)
components/LLMs/Mistral.py
ADDED
@@ -0,0 +1,39 @@
import os
import requests
from typing import Optional

# 🔐 Load HF credentials and endpoint URL from environment variables
HF_TOKEN = os.environ.get("HF_TOKEN")
MISTRAL_URL = os.environ.get("MISTRAL_URL")

# 🔐 Headers for HF Inference Endpoint
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json"
}

# 🚀 Call Mistral using HF Inference Endpoint
def call_mistral(base_prompt: str, tail_prompt: str) -> Optional[str]:
    full_prompt = f"<s>[INST]{base_prompt}\n\n{tail_prompt}[/INST]</s>"
    payload = {
        "inputs": full_prompt
    }

    try:
        response = requests.post(MISTRAL_URL, headers=HEADERS, json=payload, timeout=20)
        response.raise_for_status()
        data = response.json()

        raw_output = ""
        if isinstance(data, list) and data:
            raw_output = data[0].get("generated_text", "")
        elif isinstance(data, dict):
            raw_output = data.get("generated_text", "")

        if "[/INST]</s>" in raw_output:
            return raw_output.split("[/INST]</s>")[-1].strip()
        return raw_output.strip()

    except Exception as e:
        print(f"⚠️ Mistral error: {e}")
        return None
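For reference, call_mistral reads MISTRAL_URL and HF_TOKEN at import time, so both must be set before the module is imported. A minimal usage sketch; the endpoint URL, token, and prompt text below are placeholders, not values from this commit:

# Hypothetical usage sketch - endpoint URL and token are placeholders.
import os

os.environ["MISTRAL_URL"] = "https://<your-endpoint>.endpoints.huggingface.cloud"
os.environ["HF_TOKEN"] = "hf_..."  # placeholder token

from components.LLMs.Mistral import call_mistral

summary = call_mistral(
    base_prompt="Summarize the content below in one line.",
    tail_prompt="Topic: Tech news\n\nRegulators approved the long-awaited merger on Friday.",
)
print(summary or "no output")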
components/LLMs/TinyLLama.py
ADDED
@@ -0,0 +1,34 @@
import os
from transformers import pipeline
from typing import Optional

# Load model just once when module is imported
_tinyllama_pipeline = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype="auto",
    device_map="auto"
)

def call_tinyllama(base_prompt: str, tail_prompt: str, max_new_tokens: int = 256) -> Optional[str]:
    """
    Calls the TinyLlama model with an instruction-tuned prompt.

    Args:
        base_prompt (str): Instruction or system prompt.
        tail_prompt (str): User or content-specific prompt.
        max_new_tokens (int): Max tokens to generate.

    Returns:
        str or None: The generated summary content.
    """
    prompt = f"<s>[INST]{base_prompt}\n\n{tail_prompt}[/INST]</s>"
    try:
        result = _tinyllama_pipeline(prompt, max_new_tokens=max_new_tokens)
        output = result[0]["generated_text"]
        if "[/INST]" in output:
            return output.split("[/INST]")[-1].strip()
        return output.strip()
    except Exception as e:
        print(f"⚠️ TinyLlama error: {e}")
        return None
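One caveat worth noting: TinyLlama-1.1B-Chat-v1.0 was fine-tuned on the Zephyr-style chat template (<|system|>/<|user|>/<|assistant|>), not on Mistral's [INST] wrapper, so the prompt format above may cost some output quality. A hypothetical variant that lets the tokenizer render the model's own template; the function name and structure are illustrative, not part of this commit:

# Hypothetical alternative to call_tinyllama using the model's chat template.
from typing import Optional
from transformers import pipeline

_pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype="auto",
    device_map="auto",
)

def call_tinyllama_chat(base_prompt: str, tail_prompt: str,
                        max_new_tokens: int = 256) -> Optional[str]:
    messages = [
        {"role": "system", "content": base_prompt},
        {"role": "user", "content": tail_prompt},
    ]
    # Render the Zephyr-style template this checkpoint was fine-tuned on
    prompt = _pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    try:
        result = _pipe(prompt, max_new_tokens=max_new_tokens, return_full_text=False)
        return result[0]["generated_text"].strip()
    except Exception as e:
        print(f"⚠️ TinyLlama error: {e}")
        return None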
components/LLMs/__init__.py
ADDED
(empty file, added so components.LLMs is importable as a package)
components/generators/daily_feed.py
CHANGED
@@ -8,6 +8,8 @@ from llama_index.core import VectorStoreIndex
 from llama_index.core.query_engine import RetrieverQueryEngine
 from llama_index.core.schema import Document
 from llama_index.core.settings import Settings
+from components.LLMs.Mistral import call_mistral
+from components.LLMs.TinyLLama import call_tinyllama
 
 # ✅ Disable implicit LLM usage
 Settings.llm = None
@@ -15,8 +17,6 @@ Settings.llm = None
 # 🔐 Environment variables
 REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
 REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
-MISTRAL_URL = os.environ.get("MISTRAL_URL")
-HF_TOKEN = os.environ.get("HF_TOKEN")
 
 # ✅ Redis client
 redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
@@ -24,67 +24,32 @@ redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
 # 📰 Topics
 TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
 
-[... earlier lines of the old inline prompt builder, not recoverable from this view ...]
-    "If you are mentioning a person, include their designation in brackets. For example: Jeff Bezos (Amazon CEO), Narendra Modi (Prime minister of India).\n"
-    "If you're referencing a post like 'NATO Chief', also include the name of the person who holds the post.\n"
-    "If you don't find anything useful, don't return anything for that news item.\n"
-    "Skim through the content and write summaries that are compelling, include essential facts, and feel like strong hook lines.\n"
-    "Be sharp. Be brief. No fluff. No preambles. Avoid source citations like (U.S. Security Council) or (The New York Times).\n"
-    "Return only the summary block - no extra commentary, no prompt repetition."
-)
-    tail = f"Topic: {topic}\n\n{content.strip()}"
-    return f"<s>[INST]{base_instruction}\n\n{tail}[/INST]</s>"
-
-# 🚀 Call Mistral using HF Inference Endpoint
-def call_mistral(prompt: str) -> Optional[str]:
-    payload = {
-        "inputs": prompt
-    }
-
-    try:
-        response = requests.post(MISTRAL_URL, headers=HEADERS, json=payload, timeout=20)
-        response.raise_for_status()
-        data = response.json()
-
-        # Get the generated text
-        raw_output = ""
-        if isinstance(data, list) and data:
-            raw_output = data[0].get("generated_text", "")
-        elif isinstance(data, dict):
-            raw_output = data.get("generated_text", "")
-
-        if "[/INST]</s>" in raw_output:
-            return raw_output.split("[/INST]</s>")[-1].strip()
-        return raw_output.strip()
-
-    except Exception as e:
-        print(f"⚠️ Mistral error: {e}")
-        return None
+# 🧠 Base summarization prompt (used for all topics)
+BASE_PROMPT = (
+    "You are Nuse's official news summarizer - insightful, punchy, and always on point.\n"
+    "Your job is to scan the content below and extract the key news items. For each item, craft a crisp summary (15-20 words). Avoid using any emojis.\n"
+    "List each summary on a new line starting with a dash (-) and no numbers. This is how Nuse keeps it clean and scannable.\n"
+    "\n"
+    "Example format:\n"
+    "- India stuns Australia in a last-ball thriller at the World Cup finals\n"
+    "- U.S. imposes sweeping tariffs on Chinese tech giants, rattling global markets\n"
+    "- Ceasefire breakthrough: Netanyahu (Prime minister of Israel) bows to pressure after week-long escalation\n"
+    "\n"
+    "If you are mentioning a person, include their designation in brackets. For example: Jeff Bezos (Amazon CEO), Narendra Modi (Prime minister of India).\n"
+    "If you're referencing a post like 'NATO Chief', also include the name of the person who holds the post.\n"
+    "If you don't find anything useful, don't return anything for that news item.\n"
+    "Skim through the content and write summaries that are compelling, include essential facts, and feel like strong hook lines.\n"
+    "Be sharp. Be brief. No fluff. No preambles. Avoid source citations like (U.S. Security Council) or (The New York Times).\n"
+    "Return only the summary block - no extra commentary, no prompt repetition."
+)
 
 # ✍️ Summarize top N documents
 def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
     feed = []
     for doc in docs[:5]:
-
-        print("\n📤 Prompt ...
-        summary_block = ...
+        tail_prompt = f"Topic: {topic}\n\n{doc.strip()}"
+        print(f"\n📤 Prompt tail for TinyLlama:\n{tail_prompt[:300]}...\n")
+        summary_block = call_tinyllama(base_prompt=BASE_PROMPT, tail_prompt=tail_prompt)
 
         if summary_block:
             for line in summary_block.splitlines():
@@ -123,4 +88,4 @@ def generate_and_cache_daily_feed(documents: List[Document]):
 # 📦 For testing or API access
 def get_cached_daily_feed():
     cached = redis_client.get(REDIS_KEY)
-    return json.loads(cached) if cached else []
+    return json.loads(cached) if cached else []
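The last hunk above cuts off inside summarize_topic's parsing loop. For orientation, a sketch of how the dash-prefixed summary lines might be collected into feed items; the helper name and field names ("topic", "summary") are assumptions, since the commit's actual loop body is not visible in this diff:

# Hypothetical reconstruction of the parsing loop's continuation.
from typing import Dict, List

def parse_summary_block(summary_block: str, topic: str) -> List[Dict]:
    feed = []
    for line in summary_block.splitlines():
        line = line.strip()
        # BASE_PROMPT asks for one summary per line, each starting with a dash
        if line.startswith("-"):
            headline = line.lstrip("- ").strip()
            if headline:
                feed.append({"topic": topic, "summary": headline})
    return feed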
requirements.txt
CHANGED
@@ -6,6 +6,7 @@ llama-index
 llama-index-embeddings-huggingface
 llama_index.llms.huggingface
 sentence-transformers
+transformers
 llama-index-vector-stores-upstash
 trafilatura
 newspaper3k
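One assumption worth flagging about the new dependency: transformers does not install a deep-learning backend itself; in this stack torch arrives transitively through sentence-transformers, and the TinyLlama pipeline needs it at load time. A quick sanity check:

# Verify that transformers and a torch backend are importable before
# loading the TinyLlama pipeline.
import importlib.util

for pkg in ("transformers", "torch"):
    if importlib.util.find_spec(pkg) is None:
        raise SystemExit(f"missing dependency: {pkg}")
print("backend OK")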