Spaces:

nuseAI
/

FastAPI

Sleeping

App Files Files Community

raghavNCI committed on May 28

Commit

5cb1b50

1 Parent(s): 826a1b8

google search changes v1

Browse files

Files changed (1) hide show

question.py +43 -58

question.py CHANGED Viewed

@@ -1,39 +1,35 @@
-# app/routes/question.py
 import os
 import requests
 from fastapi import APIRouter
 from pydantic import BaseModel
 from typing import List
 from redis_client import redis_client as r
 from dotenv import load_dotenv
 from urllib.parse import quote
-import json
 from nuse_modules.classifier import classify_question, REVERSE_MAP
 from nuse_modules.keyword_extracter import keywords_extractor
 from nuse_modules.google_search import search_google_news
 load_dotenv()
-GNEWS_API_KEY = os.getenv("GNEWS_API_KEY")
 HF_TOKEN = os.getenv("HF_TOKEN")
 askMe = APIRouter()
 class QuestionInput(BaseModel):
     question: str
-HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
-HEADERS = {
-    "Authorization": f"Bearer {HF_TOKEN}",
-    "Content-Type": "application/json"
-}
 def should_extract_keywords(type_id: int) -> bool:
     return type_id in {1, 2, 3, 4, 5, 6, 7, 10}
-def is_relevant(article, keywords):
-    text = f"{article.get('title', '')} {article.get('content', '')}".lower()
-    return any(kw.lower() in text for kw in keywords)
 def extract_answer_after_label(text: str) -> str:
     """
@@ -42,7 +38,8 @@ def extract_answer_after_label(text: str) -> str:
     """
     if "Answer:" in text:
         return text.split("Answer:", 1)[1].strip()
-    return text.strip()
 def mistral_generate(prompt: str, max_new_tokens=128):
     payload = {
@@ -63,81 +60,69 @@ def mistral_generate(prompt: str, max_new_tokens=128):
     except Exception:
         return ""
-def fetch_gnews_articles(query: str) -> List[dict]:
-    encoded_query = quote(query)
-    gnews_url = f"https://gnews.io/api/v4/search?q={encoded_query}&lang=en&max=5&expand=content&token={GNEWS_API_KEY}"
-    print("GNews URL:", gnews_url)
-    try:
-        response = requests.get(gnews_url, timeout=10)
-        response.raise_for_status()
-        return response.json().get("articles", [])
-    except Exception as e:
-        print("GNews API error:", str(e))
-        return []
 @askMe.post("/ask")
 async def ask_question(input: QuestionInput):
     question = input.question
     qid = classify_question(question)
     print("Intent ID:", qid)
     print("Category:", REVERSE_MAP.get(qid, "unknown"))
-    necessary = should_extract_keywords(qid)
-    if necessary:
         keywords = keywords_extractor(question)
         print("Raw extracted keywords:", keywords)
         if not keywords:
             return {"error": "Keyword extraction failed."}
         results = search_google_news(keywords)
         for r in results:
             print(r["title"], r["link"])
-        # Step 2: Fetch articles using AND, then fallback to OR
-        # query_and = " AND ".join(f'"{kw}"' for kw in keywords)
-        # articles = fetch_gnews_articles(query_and)
-        # if not articles:
-        #     query_or = " OR ".join(f'"{kw}"' for kw in keywords)
-        #     articles = fetch_gnews_articles(query_or)
-        # relevant_articles = [a for a in articles if is_relevant(a, keywords)]
-        # context = "\n\n".join([
-        #     a.get("content") or ""
-        #     for a in relevant_articles
-        # ])[:15000]
-        # if not context.strip():
-        #     return {
-        #         "question": question,
-        #         "answer": "Cannot answer – no relevant context found.",
-        #         "sources": []
-        #     }
-    # Step 3: Ask Mistral to answer using the context
     answer_prompt = (
         f"You are a concise news assistant. Answer the user's question clearly using the context below if relevant. "
         f"If the context is not helpful, you may rely on your own knowledge, but do not mention the context or question again.\n\n"
-        # f"Context:\n{context}\n\n"
         f"Question: {question}\n\n"
         f"Answer:"
     )
-    answer = mistral_generate(answer_prompt, max_new_tokens=256)
-    if not answer:
-        final_answer = "Cannot answer – model did not return a valid response."
-    final_answer = extract_answer_after_label(answer)
     return {
         "question": question,
         "answer": final_answer.strip(),
-        # "sources": [
-        #     {"title": a["title"], "url": a["url"]}
-        #     for a in relevant_articles
-        # ]
     }

 import os
 import requests
+import json
 from fastapi import APIRouter
 from pydantic import BaseModel
 from typing import List
 from redis_client import redis_client as r
 from dotenv import load_dotenv
 from urllib.parse import quote
 from nuse_modules.classifier import classify_question, REVERSE_MAP
 from nuse_modules.keyword_extracter import keywords_extractor
 from nuse_modules.google_search import search_google_news
 load_dotenv()
 HF_TOKEN = os.getenv("HF_TOKEN")
+HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
+HEADERS = {
+    "Authorization": f"Bearer {HF_TOKEN}",
+    "Content-Type": "application/json"
+}
 askMe = APIRouter()
 # Request body model for the POST /ask route defined in this file.
 class QuestionInput(BaseModel):
     question: str  # the user's question text; read by ask_question as input.question
 def should_extract_keywords(type_id: int) -> bool:
     """Tell whether the given intent id is one that triggers keyword extraction."""
     keyword_intent_ids = {1, 2, 3, 4, 5, 6, 7, 10}
     needs_keywords = type_id in keyword_intent_ids
     return needs_keywords
 def extract_answer_after_label(text: str) -> str:
     """
     """
     if "Answer:" in text:
         return text.split("Answer:", 1)[1].strip()
+    return text.strip()
 def mistral_generate(prompt: str, max_new_tokens=128):
     payload = {
     except Exception:
         return ""
 @askMe.post("/ask")
 async def ask_question(input: QuestionInput):
     """Answer a user's question, using news-search snippets as context when applicable.

     Flow:
       1. Classify the question intent with ``classify_question``.
       2. If ``should_extract_keywords`` accepts the intent, extract keywords,
          run ``search_google_news`` and build a context string (capped at
          15000 characters) plus a list of sources from the results.
       3. Prompt ``mistral_generate`` with the context and the question.

     Args:
         input: Request body carrying the ``question`` string.

     Returns:
         ``{"question", "answer", "sources"}`` on the normal path, or
         ``{"error": "Keyword extraction failed."}`` when no keywords come back.
     """
     question = input.question

     # Step 1: Classify question intent
     qid = classify_question(question)
     print("Intent ID:", qid)
     print("Category:", REVERSE_MAP.get(qid, "unknown"))

     context = ""
     sources = []

     # Step 2: Keyword extraction and news search (if needed)
     if should_extract_keywords(qid):
         keywords = keywords_extractor(question)
         print("Raw extracted keywords:", keywords)

         if not keywords:
             return {"error": "Keyword extraction failed."}

         # Search Google News. `or []` keeps the handler alive in case the
         # helper hands back None instead of an empty list.
         results = search_google_news(keywords) or []
         print("Found articles:", len(results))

         # The loop variable is deliberately not called `r`: that name is the
         # redis client imported at module level.
         snippets = []
         for article in results:
             # .get() so one result lacking a field does not fail the request.
             title = article.get("title", "")
             link = article.get("link", "")
             print(title, link)

             # Either field may be absent or None; only real text may reach
             # str.join, which raises TypeError on non-string items.
             snippet = article.get("snippet") or article.get("description") or ""
             if snippet:
                 snippets.append(snippet)

             sources.append({"title": title, "url": link})

         # Build context from snippet/description, bounded to keep the prompt small.
         context = "\n\n".join(snippets)[:15000]

         if not context.strip():
             return {
                 "question": question,
                 "answer": "Cannot answer – no relevant context found.",
                 "sources": sources,
             }

     # Step 3: Ask Mistral to answer
     answer_prompt = (
         "You are a concise news assistant. Answer the user's question clearly using the context below if relevant. "
         "If the context is not helpful, you may rely on your own knowledge, but do not mention the context or question again.\n\n"
         f"Context:\n{context}\n\n"
         f"Question: {question}\n\n"
         "Answer:"
     )

     answer_raw = mistral_generate(answer_prompt, max_new_tokens=256)

     # mistral_generate returns "" on failure, so an empty reply means no answer.
     if not answer_raw:
         final_answer = "Cannot answer – model did not return a valid response."
     else:
         final_answer = extract_answer_after_label(answer_raw)

     return {
         "question": question,
         "answer": final_answer.strip(),
         "sources": sources,
     }