raghavNCI
committed on
Commit
·
5cb1b50
1
Parent(s):
826a1b8
google search changes v1
Browse files- question.py +43 -58
question.py
CHANGED
@@ -1,39 +1,35 @@
|
|
1 |
-
# app/routes/question.py
|
2 |
import os
|
3 |
import requests
|
|
|
4 |
from fastapi import APIRouter
|
5 |
from pydantic import BaseModel
|
6 |
from typing import List
|
7 |
from redis_client import redis_client as r
|
8 |
from dotenv import load_dotenv
|
9 |
from urllib.parse import quote
|
10 |
-
|
11 |
from nuse_modules.classifier import classify_question, REVERSE_MAP
|
12 |
from nuse_modules.keyword_extracter import keywords_extractor
|
13 |
from nuse_modules.google_search import search_google_news
|
14 |
|
15 |
# Pull settings from a .env file (if any) before reading them below.
load_dotenv()

# API credentials come from the environment; each is None when unset.
GNEWS_API_KEY = os.getenv("GNEWS_API_KEY")
HF_TOKEN = os.getenv("HF_TOKEN")

# Router that the question endpoint(s) in this module are registered on.
askMe = APIRouter()
|
21 |
|
22 |
class QuestionInput(BaseModel):
    """Request payload carrying the user's question."""

    # Free-text question supplied by the caller.
    question: str
|
24 |
|
25 |
-
# Hugging Face Inference API endpoint for the Mistral-7B-Instruct-v0.3 model.
HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"

# Default headers for requests to HF_API_URL: bearer auth plus a JSON body.
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json",
}
|
30 |
|
31 |
def should_extract_keywords(type_id: int) -> bool:
    """Return True when ``type_id`` is an intent id that needs keyword extraction.

    The qualifying ids are 1-7 and 10; every other id yields False.
    """
    keyword_intents = frozenset((1, 2, 3, 4, 5, 6, 7, 10))
    return type_id in keyword_intents
|
33 |
|
34 |
-
def is_relevant(article, keywords):
    """Return True if any keyword occurs in the article's title or content.

    Matching is a case-insensitive substring test against the article's
    ``title`` and ``content`` fields joined by a space.

    Args:
        article: Mapping that may contain ``title`` and ``content`` entries.
            Missing or ``None`` values are treated as empty text.
        keywords: Iterable of keyword strings, or ``None``.

    Returns:
        True when at least one non-blank keyword is found, otherwise False.
    """
    if not keywords:
        return False

    # ``or ""`` guards against explicit None values, which an f-string would
    # otherwise render as the literal text "None" and match the keyword "none".
    title = article.get("title") or ""
    content = article.get("content") or ""
    text = f"{title} {content}".lower()

    # Blank keywords are skipped: "" is a substring of every string and would
    # make every article look relevant.
    return any(kw.lower() in text for kw in keywords if kw and kw.strip())
|
37 |
|
38 |
def extract_answer_after_label(text: str) -> str:
|
39 |
"""
|
@@ -42,7 +38,8 @@ def extract_answer_after_label(text: str) -> str:
|
|
42 |
"""
|
43 |
if "Answer:" in text:
|
44 |
return text.split("Answer:", 1)[1].strip()
|
45 |
-
return text.strip()
|
|
|
46 |
|
47 |
def mistral_generate(prompt: str, max_new_tokens=128):
|
48 |
payload = {
|
@@ -63,81 +60,69 @@ def mistral_generate(prompt: str, max_new_tokens=128):
|
|
63 |
except Exception:
|
64 |
return ""
|
65 |
|
66 |
-
def fetch_gnews_articles(query: str) -> List[dict]:
    """Search the GNews v4 API for ``query`` and return its ``articles`` list.

    The request asks for up to 5 English results with expanded content and
    authenticates with ``GNEWS_API_KEY``.

    Args:
        query: Search text; it is URL-quoted before being placed in the URL.

    Returns:
        The ``articles`` list from the JSON response, or an empty list when
        the key is absent or when the request or decoding fails (best effort:
        errors are printed, never raised).
    """
    encoded_query = quote(query)
    base_url = (
        f"https://gnews.io/api/v4/search?q={encoded_query}"
        "&lang=en&max=5&expand=content"
    )
    # Log the URL with the token masked so the API key never reaches the logs.
    print("GNews URL:", f"{base_url}&token=***")
    try:
        response = requests.get(f"{base_url}&token={GNEWS_API_KEY}", timeout=10)
        response.raise_for_status()
        return response.json().get("articles", [])
    except Exception as e:
        # The error text can include the request URL, so mask the token here too.
        message = str(e)
        if GNEWS_API_KEY:
            message = message.replace(GNEWS_API_KEY, "***")
        print("GNews API error:", message)
        return []
|
77 |
|
78 |
@askMe.post("/ask")
|
79 |
async def ask_question(input: QuestionInput):
|
80 |
question = input.question
|
81 |
|
|
|
82 |
qid = classify_question(question)
|
83 |
print("Intent ID:", qid)
|
84 |
print("Category:", REVERSE_MAP.get(qid, "unknown"))
|
85 |
|
86 |
-
|
|
|
87 |
|
88 |
-
if
|
|
|
89 |
keywords = keywords_extractor(question)
|
90 |
print("Raw extracted keywords:", keywords)
|
91 |
|
92 |
if not keywords:
|
93 |
return {"error": "Keyword extraction failed."}
|
94 |
-
|
|
|
95 |
results = search_google_news(keywords)
|
|
|
96 |
|
97 |
for r in results:
|
98 |
print(r["title"], r["link"])
|
99 |
|
100 |
-
#
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
# "sources": []
|
120 |
-
# }
|
121 |
-
|
122 |
-
# Step 3: Ask Mistral to answer using the context
|
123 |
answer_prompt = (
|
124 |
f"You are a concise news assistant. Answer the user's question clearly using the context below if relevant. "
|
125 |
f"If the context is not helpful, you may rely on your own knowledge, but do not mention the context or question again.\n\n"
|
126 |
-
|
127 |
f"Question: {question}\n\n"
|
128 |
f"Answer:"
|
129 |
)
|
130 |
-
|
131 |
-
if not answer:
|
132 |
-
final_answer = "Cannot answer – model did not return a valid response."
|
133 |
|
134 |
-
|
|
|
|
|
|
|
135 |
|
136 |
return {
|
137 |
"question": question,
|
138 |
"answer": final_answer.strip(),
|
139 |
-
|
140 |
-
# {"title": a["title"], "url": a["url"]}
|
141 |
-
# for a in relevant_articles
|
142 |
-
# ]
|
143 |
}
|
|
|
|
|
1 |
import os
|
2 |
import requests
|
3 |
+
import json
|
4 |
from fastapi import APIRouter
|
5 |
from pydantic import BaseModel
|
6 |
from typing import List
|
7 |
from redis_client import redis_client as r
|
8 |
from dotenv import load_dotenv
|
9 |
from urllib.parse import quote
|
10 |
+
|
11 |
from nuse_modules.classifier import classify_question, REVERSE_MAP
|
12 |
from nuse_modules.keyword_extracter import keywords_extractor
|
13 |
from nuse_modules.google_search import search_google_news
|
14 |
|
15 |
# Pull settings from a .env file (if any) before reading them below.
load_dotenv()

# Hugging Face access token from the environment; None when unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Hugging Face Inference API endpoint for the Mistral-7B-Instruct-v0.3 model.
HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"

# Default headers for requests to HF_API_URL: bearer auth plus a JSON body.
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json",
}

# Router that the question endpoint(s) in this module are registered on.
askMe = APIRouter()
|
25 |
|
26 |
class QuestionInput(BaseModel):
    """Request body accepted by the ``/ask`` endpoint."""

    # Free-text question supplied by the caller.
    question: str
|
28 |
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
def should_extract_keywords(type_id: int) -> bool:
    """Return True when ``type_id`` is an intent id that needs keyword extraction.

    The qualifying ids are 1-7 and 10; every other id yields False.
    """
    keyword_intents = frozenset((1, 2, 3, 4, 5, 6, 7, 10))
    return type_id in keyword_intents
|
32 |
|
|
|
|
|
|
|
33 |
|
34 |
def extract_answer_after_label(text: str) -> str:
|
35 |
"""
|
|
|
38 |
"""
|
39 |
if "Answer:" in text:
|
40 |
return text.split("Answer:", 1)[1].strip()
|
41 |
+
return text.strip()
|
42 |
+
|
43 |
|
44 |
def mistral_generate(prompt: str, max_new_tokens=128):
|
45 |
payload = {
|
|
|
60 |
except Exception:
|
61 |
return ""
|
62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
@askMe.post("/ask")
async def ask_question(input: QuestionInput):
    """Answer a question, optionally grounding it in Google News snippets.

    Flow:
      1. Classify the question's intent with ``classify_question``.
      2. If ``should_extract_keywords`` accepts the intent, extract keywords,
         search Google News, and build a context string (capped at 15000
         characters) from each result's snippet/description.
      3. Prompt Mistral with the context and question and return its answer.

    Args:
        input: Request body holding the ``question`` string.

    Returns:
        ``{"question", "answer", "sources"}`` on the normal path, where
        ``sources`` is a list of ``{"title", "url"}`` dicts (empty when no
        search ran); or ``{"error": "Keyword extraction failed."}`` when no
        keywords could be extracted.
    """
    question = input.question

    # Step 1: Classify question intent
    qid = classify_question(question)
    print("Intent ID:", qid)
    print("Category:", REVERSE_MAP.get(qid, "unknown"))

    context = ""
    sources = []

    # Step 2: Keyword extraction and news search (if needed)
    if should_extract_keywords(qid):
        keywords = keywords_extractor(question)
        print("Raw extracted keywords:", keywords)

        if not keywords:
            return {"error": "Keyword extraction failed."}

        # Search Google News; tolerate a None result from the search helper.
        results = search_google_news(keywords) or []
        print("Found articles:", len(results))

        # The loop variable is deliberately not named ``r`` so it does not
        # shadow the redis client alias imported at module level.
        for article in results:
            print(article.get("title", ""), article.get("link", ""))

        # Build context from snippet/description. Missing or None fields
        # become "" and are dropped so join() never sees a non-string.
        snippets = [
            article.get("snippet") or article.get("description") or ""
            for article in results
        ]
        context = "\n\n".join(text for text in snippets if text)[:15000]

        # .get() keeps one partial search result from failing the request.
        sources = [
            {"title": article.get("title", ""), "url": article.get("link", "")}
            for article in results
        ]

        # NOTE(review): indentation was lost in the reviewed copy; this early
        # return is assumed to sit inside the search branch so intents that
        # skip the search still reach the model - confirm.
        if not context.strip():
            return {
                "question": question,
                "answer": "Cannot answer – no relevant context found.",
                "sources": sources
            }

    # Step 3: Ask Mistral to answer
    answer_prompt = (
        f"You are a concise news assistant. Answer the user's question clearly using the context below if relevant. "
        f"If the context is not helpful, you may rely on your own knowledge, but do not mention the context or question again.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}\n\n"
        f"Answer:"
    )
    # NOTE(review): this is a synchronous call inside an async handler; if
    # mistral_generate does blocking HTTP it will stall the event loop - confirm.
    answer_raw = mistral_generate(answer_prompt, max_new_tokens=256)

    if not answer_raw:
        final_answer = "Cannot answer – model did not return a valid response."
    else:
        final_answer = extract_answer_after_label(answer_raw)

    return {
        "question": question,
        "answer": final_answer.strip(),
        "sources": sources
    }
|