raghavNCI committed on
Commit
5cb1b50
·
1 Parent(s): 826a1b8

google search changes v1

Browse files
Files changed (1) hide show
  1. question.py +43 -58
question.py CHANGED
@@ -1,39 +1,35 @@
1
- # app/routes/question.py
2
  import os
3
  import requests
 
4
  from fastapi import APIRouter
5
  from pydantic import BaseModel
6
  from typing import List
7
  from redis_client import redis_client as r
8
  from dotenv import load_dotenv
9
  from urllib.parse import quote
10
- import json
11
  from nuse_modules.classifier import classify_question, REVERSE_MAP
12
  from nuse_modules.keyword_extracter import keywords_extractor
13
  from nuse_modules.google_search import search_google_news
14
 
15
  load_dotenv()
16
 
17
- GNEWS_API_KEY = os.getenv("GNEWS_API_KEY")
18
  HF_TOKEN = os.getenv("HF_TOKEN")
 
 
 
 
 
19
 
20
  askMe = APIRouter()
21
 
22
  class QuestionInput(BaseModel):
23
  question: str
24
 
25
- HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
26
- HEADERS = {
27
- "Authorization": f"Bearer {HF_TOKEN}",
28
- "Content-Type": "application/json"
29
- }
30
 
31
  def should_extract_keywords(type_id: int) -> bool:
32
  return type_id in {1, 2, 3, 4, 5, 6, 7, 10}
33
 
34
- def is_relevant(article, keywords):
35
- text = f"{article.get('title', '')} {article.get('content', '')}".lower()
36
- return any(kw.lower() in text for kw in keywords)
37
 
38
  def extract_answer_after_label(text: str) -> str:
39
  """
@@ -42,7 +38,8 @@ def extract_answer_after_label(text: str) -> str:
42
  """
43
  if "Answer:" in text:
44
  return text.split("Answer:", 1)[1].strip()
45
- return text.strip()
 
46
 
47
  def mistral_generate(prompt: str, max_new_tokens=128):
48
  payload = {
@@ -63,81 +60,69 @@ def mistral_generate(prompt: str, max_new_tokens=128):
63
  except Exception:
64
  return ""
65
 
66
- def fetch_gnews_articles(query: str) -> List[dict]:
67
- encoded_query = quote(query)
68
- gnews_url = f"https://gnews.io/api/v4/search?q={encoded_query}&lang=en&max=5&expand=content&token={GNEWS_API_KEY}"
69
- print("GNews URL:", gnews_url)
70
- try:
71
- response = requests.get(gnews_url, timeout=10)
72
- response.raise_for_status()
73
- return response.json().get("articles", [])
74
- except Exception as e:
75
- print("GNews API error:", str(e))
76
- return []
77
 
78
  @askMe.post("/ask")
79
  async def ask_question(input: QuestionInput):
80
  question = input.question
81
 
 
82
  qid = classify_question(question)
83
  print("Intent ID:", qid)
84
  print("Category:", REVERSE_MAP.get(qid, "unknown"))
85
 
86
- necessary = should_extract_keywords(qid)
 
87
 
88
- if necessary:
 
89
  keywords = keywords_extractor(question)
90
  print("Raw extracted keywords:", keywords)
91
 
92
  if not keywords:
93
  return {"error": "Keyword extraction failed."}
94
-
 
95
  results = search_google_news(keywords)
 
96
 
97
  for r in results:
98
  print(r["title"], r["link"])
99
 
100
- # Step 2: Fetch articles using AND, then fallback to OR
101
- # query_and = " AND ".join(f'"{kw}"' for kw in keywords)
102
- # articles = fetch_gnews_articles(query_and)
103
-
104
- # if not articles:
105
- # query_or = " OR ".join(f'"{kw}"' for kw in keywords)
106
- # articles = fetch_gnews_articles(query_or)
107
-
108
- # relevant_articles = [a for a in articles if is_relevant(a, keywords)]
109
-
110
- # context = "\n\n".join([
111
- # a.get("content") or ""
112
- # for a in relevant_articles
113
- # ])[:15000]
114
-
115
- # if not context.strip():
116
- # return {
117
- # "question": question,
118
- # "answer": "Cannot answer no relevant context found.",
119
- # "sources": []
120
- # }
121
-
122
- # Step 3: Ask Mistral to answer using the context
123
  answer_prompt = (
124
  f"You are a concise news assistant. Answer the user's question clearly using the context below if relevant. "
125
  f"If the context is not helpful, you may rely on your own knowledge, but do not mention the context or question again.\n\n"
126
- # f"Context:\n{context}\n\n"
127
  f"Question: {question}\n\n"
128
  f"Answer:"
129
  )
130
- answer = mistral_generate(answer_prompt, max_new_tokens=256)
131
- if not answer:
132
- final_answer = "Cannot answer – model did not return a valid response."
133
 
134
- final_answer = extract_answer_after_label(answer)
 
 
 
135
 
136
  return {
137
  "question": question,
138
  "answer": final_answer.strip(),
139
- # "sources": [
140
- # {"title": a["title"], "url": a["url"]}
141
- # for a in relevant_articles
142
- # ]
143
  }
 
 
1
  import os
2
  import requests
3
+ import json
4
  from fastapi import APIRouter
5
  from pydantic import BaseModel
6
  from typing import List
7
  from redis_client import redis_client as r
8
  from dotenv import load_dotenv
9
  from urllib.parse import quote
10
+
11
  from nuse_modules.classifier import classify_question, REVERSE_MAP
12
  from nuse_modules.keyword_extracter import keywords_extractor
13
  from nuse_modules.google_search import search_google_news
14
 
15
# Load variables from a local .env file (python-dotenv) before the
# os.getenv() lookup below reads the environment.
load_dotenv()

# Hugging Face access token. os.getenv returns None when HF_TOKEN is unset,
# in which case the Authorization header below reads "Bearer None".
HF_TOKEN = os.getenv("HF_TOKEN")
HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json"
}

# Router on which the /ask endpoint of this module is registered.
askMe = APIRouter()
25
 
26
class QuestionInput(BaseModel):
    """Request body for the /ask endpoint."""

    # The user's question as free text.
    question: str
28
 
 
 
 
 
 
29
 
30
# Intent IDs for which keyword extraction is performed. The meaning of each ID
# is defined by the classifier, not here.
_KEYWORD_INTENT_IDS = frozenset({1, 2, 3, 4, 5, 6, 7, 10})


def should_extract_keywords(type_id: int) -> bool:
    """Return True when *type_id* is one of the intent IDs that need keywords.

    Args:
        type_id: Intent ID to look up.

    Returns:
        True for IDs 1-7 and 10, False for anything else.
    """
    return type_id in _KEYWORD_INTENT_IDS
32
 
 
 
 
33
 
34
  def extract_answer_after_label(text: str) -> str:
35
  """
 
38
  """
39
  if "Answer:" in text:
40
  return text.split("Answer:", 1)[1].strip()
41
+ return text.strip()
42
+
43
 
44
  def mistral_generate(prompt: str, max_new_tokens=128):
45
  payload = {
 
60
  except Exception:
61
  return ""
62
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
@askMe.post("/ask")
async def ask_question(input: QuestionInput):
    """Answer a user question, optionally grounded in Google News results.

    Steps:
      1. Classify the question with ``classify_question``.
      2. If ``should_extract_keywords`` accepts the intent ID, extract
         keywords, search Google News, and build a context string plus a
         list of sources from the results.
      3. Prompt the model through ``mistral_generate`` and return the text
         after the "Answer:" label.

    Args:
        input: Request body carrying the ``question`` string.

    Returns:
        A dict with ``question``, ``answer`` and ``sources`` keys, or
        ``{"error": ...}`` when keyword extraction yields nothing.
    """
    # NOTE(review): the helpers called below appear to be synchronous; inside
    # an ``async def`` route they would block the event loop - confirm.
    question = input.question

    # Step 1: Classify question intent
    qid = classify_question(question)
    print("Intent ID:", qid)
    print("Category:", REVERSE_MAP.get(qid, "unknown"))

    context = ""
    sources = []

    # Step 2: Keyword extraction and news search (if needed)
    if should_extract_keywords(qid):
        keywords = keywords_extractor(question)
        print("Raw extracted keywords:", keywords)

        if not keywords:
            return {"error": "Keyword extraction failed."}

        # Search Google News. A falsy result (e.g. None) is treated as "no
        # articles" so the logging and comprehensions below cannot fail on it.
        results = search_google_news(keywords) or []
        print("Found articles:", len(results))

        # The loop variable is deliberately not called ``r``: that name is the
        # module-level Redis client import and must not be shadowed here.
        for article in results:
            print(article["title"], article["link"])

        # Build context from snippet/description. The trailing ``or ""`` keeps
        # the join safe when a key is present but holds None.
        context = "\n\n".join(
            article.get("snippet") or article.get("description") or ""
            for article in results
        )[:15000]

        sources = [
            {"title": article["title"], "url": article["link"]}
            for article in results
        ]

        # NOTE(review): nesting reconstructed - this guard is assumed to apply
        # only when a news search was attempted, so questions that skip the
        # search still reach the model with an empty context. Confirm.
        if not context.strip():
            return {
                "question": question,
                "answer": "Cannot answer – no relevant context found.",
                "sources": sources
            }

    # Step 3: Ask Mistral to answer
    answer_prompt = (
        f"You are a concise news assistant. Answer the user's question clearly using the context below if relevant. "
        f"If the context is not helpful, you may rely on your own knowledge, but do not mention the context or question again.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}\n\n"
        f"Answer:"
    )
    answer_raw = mistral_generate(answer_prompt, max_new_tokens=256)

    # A falsy response is replaced by the fixed fallback message below.
    if not answer_raw:
        final_answer = "Cannot answer – model did not return a valid response."
    else:
        final_answer = extract_answer_after_label(answer_raw)

    return {
        "question": question,
        "answer": final_answer.strip(),
        "sources": sources
    }