File size: 3,774 Bytes
8e9fd43 5cb1b50 2c42748 8e9fd43 587894c 5cb1b50 eafca75 aefa1e1 826a1b8 8e9fd43 fbd5aba 5cb1b50 8e9fd43 c1ba890 8e9fd43 2f3b9d0 826a1b8 43cf665 f5e2bc7 5cb1b50 43cf665 2f3b9d0 ed08d36 2f3b9d0 ed08d36 c1ba890 8e9fd43 5cb1b50 eafca75 5cb1b50 587894c 5cb1b50 826a1b8 d229ca3 826a1b8 5cb1b50 826a1b8 5cb1b50 d229ca3 826a1b8 587894c 5cb1b50 2f3b9d0 43cf665 5cb1b50 43cf665 2f3b9d0 5cb1b50 f5e2bc7 5cb1b50 8573cc3 8e9fd43 f5e2bc7 5cb1b50 2c42748 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import os
import requests
import json
from fastapi import APIRouter
from pydantic import BaseModel
from typing import List
from redis_client import redis_client as r
from dotenv import load_dotenv
from urllib.parse import quote
from nuse_modules.classifier import classify_question, REVERSE_MAP
from nuse_modules.keyword_extracter import keywords_extractor
from nuse_modules.google_search import search_google_news
# Load variables from a local .env file so os.getenv below can see them.
load_dotenv()
# Hugging Face Inference API configuration for the hosted Mistral model.
HF_TOKEN = os.getenv("HF_TOKEN")
HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json"
}
# Router carrying the /ask endpoint; presumably included by the app entrypoint.
askMe = APIRouter()
class QuestionInput(BaseModel):
    """Request body for POST /ask: the user's free-text question."""
    question: str
def should_extract_keywords(type_id: int) -> bool:
    """Return True when the classified intent warrants a news search."""
    news_intents = frozenset((1, 2, 3, 4, 5, 6, 7, 10))
    return type_id in news_intents
def extract_answer_after_label(text: str) -> str:
    """Return the content after the first 'Answer:' label in *text*.

    If the label is absent, the whole text is returned instead.
    Surrounding whitespace is stripped in both cases.
    """
    _head, label, tail = text.partition("Answer:")
    return tail.strip() if label else text.strip()
def mistral_generate(prompt: str, max_new_tokens: int = 128) -> str:
    """Call the hosted Mistral model and return its generated text.

    Args:
        prompt: Full prompt to send to the HF inference endpoint.
        max_new_tokens: Generation cap forwarded to the model.

    Returns:
        The stripped generated text, or "" on request failure, malformed
        JSON, or an unexpected response shape (callers treat "" as
        "model did not respond").
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": 0.7,
        },
    }
    try:
        # `json=` serializes the payload and sets the Content-Type header,
        # replacing the manual json.dumps round-trip.
        response = requests.post(HF_API_URL, headers=HEADERS, json=payload, timeout=30)
        response.raise_for_status()
        result = response.json()
    except (requests.RequestException, ValueError):
        # Narrowed from a blanket `except Exception`: network/HTTP errors and
        # undecodable JSON degrade to the "" sentinel; programming errors
        # are no longer silently swallowed.
        return ""
    # The inference API normally returns [{"generated_text": ...}]; guard the
    # shape explicitly instead of relying on a broad except to mask it.
    if isinstance(result, list) and result and isinstance(result[0], dict):
        return result[0].get("generated_text", "").strip()
    return ""
@askMe.post("/ask")
async def ask_question(input: QuestionInput):
    """Answer a user question, optionally grounded in fresh news articles.

    Pipeline:
      1. Classify the question's intent.
      2. For news-type intents, extract keywords, search Google News, and
         build a context string plus a list of source links.
      3. Ask the Mistral model for an answer using that context.

    Returns:
        {"question": ..., "answer": ..., "sources": [...]} on success, or
        {"error": ...} when keyword extraction fails.
    """
    question = input.question

    # Step 1: Classify question intent
    qid = classify_question(question)
    print("Intent ID:", qid)
    print("Category:", REVERSE_MAP.get(qid, "unknown"))

    context = ""
    sources = []

    # Step 2: Keyword extraction and news search (if needed)
    if should_extract_keywords(qid):
        keywords = keywords_extractor(question)
        print("Raw extracted keywords:", keywords)
        if not keywords:
            return {"error": "Keyword extraction failed."}

        # Search Google News
        results = search_google_news(keywords)
        print("Found articles:", len(results))

        # Single pass over the results; the loop variable is `article`, not
        # `r`, which previously shadowed the imported redis client
        # (`from redis_client import redis_client as r`).
        snippets = []
        for article in results:
            # .get with defaults: a result missing title/link no longer
            # raises KeyError (which surfaced as a 500).
            title = article.get("title", "")
            link = article.get("link", "")
            print(title, link)
            snippets.append(article.get("snippet") or article.get("description", ""))
            sources.append({"title": title, "url": link})

        # Cap the context so the prompt stays within the model's budget.
        context = "\n\n".join(snippets)[:15000]

        if not context.strip():
            return {
                "question": question,
                "answer": "Cannot answer – no relevant context found.",
                "sources": sources
            }

    # Step 3: Ask Mistral to answer
    answer_prompt = (
        f"You are a concise news assistant. Answer the user's question clearly using the context below if relevant. "
        f"If the context is not helpful, you may rely on your own knowledge, but do not mention the context or question again.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}\n\n"
        f"Answer:"
    )
    answer_raw = mistral_generate(answer_prompt, max_new_tokens=256)
    if not answer_raw:
        final_answer = "Cannot answer – model did not return a valid response."
    else:
        final_answer = extract_answer_after_label(answer_raw)

    return {
        "question": question,
        "answer": final_answer.strip(),
        "sources": sources
    }
|