# app/routes/question.py
import os
import requests
from fastapi import APIRouter
from pydantic import BaseModel
from typing import List
from redis_client import redis_client as r
from dotenv import load_dotenv
from urllib.parse import quote_plus
import re
import json

load_dotenv()

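# API keys are read from the environment; load_dotenv() above also picks them up
# from a local .env file if one is present.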
GNEWS_API_KEY = os.getenv("GNEWS_API_KEY")
HF_TOKEN = os.getenv("HF_TOKEN")

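# Router for the question-answering endpoint; presumably registered on the main
# FastAPI app elsewhere via app.include_router(askMe).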
askMe = APIRouter()

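# Request body schema for POST /ask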
class QuestionInput(BaseModel):
    question: str

@askMe.post("/ask")
async def ask_question(input: QuestionInput):
    question = input.question

    # Basic keyword extraction with stopword filtering
    STOPWORDS = {"what", "about", "which", "would", "could", "this", "that"}
    keywords = [
        kw for kw in re.findall(r"\b\w{4,}\b", question.lower())
        if kw not in STOPWORDS
    ]
    # Build a quoted AND-query from the top keywords and URL-encode it
    query_string = " AND ".join(f'"{kw}"' for kw in keywords[:7])
    encoded_query = quote_plus(query_string)

    print("Query string:", query_string)

    # Use the encoded query so the quotes and spaces survive as a URL parameter
    gnews_url = f"https://gnews.io/api/v4/search?q={encoded_query}&lang=en&max=3&expand=content&token={GNEWS_API_KEY}"

    try:
        response = requests.get(gnews_url, timeout=10)
        response.raise_for_status()
        articles = response.json().get("articles", [])
    except Exception as e:
        return {"error": f"GNews API error: {str(e)}"}
    
    print("the articles are", articles)

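    # Build the prompt context: prefer the full article content, fall back to the
    # description, and cap the combined text at 1,500 characters.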
    context = "\n\n".join([
        article.get("content") or article.get("description") or ""
        for article in articles
    ])[:1500]

    if not context.strip():
        return {
            "question": question,
            "answer": "Cannot answer – no relevant context found.",
            "sources": []
        }

    # Call the Hugging Face Inference API directly (Mistral-7B-Instruct is served
    # as a text-generation model, so "inputs" must be a plain prompt string).
    hf_api_url = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json"
    }

    prompt = (
        "You are an assistant that answers questions based on recent news.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}"
    )
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 256,
            "temperature": 0.7,
            "return_full_text": False
        }
    }

    try:
        response = requests.post(hf_api_url, headers=headers, data=json.dumps(payload), timeout=30)
        response.raise_for_status()
        hf_response = response.json()
        # Successful text-generation calls return a list like [{"generated_text": "..."}]
        if isinstance(hf_response, list) and hf_response:
            answer = hf_response[0].get("generated_text", "Cannot answer.")
        else:
            answer = hf_response.get("generated_text", "Cannot answer.")
    except Exception as e:
        return {"error": f"Hugging Face API error: {str(e)}"}

    return {
        "question": question,
        "answer": answer.strip(),
        "sources": [
            {"title": a["title"], "url": a["url"]}
            for a in articles
        ]
    }
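
# Quick manual check, assuming the router is mounted on the app without a prefix and
# the server is running locally on port 8000 (neither is shown in this file):
#
#   curl -X POST http://localhost:8000/ask \
#        -H "Content-Type: application/json" \
#        -d '{"question": "What is the latest news about electric vehicle batteries?"}'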