import gradio as gr
import numpy as np
import faiss
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
from sentence_transformers import SentenceTransformer

from utils import get_wikipedia_content, split_text, create_faiss_index

# Load the embedding model (for retrieval) and the extractive QA model
embedding_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

qa_model_name = "deepset/roberta-base-squad2"
qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name)
qa_pipeline = pipeline("question-answering", model=qa_model, tokenizer=qa_tokenizer)
# Handle a user query: retrieve relevant chunks, then answer extractively
def answer_question(topic, question):
    # Fetch Wikipedia content
    document = get_wikipedia_content(topic)
    if not document:
        return "Could not retrieve information from Wikipedia."

    # Split content into chunks
    chunks = split_text(document)

    # Create a FAISS index over the chunk embeddings
    index, embeddings = create_faiss_index(chunks)

    # Encode the query and retrieve the k most similar chunks
    query_embedding = embedding_model.encode([question])
    k = min(3, len(chunks))  # guard against documents with fewer than 3 chunks
    distances, indices = index.search(np.array(query_embedding), k)
    retrieved_chunks = [chunks[i] for i in indices[0]]

    # Generate an answer with the QA model, using the retrieved chunks as context
    context = " ".join(retrieved_chunks)
    answer = qa_pipeline(question=question, context=context)
    return f"**Answer:** {answer['answer']}"
# Create the Gradio interface
iface = gr.Interface(
    fn=answer_question,
    inputs=[gr.Textbox(label="Enter a Wikipedia Topic"), gr.Textbox(label="Ask a Question")],
    outputs="markdown",  # render the **Answer:** formatting instead of showing literal asterisks
    title="Wikipedia Q&A System",
    description="Ask questions about any Wikipedia topic using AI-powered retrieval and QA models.",
)

if __name__ == "__main__":
    iface.launch(share=True)
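
# ---------------------------------------------------------------------------
# utils.py is not shown in this Space. The sketch below is one plausible
# implementation of the three imported helpers, assuming the `wikipedia`
# package for page fetching, simple word-based chunking with overlap, and a
# FAISS IndexFlatL2 over the same all-mpnet-base-v2 embeddings used above;
# the actual module may differ.
# ---------------------------------------------------------------------------

# --- utils.py (sketch) ---
import wikipedia
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

# Reuse the same embedding model as app.py so query and chunk vectors live
# in the same space.
_embedding_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

def get_wikipedia_content(topic):
    """Return the plain-text content of a Wikipedia page, or None on failure."""
    try:
        return wikipedia.page(topic, auto_suggest=False).content
    except (wikipedia.exceptions.PageError, wikipedia.exceptions.DisambiguationError):
        return None

def split_text(document, chunk_size=500, overlap=50):
    """Split a document into overlapping chunks of roughly chunk_size words."""
    words = document.split()
    step = chunk_size - overlap
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), step)]

def create_faiss_index(chunks):
    """Embed each chunk and build an exact L2 nearest-neighbour index."""
    embeddings = _embedding_model.encode(chunks)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(np.asarray(embeddings, dtype="float32"))
    return index, embeddings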