# Wikipedia Q&A demo app: Gradio UI + sentence-transformers retrieval + FAISS + extractive QA.
import gradio as gr
import numpy as np
import faiss
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
from sentence_transformers import SentenceTransformer
from utils import get_wikipedia_content, split_text, create_faiss_index
# Load models
# Sentence embedder used to vectorise both document chunks and user queries
# (must match the embedder used in utils.create_faiss_index for search to be meaningful).
embedding_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
# Extractive QA model fine-tuned on SQuAD2.
qa_model_name = "deepset/roberta-base-squad2"
qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name)
# Pipeline wrapping model + tokenizer for span-extraction question answering.
qa_pipeline = pipeline("question-answering", model=qa_model, tokenizer=qa_tokenizer)
# Function to handle user query
def answer_question(topic, question):
    """Answer *question* from the Wikipedia article for *topic*.

    Pipeline: fetch the article, split it into chunks, build a FAISS index
    over chunk embeddings, retrieve the chunks most similar to the query,
    then run the extractive QA model over the concatenated context.

    Args:
        topic: Wikipedia page title/topic to look up.
        question: Natural-language question about the topic.

    Returns:
        A markdown-formatted answer string, or an error message when the
        article cannot be retrieved or yields no usable text.
    """
    # Fetch Wikipedia content
    document = get_wikipedia_content(topic)
    if not document:
        return "Could not retrieve information from Wikipedia."

    # Split content into chunks
    chunks = split_text(document)
    if not chunks:
        # Defensive: an article that produced no chunks cannot be indexed.
        return "Could not retrieve information from Wikipedia."

    # Create FAISS index
    index, embeddings = create_faiss_index(chunks)

    # Encode query and search relevant chunks.
    query_embedding = embedding_model.encode([question])
    # Never ask FAISS for more neighbours than there are chunks; otherwise
    # it pads `indices` with -1 and chunks[-1] would silently grab the
    # wrong (last) chunk.
    k = min(3, len(chunks))
    distances, indices = index.search(np.array(query_embedding), k)
    # Skip any -1 padding entries FAISS may still return.
    retrieved_chunks = [chunks[i] for i in indices[0] if i != -1]

    # Generate answer using QA model
    context = " ".join(retrieved_chunks)
    answer = qa_pipeline(question=question, context=context)
    return f"**Answer:** {answer['answer']}"
# Create Gradio Interface
# Two free-text inputs (topic, question) wired to the retrieval+QA function.
topic_input = gr.Textbox(label="Enter a Wikipedia Topic")
question_input = gr.Textbox(label="Ask a Question")

iface = gr.Interface(
    fn=answer_question,
    inputs=[topic_input, question_input],
    outputs="text",
    title="Wikipedia Q&A System",
    description="Ask questions about any Wikipedia topic using AI-powered retrieval and QA models.",
)
if __name__ == "__main__":
    # Launch the app with a public share link when run as a script.
    iface.launch(share=True)