import gradio as gr
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
from sentence_transformers import SentenceTransformer

from utils import get_wikipedia_content, split_text, create_faiss_index

# Load the embedding model (for retrieval) and the extractive QA model
embedding_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

qa_model_name = "deepset/roberta-base-squad2"
qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name)
qa_pipeline = pipeline("question-answering", model=qa_model, tokenizer=qa_tokenizer)


# Function to handle user query
def answer_question(topic, question):
    """Retrieve relevant Wikipedia chunks for `topic` and answer `question`."""
    # Fetch Wikipedia content
    document = get_wikipedia_content(topic)
    if not document:
        return "Could not retrieve information from Wikipedia."

    # Split content into chunks
    chunks = split_text(document)

    # Create FAISS index over the chunk embeddings
    index, embeddings = create_faiss_index(chunks)

    # Encode query and search for the most relevant chunks
    query_embedding = embedding_model.encode([question])
    k = min(3, len(chunks))  # guard against articles shorter than 3 chunks
    distances, indices = index.search(np.array(query_embedding), k)
    retrieved_chunks = [chunks[i] for i in indices[0] if i >= 0]

    # Generate answer using the extractive QA model
    context = " ".join(retrieved_chunks)
    answer = qa_pipeline(question=question, context=context)
    return f"**Answer:** {answer['answer']}"


# Create Gradio interface ("markdown" output so the bold answer renders)
iface = gr.Interface(
    fn=answer_question,
    inputs=[
        gr.Textbox(label="Enter a Wikipedia Topic"),
        gr.Textbox(label="Ask a Question"),
    ],
    outputs="markdown",
    title="Wikipedia Q&A System",
    description="Ask questions about any Wikipedia topic using AI-powered retrieval and QA models.",
)

if __name__ == "__main__":
    iface.launch(share=True)
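
# ---------------------------------------------------------------------------
# Reference sketch: the three helpers imported from `utils` are not shown in
# this file. The definitions below are a minimal, hypothetical reading of what
# utils.py might contain, inferred only from how the helpers are called above.
# The `_sketch_` prefix avoids shadowing the real imports, and the `wikipedia`
# package is an assumed dependency, not confirmed by the source.


def _sketch_get_wikipedia_content(topic):
    """Fetch the plain-text body of a Wikipedia page, or None on failure."""
    import wikipedia  # assumed dependency

    try:
        return wikipedia.page(topic, auto_suggest=False).content
    except wikipedia.exceptions.WikipediaException:
        return None


def _sketch_split_text(text, chunk_size=200, overlap=50):
    """Split text into overlapping word-window chunks for retrieval."""
    words = text.split()
    step = chunk_size - overlap
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), step)]


def _sketch_create_faiss_index(chunks):
    """Embed every chunk and store the vectors in a flat L2 FAISS index."""
    import faiss

    embeddings = embedding_model.encode(chunks)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(np.asarray(embeddings, dtype="float32"))
    return index, embeddings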