"""Gradio demo: extractive question answering over the text of a web page."""

import re

import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

# Load the question-answering pipeline once at import time so that every
# request reuses the same model instance. No model name is passed, so the
# library's default for this task is used.
qa_model = pipeline("question-answering")

# Upper bound (seconds) on waiting for the remote server; without a timeout
# `requests.get` can block forever on an unresponsive host.
REQUEST_TIMEOUT_SECONDS = 10

# Tags whose content is not visible page text and would only pollute the
# context handed to the model.
_NON_TEXT_TAGS = ["style", "script", "meta", "link", "noscript"]


def extract_answer(question, url):
    """Answer *question* using the visible text of the page at *url*.

    The page is downloaded, non-text tags are stripped, runs of blank lines
    are collapsed, and the resulting plain text is passed as context to the
    question-answering pipeline.

    Args:
        question: The natural-language question to answer.
        url: Address of the web page to use as context.

    Returns:
        Whatever ``qa_model`` returns for the question/context pair
        (returned unchanged).

    Raises:
        requests.RequestException: If the page cannot be fetched, the
            request times out, or the server answers with an HTTP error
            status.
    """
    # NOTE(review): `url` comes straight from the UI, so the server will
    # fetch any address a user supplies — consider restricting allowed
    # hosts/schemes if this is deployed on a network with internal services.
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail loudly on 4xx/5xx instead of answering from an error page.
    response.raise_for_status()

    # Pass raw bytes so BeautifulSoup can detect the encoding itself.
    soup = BeautifulSoup(response.content, "html.parser")
    for tag in soup(_NON_TEXT_TAGS):
        tag.decompose()

    # Collapse every run of consecutive newlines into a single newline.
    context = re.sub(r"\n{2,}", "\n", soup.get_text())

    answer_dict = qa_model(question=question, context=context)
    return answer_dict


title = "Webpage-Based Question Answering"
description = "Using a webpage as context for extractive question answering."
enable_queue = True

# Each example is a [question, url] pair matching the two text inputs.
examples = [
    ["Where was Messi born?", "https://en.wikipedia.org/wiki/Lionel_Messi"],
    [
        "When was Cristiano Ronaldo born?",
        "https://en.wikipedia.org/wiki/Cristiano_Ronaldo",
    ],
]

iface = gr.Interface(
    fn=extract_answer,
    inputs=["text", "text"],
    outputs="text",
    title=title,
    description=description,
    examples=examples,
)

iface.launch(enable_queue=enable_queue)