import re

import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

# Build the question-answering pipeline once at import time so every call to
# extract_answer reuses it. No model is named here, so transformers selects
# its default checkpoint for the task.
qa_model = pipeline(task="question-answering")

def extract_answer(question, url):
    """Answer a question using the text of a web page as context.

    Downloads ``url``, removes tags that carry no readable text, collapses
    blank lines, and passes the remaining text to the module-level
    ``qa_model`` pipeline together with ``question``.

    Args:
        question: The question to answer.
        url: Address of the page whose text is used as context.

    Returns:
        Whatever ``qa_model`` returns for the question/context pair,
        passed through unchanged.

    Raises:
        requests.RequestException: If the page cannot be fetched within the
            timeout or the server answers with an HTTP error status.
        ValueError: If no text remains once the markup has been removed.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive
    # host, which would block this call forever.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of answering from an error page.
    response.raise_for_status()

    # Hand over the raw bytes so BeautifulSoup can detect the encoding.
    soup = BeautifulSoup(response.content, 'html.parser')

    # Drop tags whose contents are not human-readable page text.
    for tag in soup(['style', 'script', 'meta', 'link', 'noscript']):
        tag.decompose()

    # Collapse every run of consecutive newlines into a single newline.
    context = re.sub(r"\n{2,}", "\n", soup.get_text())
    if not context.strip():
        raise ValueError(f"No text content could be extracted from {url!r}")

    return qa_model(question=question, context=context)


# Sample (question, URL) pairs offered as ready-made examples in the UI.
examples = [
    ["Where was Messi born?", "https://en.wikipedia.org/wiki/Lionel_Messi"],
    [
        "When was Cristiano Ronaldo born?",
        "https://en.wikipedia.org/wiki/Cristiano_Ronaldo",
    ],
]

# Heading and short blurb for the interface.
title = "Webpage-Based Question Answering"
description = "Using a webpage as context for extractive question answering."

# Forwarded to launch() below.
enable_queue = True

# Two text inputs (question, then URL) feed extract_answer; its result is
# shown through a text output.
iface = gr.Interface(
    extract_answer,
    ["text", "text"],
    "text",
    examples=examples,
    description=description,
    title=title,
)
iface.launch(enable_queue=enable_queue)