import gradio as gr
import pandas as pd
from io import BytesIO
from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering, TableQuestionAnsweringPipeline

# Checkpoint shared by the tokenizer and the model. Keeping the identifier in
# one place guarantees both are always loaded from the same checkpoint.
MODEL_NAME = "google/tapas-large-finetuned-wtq"

# Load the tokenizer and the table question answering model at import time.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTableQuestionAnswering.from_pretrained(MODEL_NAME)

# Initialize the TableQuestionAnsweringPipeline manually from the objects
# loaded above.
pipe = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

def answer_question(uploaded_file, question):
    """Answer a natural-language question about an uploaded CSV table.

    Args:
        uploaded_file: Raw bytes of the uploaded CSV file, or ``None`` when
            no file was provided.
        question: The question to ask about the table.

    Returns:
        The ``"answer"`` string produced by the table question answering
        pipeline, or a short human-readable message when the file or the
        question is missing, the file cannot be parsed as CSV, or the table
        has no data rows.
    """
    # Validate the inputs up front so the user gets an actionable message
    # instead of an opaque parser or pipeline exception.
    if uploaded_file is None:
        return "Please upload a CSV file."
    if not question or not question.strip():
        return "Please enter a question."

    try:
        # Read every cell as text: TAPAS expects string inputs, and letting
        # pandas infer dtypes first would rewrite values (e.g. "1" -> "1.0"
        # in a column with gaps) and turn empty cells into the text "nan".
        df = pd.read_csv(
            BytesIO(uploaded_file),
            dtype=str,
            keep_default_na=False,
        )
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as err:
        return f"Could not read the uploaded file as CSV: {err}"

    # A header-only file parses fine but leaves nothing to query.
    if df.empty:
        return "The uploaded CSV file contains no data rows."

    # Use the pipeline to answer the question based on the table.
    result = pipe({"table": df, "query": question})
    return result["answer"]

# Logo image embedded (as Markdown) at the top of the app description.
logo_url = "https://i.ibb.co/Brr7bPP/xflow.png"

# Define the Gradio app interface: a CSV upload and a question box as inputs,
# and a single text field for the answer returned by answer_question.
iface = gr.Interface(
    fn=answer_question,
    inputs=[
        gr.File(label="Upload CSV File", type="binary"),
        gr.Textbox(lines=2, placeholder="Ask a question..."),
    ],
    outputs=gr.Text(),
    title="Table-based Question Answering",
    description=f"![Logo]({logo_url})\n\nUpload a CSV file and ask a question related to the data in the file.",
)

# Run the app only when this file is executed as a script, so that importing
# the module (e.g. from a test or another app) does not start a web server.
if __name__ == "__main__":
    iface.launch()