table_questions2

Running

File size: 3,498 Bytes

082cde2
 
 
 
 
 
40d15de
 
082cde2
40d15de
2abe7fa
082cde2
4931940
 
 
 
 
 
 
 
001fb6f
 
4931940
082cde2
3ee2a8f
 
 
 
 
 
082cde2
3ee2a8f
 
a7f975a
082cde2
3ee2a8f
40d15de
082cde2
2abe7fa
ff376a0
2abe7fa
3ee2a8f
40d15de
 
 
ff376a0
40d15de
3ee2a8f
a7f975a
 
 
3ee2a8f
082cde2
 
a7f975a
094ce77
a7f975a
f14d548
082cde2
 
cbab253
 
 
082cde2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293747f
f14d548
a7f975a
082cde2
0aa8e95
082cde2

import gradio as gr
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoModelForTableQuestionAnswering,
    AutoTokenizer,
    pipeline,
    TapexTokenizer, 
    BartForConditionalGeneration
)
import pandas as pd
import json

# Checkpoint and task names, spelled out once so the loaders below stay in sync.
_TAPEX_CHECKPOINT = "microsoft/tapex-large-finetuned-wtq"
_TAPAS_WTQ_CHECKPOINT = "google/tapas-large-finetuned-wtq"
_TAPAS_WIKISQL_CHECKPOINT = "google/tapas-large-finetuned-wikisql-supervised"
_TABLE_QA_TASK = "table-question-answering"

# TAPEX is loaded as a bare tokenizer/model pair (not wrapped in a pipeline);
# process2 tokenizes, generates and decodes with them directly.
tokenizer = TapexTokenizer.from_pretrained(_TAPEX_CHECKPOINT)
model = BartForConditionalGeneration.from_pretrained(_TAPEX_CHECKPOINT)

# The two TAPAS checkpoints are each wrapped in a table-question-answering
# pipeline, which resolves its own tokenizer from the checkpoint name.
pipe_tapas = pipeline(task=_TABLE_QA_TASK, model=_TAPAS_WTQ_CHECKPOINT)
pipe_tapas2 = pipeline(task=_TABLE_QA_TASK, model=_TAPAS_WIKISQL_CHECKPOINT)




def _first_cell(answer) -> str:
    """Return the first cell of a TAPAS pipeline answer, or "" if none was selected."""
    cells = answer["cells"]
    # Indexing an empty selection would raise IndexError and fail the whole
    # request, so an empty answer is reported as an empty string instead.
    return cells[0] if cells else ""


def process2(query: str, csv_dataStr: str):
    """Answer ``query`` about a JSON-encoded table with all three loaded models.

    Args:
        query: Natural-language question about the table.
        csv_dataStr: JSON text that ``pandas.DataFrame.from_dict`` can turn
            into a table, e.g.
            ``{"Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
            "Number of movies": ["87", "53", "69"]}``.

    Returns:
        A 3-tuple ``(result_tapex, result_tapas, result_tapas2)``: the decoded
        TAPEX generation, then the first cell selected by each of the two
        TAPAS pipelines (``""`` when a pipeline selected no cell). Only the
        first selected cell is reported; further cells are ignored.

    Raises:
        json.JSONDecodeError: If ``csv_dataStr`` is not valid JSON.
    """
    csv_data = json.loads(csv_dataStr)
    # The Transformers TAPAS docs require text-only tables and recommend
    # ``.astype(str)``; without it, numeric JSON values (e.g. 87 instead of
    # "87") reach the tokenizer as non-string cells.
    table = pd.DataFrame.from_dict(csv_data).astype(str)

    # Microsoft TAPEX: encode table + question, generate, decode first sequence.
    encoding = tokenizer(table=table, query=query, return_tensors="pt")
    outputs = model.generate(**encoding)
    result_tapex = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    # Google TAPAS, first checkpoint (pipe_tapas).
    result_tapas = _first_cell(pipe_tapas(table=table, query=query))
    # Google TAPAS, second checkpoint (pipe_tapas2).
    result_tapas2 = _first_cell(pipe_tapas2(table=table, query=query))
    return result_tapex, result_tapas, result_tapas2


# Input components, in the positional order process2 receives them:
# the question first, then the table as text. Labels are intentionally blank.
query_text = gr.Text(label="")
input_data = gr.Text(label="")

# Output components, one per value returned by process2.
answer_text_tapex = gr.Text(label="")
answer_text_tapas = gr.Text(label="")
answer_text_tapas2 = gr.Text(label="")

# Markdown blurb shown on the page, assembled from adjacent string literals.
# NOTE(review): the text still talks about uploading a CSV file and about two
# models, while the interface below wires two text inputs and three outputs --
# confirm whether the wording should be refreshed.
description = (
    "This Space lets you ask questions on CSV documents with Microsoft "
    "[TAPEX-Large](https://huggingface.co/microsoft/tapex-large-finetuned-wtq) "
    "and Google [TAPAS-Large](https://huggingface.co/google/tapas-large-finetuned-wtq). "
    "Both have been fine-tuned on the "
    "[WikiTableQuestions](https://huggingface.co/datasets/wikitablequestions) dataset. \n\n"
    "A sample file with football statistics is available in the repository: \n\n"
    "* Which team has the most wins? Answer: Manchester City FC\n"
    "* Which team has the most wins: Chelsea, Liverpool or Everton? Answer: Liverpool\n"
    "* Which teams have scored less than 40 goals? Answer: Cardiff City FC, Fulham FC, "
    "Brighton & Hove Albion FC, Huddersfield Town FC\n"
    "* What is the average number of wins? Answer: 16 (rounded)\n\n"
    "You can also upload your own CSV file. "
    "Please note that maximum sequence length for both models is 1024 tokens, "
    "so you may need to limit the number of rows in your CSV file. "
    "Chunking is not implemented yet."
)

# Wire process2 to the components above; no examples are pre-filled and
# flagging is switched off.
iface = gr.Interface(
    fn=process2,
    inputs=[query_text, input_data],
    outputs=[answer_text_tapex, answer_text_tapas, answer_text_tapas2],
    description=description,
    examples=[],
    theme="huggingface",
    layout="vertical",
    allow_flagging="never",
)

iface.launch()