|
import gradio as gr |
|
from transformers import ( |
|
AutoModelForSeq2SeqLM, |
|
AutoModelForTableQuestionAnswering, |
|
AutoTokenizer, |
|
pipeline, |
|
TapexTokenizer, |
|
BartForConditionalGeneration |
|
) |
|
import pandas as pd |
|
import json |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# TAPEX: a single checkpoint id supplies both the table tokenizer and the
# BART conditional-generation model that produces the answer text.
_TAPEX_CHECKPOINT = "microsoft/tapex-large-finetuned-wtq"

tokenizer = TapexTokenizer.from_pretrained(_TAPEX_CHECKPOINT)
model = BartForConditionalGeneration.from_pretrained(_TAPEX_CHECKPOINT)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# TAPAS: two table-question-answering pipelines, built in the same order as
# before (WTQ checkpoint first, then the WikiSQL-supervised checkpoint).
_TQA_TASK = "table-question-answering"

pipe_tapas, pipe_tapas2 = (
    pipeline(task=_TQA_TASK, model=_checkpoint)
    for _checkpoint in (
        "google/tapas-large-finetuned-wtq",
        "google/tapas-large-finetuned-wikisql-supervised",
    )
)
|
|
|
|
|
|
|
|
|
def _first_cell(prediction):
    """Return the first entry of ``prediction["cells"]``, or "" if it is empty."""
    cells = prediction["cells"]
    # Guard the index: an empty selection would otherwise raise IndexError and
    # fail the whole request, discarding the other models' answers as well.
    return cells[0] if cells else ""


def process2(query, csv_dataStr):
    """Answer ``query`` about a JSON-encoded table with TAPEX and two TAPAS models.

    Args:
        query: Natural-language question about the table.
        csv_dataStr: JSON string that ``pandas.DataFrame.from_dict`` can turn
            into a table.

    Returns:
        A tuple ``(result_tapex, result_tapas, result_tapas2)``: the decoded
        TAPEX generation, then the first selected cell from ``pipe_tapas`` and
        from ``pipe_tapas2`` ("" when a pipeline selects no cell).

    Raises:
        json.JSONDecodeError: If ``csv_dataStr`` is not valid JSON.
    """
    csv_data = json.loads(csv_dataStr)

    # TapasTokenizer documents that every cell must be text; JSON numbers
    # would otherwise arrive as numeric dtypes. The same string-typed table is
    # given to TAPEX so all three models see identical input.
    table = pd.DataFrame.from_dict(csv_data).astype(str)

    # TAPEX: encode table + question, generate, and decode the single answer.
    encoding = tokenizer(table=table, query=query, return_tensors="pt")
    outputs = model.generate(**encoding)
    result_tapex = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    # TAPAS: each pipeline returns a dict; only its first selected cell is shown.
    result_tapas = _first_cell(pipe_tapas(table=table, query=query))
    result_tapas2 = _first_cell(pipe_tapas2(table=table, query=query))

    return result_tapex, result_tapas, result_tapas2
|
|
|
|
|
|
|
# Input widgets, in the positional order process2(query, csv_dataStr) expects.
# The labels were empty strings, leaving the boxes unidentified in the UI.
query_text = gr.Text(label="Question")
# process2 parses this text with json.loads, so the label says JSON explicitly.
input_data = gr.Text(label="Table data (JSON)")

# Output widgets, in the order process2 returns its results: TAPEX, then the
# TAPAS WTQ pipeline, then the TAPAS WikiSQL-supervised pipeline. Distinct
# labels let the user tell which model produced which answer.
answer_text_tapex = gr.Text(label="TAPEX (WikiTableQuestions) answer")
answer_text_tapas = gr.Text(label="TAPAS (WikiTableQuestions) answer")
answer_text_tapas2 = gr.Text(label="TAPAS (WikiSQL) answer")
|
|
|
# Markdown blurb shown by the Gradio interface. Built with implicit string
# concatenation; the resulting text is unchanged.
description = (
    "This Space lets you ask questions on CSV documents with Microsoft "
    "[TAPEX-Large](https://huggingface.co/microsoft/tapex-large-finetuned-wtq) "
    "and Google "
    "[TAPAS-Large](https://huggingface.co/google/tapas-large-finetuned-wtq). "
    "Both have been fine-tuned on the "
    "[WikiTableQuestions](https://huggingface.co/datasets/wikitablequestions) "
    "dataset. \n\n"
    "A sample file with football statistics is available in the repository: \n\n"
    "* Which team has the most wins? Answer: Manchester City FC\n"
    "* Which team has the most wins: Chelsea, Liverpool or Everton? "
    "Answer: Liverpool\n"
    "* Which teams have scored less than 40 goals? Answer: Cardiff City FC, "
    "Fulham FC, Brighton & Hove Albion FC, Huddersfield Town FC\n"
    "* What is the average number of wins? Answer: 16 (rounded)\n\n"
    "You can also upload your own CSV file. Please note that maximum sequence "
    "length for both models is 1024 tokens, "
    "so you may need to limit the number of rows in your CSV file. "
    "Chunking is not implemented yet."
)
|
|
|
# Widget lists are in the positional order of process2's parameters and of
# the tuple it returns.
_interface_inputs = [query_text, input_data]
_interface_outputs = [answer_text_tapex, answer_text_tapas, answer_text_tapas2]

# Assemble the app: process2 is the callback, flagging is turned off, and no
# example rows are provided.
iface = gr.Interface(
    fn=process2,
    inputs=_interface_inputs,
    outputs=_interface_outputs,
    description=description,
    examples=[],
    theme="huggingface",
    layout="vertical",
    allow_flagging="never",
)

# Start serving the interface.
iface.launch()