File size: 3,498 Bytes
c0d5863
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import gradio as gr
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoModelForTableQuestionAnswering,
    AutoTokenizer,
    pipeline,
    TapexTokenizer, 
    BartForConditionalGeneration
)
import pandas as pd
import json

# Checkpoints queried by this app. TAPEX is driven directly through its
# tokenizer and generate(); both TAPAS checkpoints go through the
# "table-question-answering" pipeline.
# NOTE: the disabled Auto*-based pipeline setup that was kept here as
# commented-out code has been dropped in favour of the loaders below.
TAPEX_CHECKPOINT = "microsoft/tapex-large-finetuned-wtq"
TAPAS_WTQ_CHECKPOINT = "google/tapas-large-finetuned-wtq"
TAPAS_WIKISQL_CHECKPOINT = "google/tapas-large-finetuned-wikisql-supervised"

# Microsoft TAPEX: tokenizer + seq2seq model sharing one checkpoint.
tokenizer = TapexTokenizer.from_pretrained(TAPEX_CHECKPOINT)
model = BartForConditionalGeneration.from_pretrained(TAPEX_CHECKPOINT)

# Google TAPAS: one pipeline per fine-tuned checkpoint.
pipe_tapas = pipeline(
    task="table-question-answering",
    model=TAPAS_WTQ_CHECKPOINT,
)
pipe_tapas2 = pipeline(
    task="table-question-answering",
    model=TAPAS_WIKISQL_CHECKPOINT,
)




def _first_cell(answer):
    """Return the first cell a TAPAS pipeline selected, or "" if it selected none."""
    cells = answer["cells"]
    return cells[0] if cells else ""


def process2(query, csv_dataStr):
    """Answer ``query`` about a JSON-encoded table with all three models.

    Args:
        query: Natural-language question about the table.
        csv_dataStr: JSON text that ``pd.DataFrame.from_dict`` can turn into
            a table, e.g.
            ``{"Actors": ["Brad Pitt", "George Clooney"],
            "Number of movies": ["87", "69"]}``.

    Returns:
        A tuple ``(result_tapex, result_tapas, result_tapas2)``: the decoded
        TAPEX generation, then the first selected cell from each TAPAS
        pipeline (``""`` when a pipeline selected no cell).

    Raises:
        json.JSONDecodeError: If ``csv_dataStr`` is not valid JSON.
    """
    table = pd.DataFrame.from_dict(json.loads(csv_dataStr))
    # The table tokenizers work on text cells; JSON numbers would otherwise
    # reach them as int/float values, so normalise every cell to str first.
    table = table.astype(str)

    # microsoft: encode table + question, generate, decode the single sequence
    encoding = tokenizer(table=table, query=query, return_tensors="pt")
    outputs = model.generate(**encoding)
    result_tapex = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    # google (WTQ checkpoint)
    result_tapas = _first_cell(pipe_tapas(table=table, query=query))
    # google2 (WikiSQL checkpoint)
    result_tapas2 = _first_cell(pipe_tapas2(table=table, query=query))

    return result_tapex, result_tapas, result_tapas2


# Inputs
# Question text; wired as the first input of the interface, so it arrives
# as process2's `query` argument. The label is deliberately an empty string.
query_text = gr.Text(label="")
# input_file = gr.File(label="Upload a CSV file", type="file")
# Table payload as text; wired as the second input, so it arrives as
# process2's `csv_dataStr` argument, which is parsed with json.loads.
input_data = gr.Text(label="")
# rows_slider = gr.Slider(label="Number of rows")

# Output
# One text box per value returned by process2, in the same order:
# TAPEX result, first TAPAS pipeline result, second TAPAS pipeline result.
answer_text_tapex = gr.Text(label="")
answer_text_tapas = gr.Text(label="")
answer_text_tapas2 = gr.Text(label="")

# Markdown text passed to gr.Interface as its description. The wording
# matches what the app actually does: three checkpoints are queried and the
# table is entered as JSON text (there is no CSV upload widget).
description = (
    "This Space lets you ask questions about a table with Microsoft "
    "[TAPEX-Large](https://huggingface.co/microsoft/tapex-large-finetuned-wtq) and Google "
    "[TAPAS-Large](https://huggingface.co/google/tapas-large-finetuned-wtq). "
    "Both have been fine-tuned on the "
    "[WikiTableQuestions](https://huggingface.co/datasets/wikitablequestions) dataset; "
    "a second [TAPAS-Large](https://huggingface.co/google/tapas-large-finetuned-wikisql-supervised) "
    "checkpoint fine-tuned on WikiSQL is queried as well. \n\n"
    "A sample file with football statistics is available in the repository: \n\n"
    "* Which team has the most wins? Answer: Manchester City FC\n"
    "* Which team has the most wins: Chelsea, Liverpool or Everton? Answer: Liverpool\n"
    "* Which teams have scored less than 40 goals? Answer: Cardiff City FC, Fulham FC, "
    "Brighton & Hove Albion FC, Huddersfield Town FC\n"
    "* What is the average number of wins? Answer: 16 (rounded)\n\n"
    "Enter your table as JSON mapping each column name to a list of cell values, for example "
    '`{"Actors": ["Brad Pitt", "George Clooney"], "Number of movies": ["87", "69"]}`. '
    "Please note that the maximum sequence length of these models is limited "
    "(1024 tokens at most), so you may need to limit the number of rows in your table. "
    "Chunking is not implemented yet."
)

# Assemble the demo: the two text inputs are handed to process2, and its
# three return values fill the three answer boxes in order.
iface = gr.Interface(
    # wiring
    fn=process2,
    inputs=[query_text, input_data],
    outputs=[answer_text_tapex, answer_text_tapas, answer_text_tapas2],
    # presentation
    description=description,
    examples=[],
    theme="huggingface",
    layout="vertical",
    allow_flagging="never",
)

# Start serving as soon as this script is executed.
iface.launch()