nielsr HF staff committed on
Commit
5e3e423
1 Parent(s): 18c58a6

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +34 -0
README.md ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TAPEX model fine-tuned on WikiTableQuestions (WTQ).
2
+
3
+ To load it and run inference, you can do the following:
4
+
5
+ from transformers import BartTokenizer, BartForConditionalGeneration
6
+ import pandas as pd
7
+
8
+ tokenizer = BartTokenizer.from_pretrained("nielsr/tapex-large-finetuned-wtq")
9
+ model = BartForConditionalGeneration.from_pretrained("nielsr/tapex-large-finetuned-wtq")
10
+
11
+ # create table
12
+ data = {'Actors': ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"], 'Number of movies': ["87", "53", "69"]}
13
+ table = pd.DataFrame.from_dict(data)
14
+
15
+ # turn into dict
16
+ table_dict = {"header": list(table.columns), "rows": [list(row.values) for i,row in table.iterrows()]}
17
+
18
+ # turn into format TAPEX expects
19
+ # NOTE: IndexedRowTableLinearize is not imported above; define it first, based on this code: https://github.com/microsoft/Table-Pretraining/blob/main/tapex/processor/table_linearize.py
20
+ linearizer = IndexedRowTableLinearize()
21
+ linear_table = linearizer.process_table(table_dict)
22
+
23
+ # add question
24
+ question = "how many movies does George Clooney have?"
25
+ joint_input = question + " " + linear_table
26
+
27
+ # encode
28
+ encoding = tokenizer(joint_input, return_tensors="pt")
29
+
30
+ # forward pass
31
+ outputs = model.generate(**encoding)
32
+
33
+ # decode
34
+ tokenizer.batch_decode(outputs, skip_special_tokens=True)