Meena committed on
Commit
d9ea2c1
1 Parent(s): 9ab7fd6

Update app/tapas.py

Browse files
Files changed (1) hide show
  1. app/tapas.py +116 -44
app/tapas.py CHANGED
@@ -1,48 +1,120 @@
1
  from transformers import TapasTokenizer, TFTapasForQuestionAnswering
2
  import pandas as pd
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  def execute_query(query, table):
5
- if table.shape[0]:
6
- #table = pd.read_csv(csv_file.name, delimiter=",")
7
- table.fillna(0, inplace=True)
8
- table = table.astype(str)
9
-
10
- model_name = "google/tapas-base-finetuned-wtq"
11
- model = TFTapasForQuestionAnswering.from_pretrained(model_name)
12
- tokenizer = TapasTokenizer.from_pretrained(model_name)
13
-
14
- queries = [query]
15
-
16
- inputs = tokenizer(table=table, queries=queries, padding="max_length", return_tensors="tf")
17
- outputs = model(**inputs)
18
-
19
- predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
20
- inputs, outputs.logits, outputs.logits_aggregation
21
- )
22
-
23
- # let's print out the results:
24
- id2aggregation = {0: "NONE", 1: "SUM", 2: "AVERAGE", 3: "COUNT"}
25
- aggregation_predictions_string = [id2aggregation[x] for x in predicted_aggregation_indices]
26
-
27
- answers = []
28
- for coordinates in predicted_answer_coordinates:
29
- if len(coordinates) == 1:
30
- # only a single cell:
31
- answers.append(table.iat[coordinates[0]])
32
- else:
33
- # multiple cells
34
- cell_values = []
35
- for coordinate in coordinates:
36
- cell_values.append(table.iat[coordinate])
37
- answers.append(cell_values)
38
-
39
- for query, answer, predicted_agg in zip(queries, answers, aggregation_predictions_string):
40
- if predicted_agg != "NONE":
41
- answers.append(predicted_agg)
42
-
43
- query_result = {
44
- "query": query,
45
- "result": answers
46
- }
47
-
48
- return query_result, table
 
1
  from transformers import TapasTokenizer, TFTapasForQuestionAnswering
2
  import pandas as pd
3
 
4
+ from transformers import TapasTokenizer, TapasForQuestionAnswering
5
+ import pandas as pd
6
+ import re
7
+
8
+ p = re.compile('\d+(\.\d+)?')
9
+
10
+ # Define the questions
11
+ queries = [
12
+ "When did Spider-Man: No Way Home release?",
13
+ "which Movies have rating 5?"
14
+ ]
15
+
16
+ def load_model_and_tokenizer():
17
+ """
18
+ Load
19
+ """
20
+ # Load pretrained tokenizer: TAPAS finetuned on WikiTable Questions
21
+ tokenizer = TapasTokenizer.from_pretrained("google/tapas-base-finetuned-wtq")
22
+
23
+ # Load pretrained model: TAPAS finetuned on WikiTable Questions
24
+ model = TapasForQuestionAnswering.from_pretrained("google/tapas-base-finetuned-wtq")
25
+
26
+ # Return tokenizer and model
27
+ return tokenizer, model
28
+
29
+
30
+ def prepare_inputs(table, queries, tokenizer):
31
+ """
32
+ Convert dictionary into data frame and tokenize inputs given queries.
33
+ """
34
+ # Prepare inputs
35
+ # table = pd.DataFrame.from_dict(data)
36
+ # table = netflix_df[['title', 'release_year', 'rating']].astype('str').head(50)
37
+ table = table.astype('str').head(100)
38
+ inputs = tokenizer(table=table, queries=queries, padding='max_length', return_tensors="pt")
39
+
40
+ # Return things
41
+ return table, inputs
42
+
43
+
44
+ def generate_predictions(inputs, model, tokenizer):
45
+ """
46
+ Generate predictions for some tokenized input.
47
+ """
48
+ # Generate model results
49
+ outputs = model(**inputs)
50
+
51
+ # Convert logit outputs into predictions for table cells and aggregation operators
52
+ predicted_table_cell_coords, predicted_aggregation_operators = tokenizer.convert_logits_to_predictions(
53
+ inputs,
54
+ outputs.logits.detach(),
55
+ outputs.logits_aggregation.detach()
56
+ )
57
+
58
+ # Return values
59
+ return predicted_table_cell_coords, predicted_aggregation_operators
60
+
61
+
62
+ def postprocess_predictions(predicted_aggregation_operators, predicted_table_cell_coords, table):
63
+ """
64
+ Compute the predicted operation and nicely structure the answers.
65
+ """
66
+ # Process predicted aggregation operators
67
+ aggregation_operators = {0: "NONE", 1: "SUM", 2: "AVERAGE", 3:"COUNT"}
68
+ aggregation_predictions_string = [aggregation_operators[x] for x in predicted_aggregation_operators]
69
+
70
+ # Process predicted table cell coordinates
71
+ answers = []
72
+ for agg, coordinates in zip(predicted_aggregation_operators, predicted_table_cell_coords):
73
+ if len(coordinates) == 1:
74
+ # 1 cell
75
+ answers.append(table.iat[coordinates[0]])
76
+ else:
77
+ # > 1 cell
78
+ cell_values = []
79
+ for coordinate in coordinates:
80
+ cell_values.append(table.iat[coordinate])
81
+ answers.append(", ".join(cell_values))
82
+
83
+ # Return values
84
+ return aggregation_predictions_string, answers
85
+
86
+
87
+ def show_answers(queries, answers, aggregation_predictions_string):
88
+ """
89
+ Visualize the postprocessed answers.
90
+ """
91
+ agg = {"NONE": lambda x: x, "SUM" : lambda x: sum(x), "AVERAGE": lambda x: (sum(x) / len(x)), "COUNT": lambda x: len(x)}
92
+
93
+ for query, answer, predicted_agg in zip(queries, answers, aggregation_predictions_string):
94
+ print(query)
95
+ if predicted_agg == "NONE":
96
+ print("Predicted answer: " + answer)
97
+ else:
98
+ if all([not p.match(val) == None for val in answer.split(', ')]):
99
+ # print("Predicted answer: " + predicted_agg + "(" + answer + ") = " + str(agg[predicted_agg](list(map(float, answer.split(','))))))
100
+ return "Predicted answer: " + str(agg[predicted_agg](list(map(float, answer.split(',')))))
101
+ elif predicted_agg == "COUNT":
102
+ # print("Predicted answer: " + predicted_agg + "(" + answer + ") = " + str(agg[predicted_agg](answer.split(','))))
103
+ return "Predicted answer: " + str(agg[predicted_agg](answer.split(',')))
104
+ else:
105
+ return "Predicted answer: " + predicted_agg + " > " + answer
106
+
107
+
108
+
109
+
110
  def execute_query(query, table):
111
+
112
+ """
113
+ Invoke the TAPAS model.
114
+ """
115
+ queries = [query]
116
+ tokenizer, model = load_model_and_tokenizer()
117
+ table, inputs = prepare_inputs(table, queries, tokenizer)
118
+ predicted_table_cell_coords, predicted_aggregation_operators = generate_predictions(inputs, model, tokenizer)
119
+ aggregation_predictions_string, answers = postprocess_predictions(predicted_aggregation_operators, predicted_table_cell_coords, table)
120
+ return show_answers(queries, answers, aggregation_predictions_string)