Meena committed on
Commit
9ea7eaa
1 Parent(s): 8df608f

Update app/tapas.py

Browse files
Files changed (1) hide show
  1. app/tapas.py +54 -56
app/tapas.py CHANGED
@@ -5,66 +5,64 @@ import re
5
  p = re.compile('\d+(\.\d+)?')
6
 
7
  def load_model_and_tokenizer():
8
- """
9
- Load
10
- """
11
- tokenizer = AutoTokenizer.from_pretrained("Meena/table-question-answering-tapas")
12
- model = AutoModelForTableQuestionAnswering.from_pretrained("Meena/table-question-answering-tapas")
13
 
14
- # Return tokenizer and model
15
- return tokenizer, model
16
 
17
 
18
  def prepare_inputs(table, queries, tokenizer):
19
- """
20
- Convert dictionary into data frame and tokenize inputs given queries.
21
- """
22
- table = table.astype('str').head(100)
23
- inputs = tokenizer(table=table, queries=queries, padding='max_length', return_tensors="pt")
24
- return table, inputs
25
 
26
 
27
  def generate_predictions(inputs, model, tokenizer):
28
- """
29
- Generate predictions for some tokenized input.
30
- """
31
- # Generate model results
32
- outputs = model(**inputs)
33
-
34
- # Convert logit outputs into predictions for table cells and aggregation operators
35
- predicted_table_cell_coords, predicted_aggregation_operators = tokenizer.convert_logits_to_predictions(
36
- inputs,
37
- outputs.logits.detach(),
38
- outputs.logits_aggregation.detach()
39
- )
40
 
41
- # Return values
42
- return predicted_table_cell_coords, predicted_aggregation_operators
 
 
 
 
43
 
 
 
44
 
45
  def postprocess_predictions(predicted_aggregation_operators, predicted_table_cell_coords, table):
46
- """
47
- Compute the predicted operation and nicely structure the answers.
48
- """
49
- # Process predicted aggregation operators
50
- aggregation_operators = {0: "NONE", 1: "SUM", 2: "AVERAGE", 3:"COUNT"}
51
- aggregation_predictions_string = [aggregation_operators[x] for x in predicted_aggregation_operators]
52
-
53
- # Process predicted table cell coordinates
54
- answers = []
55
- for agg, coordinates in zip(predicted_aggregation_operators, predicted_table_cell_coords):
56
- if len(coordinates) == 1:
57
- # 1 cell
58
- answers.append(table.iat[coordinates[0]])
59
- else:
60
- # > 1 cell
61
- cell_values = []
62
- for coordinate in coordinates:
63
- cell_values.append(table.iat[coordinate])
64
- answers.append(", ".join(cell_values))
65
-
66
- # Return values
67
- return aggregation_predictions_string, answers
68
 
69
 
70
  def show_answers(queries, answers, aggregation_predictions_string):
@@ -90,12 +88,12 @@ def show_answers(queries, answers, aggregation_predictions_string):
90
  return results
91
 
92
  def execute_query(query, table):
93
- """
94
  Invoke the TAPAS model.
95
  """
96
- queries = [query]
97
- tokenizer, model = load_model_and_tokenizer()
98
- table, inputs = prepare_inputs(table, queries, tokenizer)
99
- predicted_table_cell_coords, predicted_aggregation_operators = generate_predictions(inputs, model, tokenizer)
100
- aggregation_predictions_string, answers = postprocess_predictions(predicted_aggregation_operators, predicted_table_cell_coords, table)
101
- return show_answers(queries, answers, aggregation_predictions_string)
5
  p = re.compile('\d+(\.\d+)?')
6
 
7
  def load_model_and_tokenizer():
8
+ """
9
+ Load the pretrained TAPAS tokenizer and table question answering model.
10
+ """
11
+ tokenizer = AutoTokenizer.from_pretrained("Meena/table-question-answering-tapas")
12
+ model = AutoModelForTableQuestionAnswering.from_pretrained("Meena/table-question-answering-tapas")
13
 
14
+ # Return tokenizer and model
15
+ return tokenizer, model
16
 
17
 
18
  def prepare_inputs(table, queries, tokenizer):
19
+ """
20
+ Cast the table to strings, keep its first 100 rows, and tokenize it together with the queries.
21
+ """
22
+ table = table.astype('str').head(100)
23
+ inputs = tokenizer(table=table, queries=queries, padding='max_length', return_tensors="pt")
24
+ return table, inputs
25
 
26
 
27
  def generate_predictions(inputs, model, tokenizer):
28
+ """
29
+ Generate predictions for some tokenized input.
30
+ """
31
+ # Generate model results
32
+ outputs = model(**inputs)
 
 
 
 
 
 
 
33
 
34
+ # Convert logit outputs into predictions for table cells and aggregation operators
35
+ predicted_table_cell_coords, predicted_aggregation_operators = tokenizer.convert_logits_to_predictions(
36
+ inputs,
37
+ outputs.logits.detach(),
38
+ outputs.logits_aggregation.detach()
39
+ )
40
 
41
+ # Return values
42
+ return predicted_table_cell_coords, predicted_aggregation_operators
43
 
44
  def postprocess_predictions(predicted_aggregation_operators, predicted_table_cell_coords, table):
45
+ """
46
+ Compute the predicted operation and nicely structure the answers.
47
+ """
48
+ # Process predicted aggregation operators
49
+ aggregation_operators = {0: "NONE", 1: "SUM", 2: "AVERAGE", 3:"COUNT"}
50
+ aggregation_predictions_string = [aggregation_operators[x] for x in predicted_aggregation_operators]
51
+ # Process predicted table cell coordinates
52
+ answers = []
53
+ for agg, coordinates in zip(predicted_aggregation_operators, predicted_table_cell_coords):
54
+ if len(coordinates) == 1:
55
+ # 1 cell
56
+ answers.append(table.iat[coordinates[0]])
57
+ else:
58
+ # > 1 cell
59
+ cell_values = []
60
+ for coordinate in coordinates:
61
+ cell_values.append(table.iat[coordinate])
62
+ answers.append(", ".join(cell_values))
63
+
64
+ # Return values
65
+ return aggregation_predictions_string, answers
 
66
 
67
 
68
  def show_answers(queries, answers, aggregation_predictions_string):
88
  return results
89
 
90
  def execute_query(query, table):
91
+ """
92
  Invoke the TAPAS model.
93
  """
94
+ queries = [query]
95
+ tokenizer, model = load_model_and_tokenizer()
96
+ table, inputs = prepare_inputs(table, queries, tokenizer)
97
+ predicted_table_cell_coords, predicted_aggregation_operators = generate_predictions(inputs, model, tokenizer)
98
+ aggregation_predictions_string, answers = postprocess_predictions(predicted_aggregation_operators, predicted_table_cell_coords, table)
99
+ return show_answers(queries, answers, aggregation_predictions_string)