File size: 1,953 Bytes
6abee0e
 
 
de5d62c
6abee0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a19d8a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from typing import List, Dict, Tuple

import pandas as pd
from transformers import TapasTokenizer, TapasForQuestionAnswering

from src.constants import id2aggregation

def infer(
    query: str,
    file_name: str,
    model_name: str = "google/tapas-base-finetuned-wtq",
) -> Tuple[Dict[str, str], pd.DataFrame]:
    """Answer a natural-language question about a CSV table with a TAPAS model.

    Args:
        query: The question to ask about the table.
        file_name: Path of a comma-delimited CSV file, read with pandas.
        model_name: Identifier passed to ``from_pretrained`` for both the
            model and the tokenizer.

    Returns:
        A 2-tuple ``(result, table)``. ``result`` is a dict with the keys
        ``"query"`` (the question as given) and ``"answer"`` (the selected
        cell values joined with ``", "``, prefixed with ``"<AGG> : "`` when
        the predicted aggregation is not ``"NONE"``). ``table`` is the loaded
        DataFrame with every cell cast to ``str``.
    """
    # Local import: only needed to disable autograd for the forward pass.
    import torch

    # Load the file; the tokenizer is given an all-string table.
    table = pd.read_csv(file_name, delimiter=",").astype(str)

    # Load the model.
    # NOTE(review): model and tokenizer are reloaded on every call; consider
    # caching them at the call site if this function is invoked repeatedly.
    model = TapasForQuestionAnswering.from_pretrained(model_name)
    tokenizer = TapasTokenizer.from_pretrained(model_name)

    # Make predictions
    queries = [query]
    inputs = tokenizer(table=table, queries=queries, padding="max_length", return_tensors="pt")
    # Inference only: the logits are detached below, so no graph is needed.
    with torch.no_grad():
        outputs = model(**inputs)
    # predicted_answer_coordinates: coordinates of the answer cells per query
    # predicted_aggregation_indices: aggregation type index per query
    predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
        inputs, outputs.logits.detach(), outputs.logits_aggregation.detach()
    )
    # presumably id2aggregation maps an aggregation index to its name
    # (e.g. "NONE") -- verify against src.constants.
    aggregation_predictions_string = [id2aggregation[x] for x in predicted_aggregation_indices]

    # Each coordinate is used directly as the index for ``table.iat``. Cells
    # are already strings, so a single cell joins to itself and an empty
    # selection yields "".
    answers = [
        ", ".join(table.iat[coordinate] for coordinate in coordinates)
        for coordinates in predicted_answer_coordinates
    ]

    # Create the answer string. The loop variables deliberately do not reuse
    # the name ``query`` so the parameter is not rebound.
    answer_str = ""
    for answer, predicted_agg in zip(answers, aggregation_predictions_string):
        if predicted_agg == "NONE":
            answer_str = answer
        else:
            answer_str = f"{predicted_agg} : {answer}"

    return {
        "query": query,
        "answer": answer_str
    }, table