RohitMidha23 committed on
Commit
6abee0e
1 Parent(s): 40fc826
Files changed (5)
  1. app.py +23 -0
  2. requirements.txt +3 -0
  3. sample.csv +6 -0
  4. src/constants.py +10 -0
  5. src/inference.py +48 -0
app.py ADDED
@@ -0,0 +1,23 @@
+ from src.inference import infer
+ import gradio as gr
+
+
+ def main():
+     description = "A quick and easy way to understand and talk to your data!"
+
+     iface = gr.Interface(fn=infer,
+                          inputs=[gr.Textbox(label="Query"),
+                                  gr.File(label="CSV file")],
+                          outputs=[gr.JSON(label="Result"),
+                                   gr.Dataframe(label="Data")],
+                          examples=[
+                              ["Who scored the highest?", "sample.csv"],
+                          ],
+                          title="Talk to your Data!",
+                          description=description,
+                          allow_flagging='never')
+     iface.queue().launch()
+
+
+ if __name__ == "__main__":
+     main()
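Note that infer returns two values (a result dict and the parsed table), matching the JSON and Dataframe outputs wired up above. For a quick check without launching the UI, the handler can be called directly against the bundled sample.csv; a minimal sketch, assuming it is run from the repository root so that src is importable:

    from src.inference import infer

    result, table = infer("Who scored the highest?", "sample.csv")
    print(result)        # {"query": ..., "answer": ...}
    print(table.head())  # the CSV as a string-typed DataFrame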
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ transformers==4.41.2
+ torch==2.3.0
+ gradio
sample.csv ADDED
@@ -0,0 +1,6 @@
+ ID,Name,Age,Score
+ 1,Alice,23,85.0
+ 2,Bob,35,90.5
+ 3,Charlie,45,78.0
+ 4,David,25,88.5
+ 5,Eve,30,92.0
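The sample table is numeric, but src/inference.py casts every column to str before tokenisation, since TAPAS works on a text-only view of the table. A minimal sketch of that preprocessing step on this file:

    import pandas as pd

    table = pd.read_csv("sample.csv").astype(str)
    print(table.dtypes)       # every column is now object (string)
    print(table.iat[(0, 1)])  # "Alice" -- the (row, column) lookup used in src/inference.py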
src/constants.py ADDED
@@ -0,0 +1,10 @@
+ from enum import Enum
+
+ # create an enum with all aggregation types
+ class Aggregation(Enum):
+     NONE = 0
+     SUM = 1
+     AVERAGE = 2
+     COUNT = 3
+
+ id2aggregation = {x.value: x.name for x in Aggregation}
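The integer keys mirror the aggregation indices returned by TapasTokenizer.convert_logits_to_predictions for the WTQ-finetuned model (0 = NONE, 1 = SUM, 2 = AVERAGE, 3 = COUNT), so src/inference.py can turn them into readable operator names. A quick usage sketch:

    from src.constants import Aggregation, id2aggregation

    assert id2aggregation[0] == "NONE"
    assert id2aggregation[2] == "AVERAGE"
    assert Aggregation["SUM"].value == 1  # reverse lookup via the Enum itself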
src/inference.py ADDED
@@ -0,0 +1,48 @@
+ from transformers import TapasTokenizer, TapasForQuestionAnswering
+ import pandas as pd
+ from typing import Dict, Tuple
+ from src.constants import id2aggregation
+
+
+ def infer(query: str, file_name: str, model_name: str = "google/tapas-base-finetuned-wtq") -> Tuple[Dict[str, str], pd.DataFrame]:
+     # Load the file; TAPAS expects every cell to be a string
+     table = pd.read_csv(file_name, delimiter=",")
+     table = table.astype(str)
+
+     # Load the model and tokenizer
+     model = TapasForQuestionAnswering.from_pretrained(model_name)
+     tokenizer = TapasTokenizer.from_pretrained(model_name)
+
+     # Make predictions
+     queries = [query]
+     inputs = tokenizer(table=table, queries=queries, padding="max_length", return_tensors="pt")
+     outputs = model(**inputs)
+     # predicted_answer_coordinates: (row, column) coordinates of the answer cells for each query
+     # predicted_aggregation_indices: the aggregation operator predicted for each query
+     predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
+         inputs, outputs.logits.detach(), outputs.logits_aggregation.detach()
+     )
+     aggregation_predictions_string = [id2aggregation[x] for x in predicted_aggregation_indices]
+
+     answers = []
+     for coordinates in predicted_answer_coordinates:
+         if len(coordinates) == 1:
+             # only a single cell
+             answers.append(table.iat[coordinates[0]])
+         else:
+             # multiple cells: join their values
+             cell_values = []
+             for coordinate in coordinates:
+                 cell_values.append(table.iat[coordinate])
+             answers.append(", ".join(cell_values))
+
+     # Create the answer string
+     answer_str = ""
+     for query, answer, predicted_agg in zip(queries, answers, aggregation_predictions_string):
+         if predicted_agg == "NONE":
+             answer_str = answer
+         else:
+             answer_str = f"{predicted_agg} : {answer}"
+
+     # Return the result dict for the JSON output and the parsed table for the Dataframe output
+     return {"query": query, "answer": answer_str}, table
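Because from_pretrained is called inside infer, the weights are re-loaded on every query. One possible refinement (a sketch, not part of this commit; the helper name load_model is hypothetical) is to cache the model and tokenizer between calls:

    import functools
    from transformers import TapasForQuestionAnswering, TapasTokenizer

    @functools.lru_cache(maxsize=1)
    def load_model(model_name: str = "google/tapas-base-finetuned-wtq"):
        # Loaded once per process and reused for every subsequent query.
        model = TapasForQuestionAnswering.from_pretrained(model_name)
        tokenizer = TapasTokenizer.from_pretrained(model_name)
        return model, tokenizer

infer could then call load_model(model_name) in place of the two from_pretrained calls.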