Debesh Sahoo committed on
Commit
e66ad92
1 Parent(s): a280f47

text2sql finetune

Browse files
Files changed (1) hide show
  1. README.md +36 -0
README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
+
3
+ # Load the tokenizer
4
+ tokenizer = AutoTokenizer.from_pretrained("t5-small")
5
+
6
+ # Load the model
7
+ model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
8
+
9
+ # Rest of the code for preparing input, generating predictions, and decoding the output...
10
+
11
+
12
+ from typing import List
13
+
14
+ table_prefix = "table:"
15
+ question_prefix = "question:"
16
+
17
def prepare_input(question: str, table: List[str]):
    """Build the tokenized model input for a question and its table columns.

    The prompt has the form
    ``"question: <question> table: <col1>,<col2>,..."`` and is encoded with
    the module-level ``tokenizer``.

    Args:
        question: Natural-language question to translate.
        table: Column names of the table the question refers to.

    Returns:
        The ``input_ids`` PyTorch tensor produced by the tokenizer.
    """
    print("question:", question)
    print("table:", table)
    join_table = ",".join(table)
    inputs = f"{question_prefix} {question} {table_prefix} {join_table}"
    # Request truncation explicitly so the 700-token cap is applied on
    # purpose instead of relying on the tokenizer's implicit fallback,
    # which logs a warning when only ``max_length`` is given.
    input_ids = tokenizer(
        inputs,
        max_length=700,
        truncation=True,
        return_tensors="pt",
    ).input_ids
    return input_ids
24
+
25
def inference(question: str, table: List[str]) -> str:
    """Generate output text for a question using the module-level model.

    Args:
        question: Natural-language question to translate.
        table: Column names of the table the question refers to.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed.
    """
    input_data = prepare_input(question=question, table=table)
    # Keep the input on the same device as the model weights.
    input_data = input_data.to(model.device)
    # Beam search only: ``top_k`` is a sampling option and has no effect
    # unless ``do_sample=True``, so it is not passed here.
    outputs = model.generate(inputs=input_data, num_beams=10, max_length=512)
    result = tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True)
    return result
31
+
32
# NOTE(review): `dataset` is not defined in the visible snippet; it has to be
# loaded before this point. The keys used below look like the WikiSQL schema
# -- TODO confirm.
test_id = 1000
sample = dataset["test"][test_id]
print("model result:", inference(sample["question"], sample["table"]["header"]))
print("real result:", sample["sql"]["human_readable"])

# Stand-alone example that does not need the dataset. Print the prediction so
# it is visible when this runs as a script rather than in a notebook cell.
print("model result:", inference("what is id with name jui and age equal 25", ["id", "name", "age"]))