File size: 2,182 Bytes
4f704f4
 
 
 
 
 
 
 
 
b74c191
d773610
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b74c191
 
e66ad92
 
 
b74c191
e66ad92
 
b74c191
e66ad92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f704f4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
---
datasets:
- wikisql
metrics:
- accuracy
pipeline_tag: text-classification
tags:
- code
---

Base Model:t5-small

#Training Result

[17610/17610 1:32:31, Epoch 9/10]
Step	Training Loss	Validation Loss
1000	2.682400	0.829368
2000	0.914000	0.568155
3000	0.707700	0.465733
4000	0.613500	0.408758
5000	0.557300	0.374811
6000	0.515800	0.350752
7000	0.487000	0.331517
8000	0.466100	0.319071
9000	0.449400	0.309488
10000	0.438800	0.301829
11000	0.430000	0.296482
12000	0.420200	0.292672
13000	0.418200	0.290445
14000	0.413400	0.288662
15000	0.410100	0.287757
16000	0.412600	0.287280
17000	0.410000	0.287134

question: what is id with name jui and age equal 25
table: ['id', 'name', 'age']
SELECT ID FROM table WHEREname = jui AND age equal 25

#Copy below piece of code to your notebook to use the model

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("DebeshSahoo/text2sql-finetune")

# Load the model
model = AutoModelForSeq2SeqLM.from_pretrained("DebeshSahoo/text2sql-finetune")

# Rest of the code for preparing input, generating predictions, and decoding the output...


from typing import List

table_prefix = "table:"
question_prefix = "question:"

def prepare_input(question: str, table: List[str]):
    print("question:", question)
    print("table:", table)
    join_table = ",".join(table)
    inputs = f"{question_prefix} {question} {table_prefix} {join_table}"
    input_ids = tokenizer(inputs, max_length=700, return_tensors="pt").input_ids
    return input_ids

def inference(question: str, table: List[str]) -> str:
    input_data = prepare_input(question=question, table=table)
    input_data = input_data.to(model.device)
    outputs = model.generate(inputs=input_data, num_beams=10, top_k=10, max_length=512)
    result = tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True)
    return result

test_id = 1000
print("model result:", inference(dataset["test"][test_id]["question"], dataset["test"][test_id]["table"]["header"]))
print("real result:", dataset["test"][test_id]["sql"]["human_readable"])

inference("what is id with name jui and age equal 25", ["id","name", "age"])