text2sql-finetune / README.md
Debesh Sahoo
text2sql finetune
e66ad92
|
raw
history blame
1.34 kB
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Load the tokenizer for the "t5-small" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("t5-small")
# Load the sequence-to-sequence model from the same checkpoint name so the
# tokenizer vocabulary and the model weights match.
# NOTE(review): this is the base "t5-small" checkpoint name; if fine-tuned
# text-to-SQL weights are intended here, point both calls at them - confirm.
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
# The code below prepares the input prompt, generates a prediction, and
# decodes the output.
from typing import List
# Field markers inserted before the column list and before the question when
# prepare_input() assembles the prompt string.
table_prefix = "table:"
question_prefix = "question:"
def prepare_input(question: str, table: List[str]):
    """Build the model prompt for a question/table pair and tokenize it.

    The prompt has the form
    ``"question: <question> table: <col1>,<col2>,..."``, assembled from the
    module-level ``question_prefix`` and ``table_prefix``.

    Args:
        question: Natural-language question to translate.
        table: Column names of the target table; they are joined with commas.

    Returns:
        The ``input_ids`` produced by the module-level ``tokenizer`` when
        called with ``return_tensors="pt"``.
    """
    # Echo the raw inputs so each prediction can be matched to its prompt.
    print("question:", question)
    print("table:", table)

    join_table = ",".join(table)
    inputs = f"{question_prefix} {question} {table_prefix} {join_table}"

    # Request truncation explicitly: passing ``max_length`` on its own relies
    # on the tokenizer's implicit fallback strategy and emits a warning.
    input_ids = tokenizer(
        inputs,
        max_length=700,
        truncation=True,
        return_tensors="pt",
    ).input_ids
    return input_ids
def inference(question: str, table: List[str]) -> str:
    """Generate a SQL string for ``question`` over a table with given columns.

    Args:
        question: Natural-language question to translate.
        table: Column names of the target table.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    input_data = prepare_input(question=question, table=table)
    # Move the token ids to whichever device the model currently lives on.
    input_data = input_data.to(model.device)

    # Beam search without sampling. The former ``top_k=10`` argument was
    # dropped: ``top_k`` only applies when ``do_sample=True``, so it never
    # influenced the output here.
    outputs = model.generate(inputs=input_data, num_beams=10, max_length=512)

    # Only the first returned sequence is decoded.
    result = tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True)
    return result
# Compare the model's prediction with the reference SQL for one test example.
# NOTE(review): `dataset` is not defined or imported anywhere in this file; it
# must exist before these lines run, otherwise they raise NameError.
# Presumably it is a dataset with a "test" split whose rows carry "question",
# "table"/"header" and "sql"/"human_readable" fields - confirm the source.
test_id = 1000
print("model result:", inference(dataset["test"][test_id]["question"], dataset["test"][test_id]["table"]["header"]))
print("real result:", dataset["test"][test_id]["sql"]["human_readable"])
# Ad-hoc example with a hand-written question and column list. As a plain
# script statement the returned string is neither printed nor stored here.
inference("what is id with name jui and age equal 25", ["id","name", "age"])