# text2sql-finetune

```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("t5-small")

# Load the model
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
```
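If a GPU is available, the model can be moved there before generation; the `inference` helper further down already routes its inputs to `model.device`, so this step is optional. A minimal sketch (not part of the original file):

```python
import torch

# Optional: run generation on GPU when one is available, and switch to eval mode.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model.eval()
```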

The rest of the code prepares the model input, generates predictions, and decodes the output:

```python
from typing import List

# Prefixes used to serialize a question and its table header into a single prompt.
table_prefix = "table:"
question_prefix = "question:"


def prepare_input(question: str, table: List[str]):
    """Serialize the question and table header into a single prompt and tokenize it."""
    print("question:", question)
    print("table:", table)
    join_table = ",".join(table)
    inputs = f"{question_prefix} {question} {table_prefix} {join_table}"
    input_ids = tokenizer(inputs, max_length=700, return_tensors="pt").input_ids
    return input_ids
```
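As a quick illustration (not part of the original file), the example question used at the bottom of this README is serialized into a single prompt before tokenization:

```python
# Builds the prompt
# "question: what is id with name jui and age equal 25 table: id,name,age"
# and prints the question/table debug lines along the way.
example_ids = prepare_input("what is id with name jui and age equal 25", ["id", "name", "age"])
```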

```python
def inference(question: str, table: List[str]) -> str:
    """Generate a SQL prediction with beam search and decode it to a string."""
    input_data = prepare_input(question=question, table=table)
    input_data = input_data.to(model.device)
    # Note: top_k only applies to sampling; with plain beam search it is effectively ignored.
    outputs = model.generate(inputs=input_data, num_beams=10, top_k=10, max_length=512)
    result = tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True)
    return result
```
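The `dataset` object used in the test below is never defined in this file. A minimal sketch, assuming it is the WikiSQL dataset from the Hugging Face `datasets` library, whose `question`, `table.header`, and `sql.human_readable` fields match the accesses below:

```python
from datasets import load_dataset

# Assumption: WikiSQL supplies the "question", "table" (with "header"),
# and "sql" (with "human_readable") fields indexed in the test snippet.
dataset = load_dataset("wikisql")
```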

```python
# Compare the model prediction with the reference SQL for one test example.
test_id = 1000
print("model result:", inference(dataset["test"][test_id]["question"], dataset["test"][test_id]["table"]["header"]))
print("real result:", dataset["test"][test_id]["sql"]["human_readable"])
```

inference("what is id with name jui and age equal 25", ["id","name", "age"])