File size: 1,340 Bytes

e66ad92

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("t5-small")

# Load the model
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")

# Rest of the code for preparing input, generating predictions, and decoding the output...


from typing import List

table_prefix = "table:"
question_prefix = "question:"

def prepare_input(question: str, table: List[str]):
    print("question:", question)
    print("table:", table)
    join_table = ",".join(table)
    inputs = f"{question_prefix} {question} {table_prefix} {join_table}"
    input_ids = tokenizer(inputs, max_length=700, return_tensors="pt").input_ids
    return input_ids

def inference(question: str, table: List[str]) -> str:
    input_data = prepare_input(question=question, table=table)
    input_data = input_data.to(model.device)
    outputs = model.generate(inputs=input_data, num_beams=10, top_k=10, max_length=512)
    result = tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True)
    return result

test_id = 1000
print("model result:", inference(dataset["test"][test_id]["question"], dataset["test"][test_id]["table"]["header"]))
print("real result:", dataset["test"][test_id]["sql"]["human_readable"])

inference("what is id with name jui and age equal 25", ["id","name", "age"])