|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
|
# Load the tokenizer
# "t5-small" is the 60M-parameter T5 checkpoint; the same identifier must be
# used for tokenizer and model so the vocabularies match.
tokenizer = AutoTokenizer.from_pretrained("t5-small")



# Load the model
# Seq2seq (encoder-decoder) head, used below via model.generate().
# NOTE(review): the prompts below ("question: ... table: ...") suggest this is
# meant to be a WikiSQL-style fine-tuned checkpoint; the vanilla "t5-small"
# weights will not produce SQL — confirm the intended checkpoint name.
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")



# Rest of the code for preparing input, generating predictions, and decoding the output...
|
|
|
|
|
from typing import List |
|
|
|
# Prompt prefixes used to build the model input string in prepare_input():
# "question: <question> table: <comma-joined headers>".
table_prefix = "table:"

question_prefix = "question:"
|
|
|
def prepare_input(question: str, table: List[str]):
    """Tokenize a question plus table column headers into model input ids.

    Builds the prompt ``"question: <question> table: <h1>,<h2>,..."`` and
    tokenizes it for the seq2seq model.

    Args:
        question: Natural-language question about the table.
        table: Column-header names of the table.

    Returns:
        Token-id tensor of shape ``(1, seq_len)`` (``input_ids`` from the
        tokenizer, ``return_tensors="pt"``).
    """
    # Debug output kept from the original script.
    print("question:", question)
    print("table:", table)
    join_table = ",".join(table)
    inputs = f"{question_prefix} {question} {table_prefix} {join_table}"
    # BUG FIX: max_length is ignored (with a warning) unless truncation is
    # enabled; truncation=True makes the 700-token cap actually apply.
    input_ids = tokenizer(
        inputs, max_length=700, truncation=True, return_tensors="pt"
    ).input_ids
    return input_ids
|
|
|
def inference(question: str, table: List[str]) -> str:
    """Generate the model's answer for ``question`` over ``table`` headers.

    Args:
        question: Natural-language question about the table.
        table: Column-header names of the table.

    Returns:
        The decoded generation (special tokens stripped).
    """
    input_data = prepare_input(question=question, table=table)
    # Move inputs to the same device as the model (CPU or GPU).
    input_data = input_data.to(model.device)
    # FIX: dropped top_k=10 from the original call — top_k only affects
    # sampling (do_sample=True) and is ignored under pure beam search, so
    # removing it changes nothing except silencing the unused-flag warning.
    outputs = model.generate(inputs=input_data, num_beams=10, max_length=512)
    result = tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True)
    return result
|
|
|
# Demo: compare the model's generation against the ground-truth SQL for one
# test example, then run an ad-hoc query.
# NOTE(review): `dataset` is not defined anywhere in this chunk — presumably a
# WikiSQL-style datasets.DatasetDict loaded elsewhere (expects keys
# "question", "table"→"header", "sql"→"human_readable"); this raises
# NameError if run standalone — confirm where `dataset` is created.
test_id = 1000

print("model result:", inference(dataset["test"][test_id]["question"], dataset["test"][test_id]["table"]["header"]))

print("real result:", dataset["test"][test_id]["sql"]["human_readable"])



# Ad-hoc sanity check; return value is intentionally discarded (the call's
# internal prints show the inputs).
inference("what is id with name jui and age equal 25", ["id","name", "age"])
|
|