INFERENCE CODE
pip install transformers[torch]
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import time
# Load the tokenizer and the seq2seq model once, up front.
tokenizer = AutoTokenizer.from_pretrained("Mr-Vicky-01/nl-pgsql-248M")
model = AutoModelForSeq2SeqLM.from_pretrained("Mr-Vicky-01/nl-pgsql-248M")

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Move the model before the timer starts so this one-time transfer is not
# reported as part of the per-question inference time.
model.to(device)

# Instruction text prepended to every question before tokenization.
prefix = "Translate the following text to PGSQL: "
inp = YOUR_QUESTION  # placeholder: replace with the natural-language question (a str)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed durations; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()

# Commas are removed and the text is lower-cased before tokenization.
# NOTE(review): presumably this mirrors the preprocessing used when the model
# was trained - confirm against the model card.
inp = inp.replace(',', '')
inputs = tokenizer(prefix + inp.lower(), return_tensors="pt")
# Input tensors must live on the same device as the model.
inputs = inputs.to(device)

# max_length=256 is passed to generate() to bound the generated sequence.
outputs = model.generate(**inputs, max_length=256)
answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Stop the clock before printing so console I/O is not counted.
end = time.perf_counter()

print(answer.strip())
print(f"Time taken: {end - start}")