# Code to test this model.
import sys
import time

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Smoke-test script: load the pretrained tokenizer and model named by
# `model_name`, generate a continuation for a single prompt, print it, and
# report how long the encode -> generate -> decode round trip took.

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

model_name = "skhatri/distilgpt2med"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.to(device)

# Default prompt; the first command-line argument, when given, replaces it.
raw_input = "Headache Cough"
if len(sys.argv) > 1:
    raw_input = sys.argv[1]

# Model loading above is deliberately outside the timed region.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
input_ids = tokenizer.encode(raw_input, return_tensors="pt").to(device)
output = model.generate(input_ids)
response = tokenizer.decode(output[0], skip_special_tokens=True)
# Stop the clock before printing so console I/O is not counted.
end = time.perf_counter()

print(response)
print(f"Time taken: {round(end - start, 2)} seconds")