import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Load the tokenizer and register the task-prefix tokens as special tokens
# so each one is kept as a single ID rather than being split by the BPE.
tokenizer = AutoTokenizer.from_pretrained("BEE-spoke-data/hf_slimpajama-6B-28672-BPE-forT5")
special_tokens_dict = {"additional_special_tokens": ["[R]", "[S]", "[X]", "[NTP]"]}
tokenizer.add_special_tokens(special_tokens_dict)
# Load the checkpoint from a local nanoT5 training run and move it to the GPU.
model = AutoModelForSeq2SeqLM.from_pretrained("/workspace/nanoT5/logs/2024-10-20/18-25-17/amazingvince/ul3-base").to("cuda")
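# Defensive step (not in the original script): if add_special_tokens() actually
# introduced tokens that were missing from the vocabulary, the embedding matrix
# must grow to match. This is a no-op when the sizes already agree, which is
# expected if the checkpoint was trained with these tokens.
model.resize_token_embeddings(len(tokenizer))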
# Prefix the prompt with the [NTP] task token to request next-token-prediction-style continuation.
prompt = "[NTP] The "
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# generate() builds decoder_input_ids automatically from the model's
# decoder_start_token_id, so constructing them by hand is optional:
# decoder_input_ids = torch.ones((inputs.input_ids.shape[0], 1), dtype=torch.long) * model.config.decoder_start_token_id

# Generate a short continuation, blocking repeated 5-grams.
generated_ids = model.generate(
    **inputs,
    # decoder_input_ids=decoder_input_ids,
    max_new_tokens=20,
    no_repeat_ngram_size=5,
)
# Decode the output
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(generated_text)
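
# A minimal helper sketch (not part of the original script) that wraps the same
# tokenize/generate/decode steps for any of the mode-prefix tokens registered
# above. This assumes the model was trained to condition on these prefixes in
# UL2 style, where [R], [S], and [X] conventionally select the regular,
# sequential, and extreme denoising objectives.
def generate_with_prefix(prefix: str, text: str, max_new_tokens: int = 20) -> str:
    batch = tokenizer(f"{prefix} {text}", return_tensors="pt").to(model.device)
    out = model.generate(**batch, max_new_tokens=max_new_tokens, no_repeat_ngram_size=5)
    return tokenizer.decode(out[0], skip_special_tokens=True)

# For example, try the same kind of continuation under the [S] (sequential) prefix:
print(generate_with_prefix("[S]", "The quick brown fox"))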