---
license: apache-2.0
---
```python
# Usage example: load the checkpoint, sample a short continuation of a prompt,
# and print the decoded text.
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
import torch

# Hub id of the checkpoint; shared by the model and tokenizer loads below.
MODEL_NAME = "orionweller/test-flex-gpt"

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# NOTE: trust_remote_code=True allows Python code shipped in the checkpoint
# repository to be executed locally; only enable it for repositories you trust.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

# test it out and encode some text
prompt = "The capital of France is"
inputs = tokenizer(prompt, return_tensors="pt").input_ids
# put the input ids on the right device
inputs = inputs.to(device)

# Sampling is enabled (do_sample=True with top_p=0.95), so the five generated
# tokens can differ from run to run.
outputs = model.generate(inputs, max_new_tokens=5, do_sample=True, top_p=0.95)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```