# pip install git+https://github.com/huggingface/transformers.git@main accelerate
from transformers import LlamaTokenizer, AutoModelForCausalLM

# Load the tokenizer and the resharded model weights from the current directory
tokenizer = LlamaTokenizer.from_pretrained("./")
model = AutoModelForCausalLM.from_pretrained("./")

# Tokenize a prompt, generate 5 new tokens, and decode the result
inputs = tokenizer("A cat sat", return_tensors="pt")["input_ids"]
outputs = model.generate(inputs, max_new_tokens=5)
print(tokenizer.decode(outputs[0]))
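
# Optional variant (a sketch, not part of the original snippet): because
# `accelerate` is installed above, the checkpoint can also be loaded with
# automatic device placement via the standard `device_map="auto"` argument,
# which spreads the weights across available GPUs and CPU memory.
# model = AutoModelForCausalLM.from_pretrained("./", device_map="auto")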