Prompt Format:
[instruction]
[optional input]
[response will start after two newlines]
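For reference, a minimal sketch of assembling a prompt in this format. The helper name build_prompt is hypothetical, and the exact whitespace handling is an assumption based on the description above (instruction, optional input, then two newlines before the response):
# Hypothetical helper for the prompt format described above
def build_prompt(instruction, input_text=None):
    if input_text:
        return f"{instruction}\n{input_text}\n\n"
    return f"{instruction}\n\n"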
!pip install -q bitsandbytes datasets accelerate loralib
!pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git
!pip install -q geov
import torch
from peft import PeftModel, PeftConfig
from geov import GeoVForCausalLM, GeoVTokenizer
# Load the base model in 8-bit precision
model = GeoVForCausalLM.from_pretrained(
    "GeoV/GeoV-9b",
    load_in_8bit=True,
    low_cpu_mem_usage=True,
    device_map='auto',
)
tokenizer = GeoVTokenizer.from_pretrained("GeoV/GeoV-9b")

# Apply the instruction-tuned LoRA adapter on top of the base model
peft_model_id = "crumb/GeoV-Instruct-LoRA"
model = PeftModel.from_pretrained(model, peft_model_id)
# Inference
prompt = '''
Describe the structure of an atom.
'''
batch = tokenizer(prompt, return_tensors='pt')

# Generate up to 50 new tokens under mixed precision
with torch.cuda.amp.autocast():
    output_tokens = model.generate(**batch, max_new_tokens=50)

print(tokenizer.decode(output_tokens[0], skip_special_tokens=True))
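The optional input field from the prompt format can be filled the same way. A sketch reusing the model and tokenizer loaded above; the instruction text and the two-newline convention follow the format description at the top and are otherwise illustrative:
# Sketch: instruction plus optional input, response expected after two newlines
prompt = "Translate the following sentence to French.\nThe atom has a nucleus.\n\n"
batch = tokenizer(prompt, return_tensors='pt')
with torch.cuda.amp.autocast():
    output_tokens = model.generate(**batch, max_new_tokens=50)
print(tokenizer.decode(output_tokens[0], skip_special_tokens=True))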