
Model Card for GAMA-Code-Generator-1.2

This fine-tuned model is already merged with Mistral-7B (the base model). There are two approaches to running it for inference:

  • Approach 1: Load the base model and use the PEFT library to load the fine-tuned adapter parameters on top of it
  • Approach 2: Load the fine-tuned model directly from this Hugging Face Hub repository

Approach 1

Run Inference on Google Colab
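Before running the snippets below, the required libraries need to be installed; the exact package list is an assumption based on the imports used in this card (4-bit loading needs bitsandbytes and accelerate):

# Assumed dependencies, not listed in the original card
!pip install -q transformers accelerate bitsandbytes peft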

  1. First, run this code to load the base model, Mistral-7B-v0.1:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

base_model_id = "mistralai/Mistral-7B-v0.1"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,  # 4-bit NF4 quantization defined above
    device_map="auto",
    trust_remote_code=True,
    use_auth_token=True
)

eval_tokenizer = AutoTokenizer.from_pretrained(base_model_id, add_bos_token=True, trust_remote_code=True)
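Optionally, as a quick sanity check that the quantized base model loaded correctly, you can print its memory footprint; this check is an addition and not part of the original workflow:

# Optional: rough memory footprint of the 4-bit quantized base model, in bytes
print(base_model.get_memory_footprint())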
  2. After that, use the PEFT library to load the adapter parameters fine-tuned on GAML onto the base model with this code:
from peft import PeftModel
import torch

# Load the GAML adapter weights on top of the base model
ft_model = PeftModel.from_pretrained(base_model, "Phanh2532/GAML-151-500")

# Tokenize a prompt and generate
eval_prompt = "Create a GAML code snippet inspired by water pollution in real life"
model_input = eval_tokenizer(eval_prompt, return_tensors="pt").to("cuda")

ft_model.eval()
with torch.no_grad():
    print(eval_tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=2000, repetition_penalty=1.15)[0], skip_special_tokens=True))
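If you would rather keep a standalone merged checkpoint instead of loading the adapter each time, the sketch below shows one way to do it; it is not part of the original card, it reloads the base model without quantization (merging an adapter into a 4-bit model is not supported), and the save path is only an example:

# Sketch: merge the adapter into an unquantized base model and save it locally
from peft import PeftModel
from transformers import AutoModelForCausalLM
import torch

full_base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1", torch_dtype=torch.bfloat16, device_map="auto"
)
merged = PeftModel.from_pretrained(full_base, "Phanh2532/GAML-151-500").merge_and_unload()
merged.save_pretrained("/content/mistral-gaml-merged")        # example path
eval_tokenizer.save_pretrained("/content/mistral-gaml-merged")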

Approach 2

Load the fine-tuned model directly from the Hub and run this code snippet:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Load the fine-tuned model directly from the Hub with 4-bit quantization
model_id = "Phanh2532/GAML-151-500"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    use_auth_token=True
)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, add_bos_token=True, trust_remote_code=True)

# Tokenize a prompt and generate
eval_prompt = "Create a GAML code snippet inspired by water pollution in real life"
model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=2000, repetition_penalty=1.15)[0], skip_special_tokens=True))
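As an alternative to calling generate directly, the same model and tokenizer can be wrapped in a text-generation pipeline; this is only a sketch that reuses the objects loaded in the snippet above:

from transformers import pipeline

# Wrap the already-loaded model and tokenizer in a text-generation pipeline
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
output = generator(
    "Create a GAML code snippet inspired by water pollution in real life",
    max_new_tokens=500,
    repetition_penalty=1.15,
)
print(output[0]["generated_text"])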

Framework versions

  • PEFT 0.7.2.dev0
