Unable to load and run fine-tuned Falcon model

#70
by DioulaD - opened

Hi everyone,
I have a fine-tuned model that I have tested on Colab, where it runs without any issue. I am trying to do the same locally, but I am running into the following error:

ValueError: The current 'device_map' had weights offloaded to the disk. Please provide an 'offload_folder' for them. Alternatively, make sure you have 'safetensors' installed if the model you are using offers the weights in this format.

I added the offload folder as suggested, but I am still unable to run the model successfully. Below is the code:

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
import torch


def load_peft_model():
    peft_model_id = "DioulaD/falcon-7b-instruct-qlora-ge-dq-v2"
    model = AutoModelForCausalLM.from_pretrained(
        "tiiuae/falcon-7b-instruct",
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
        offload_folder="offload",
        offload_state_dict=True,
    )
    # Load the LoRA adapter on top of the base model, then merge it into the base weights
    model = PeftModel.from_pretrained(model, peft_model_id, offload_folder="offload")
    model = model.merge_and_unload()

    config = PeftConfig.from_pretrained(peft_model_id)

    tknizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
    tknizer.pad_token = tknizer.eos_token
    return model, tknizer

def get_expectations(prompt, model, tknizer):
    """
    Convert a natural language query into Great Expectations methods using the fine-tuned Falcon 7B.
    Params:
        prompt : Natural language query
        model : Model downloaded from the Hugging Face Hub
        tknizer : Tokenizer from the PEFT model
    """
    try:
        # If CUDA support is not available, encoding will silently fail if cuda:0 is hardcoded
        device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

        encoding = tknizer(prompt, return_tensors="pt").to(device)
        model.to(device)

        with torch.inference_mode():
            out = model.generate(
                input_ids=encoding.input_ids,
                attention_mask=encoding.attention_mask,
                max_new_tokens=100, do_sample=True, temperature=0.3,
                eos_token_id=tknizer.eos_token_id,
                top_k=0
            )
        response = tknizer.decode(out[0], skip_special_tokens=True)
        return response.split("\n")[1]

    except Exception as e:
        print("An error occurred: ", e)


model, tknizer = load_peft_model()
prompt = "Verify that the userID column does not contain null values"
output = get_expectations(prompt, model, tknizer)

I added model.to(device) because I was getting an error (input_ids was on cuda while the model was on cpu). Now I am getting the following error:

An error occurred:  Cannot copy out of meta tensor; no data!
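
From what I have read, this meta-tensor error can happen because device_map="auto" lets accelerate place (and offload) the weights itself, so calling model.to(device) afterwards tries to copy tensors that only exist as placeholders on the meta device. If that is the cause, my guess is that the model.to(device) call should be dropped and only the inputs moved to the model's dispatch device. A minimal sketch of the generation step under that assumption (not tested locally; model.device here is the device property that transformers models expose):

# Sketch, assuming accelerate manages placement via device_map="auto":
# keep the dispatched model where it is and move only the inputs.
encoding = tknizer(prompt, return_tensors="pt").to(model.device)

with torch.inference_mode():
    out = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        max_new_tokens=100,
    )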

Thanks in advance for your help!
