from functools import partial
import random

import numpy
import pandas as pd
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

import nevergrad as ng
from datasets import Dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, default_data_collator
from peft import PeftModel, PeftConfig
from peft.utils.save_and_load import set_peft_model_state_dict, get_peft_model_state_dict

random.seed(42)
numpy.random.seed(42)


def load_base_model_and_lora_modules(lora_module_list):
    # use gpu if available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # the first module in the list serves as the default adapter
    default_peft_model_id = lora_module_list[0]
    # find the base model from the adapter's config
    model_name_or_path = PeftConfig.from_pretrained(default_peft_model_id).base_model_name_or_path
    base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path)
    # load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    # 0 is the default model
    peft_model = PeftModel.from_pretrained(base_model, default_peft_model_id)
    peft_model = peft_model.to(device)
    peft_model.eval()

    print("> Begin to load LoRA modules")
    # cache every adapter's state dict so the optimizer can recombine them cheaply
    cache = {}
    for peft_model_id in tqdm(lora_module_list):
        print("> Loading {} ...".format(peft_model_id))
        cur_peft_model = PeftModel.from_pretrained(base_model, peft_model_id)
        cache[peft_model_id] = get_peft_model_state_dict(cur_peft_model)

    return peft_model, tokenizer, cache


def preprocess_function(examples, tokenizer):
    # tokenize inputs and targets; pad tokens in the labels are replaced
    # with -100 so they are ignored by the cross-entropy loss
    inputs = examples["input"]
    targets = examples["output"]
    model_inputs = tokenizer(
        inputs,
        max_length=2048,
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    labels = tokenizer(
        targets,
        max_length=256,
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    labels = labels["input_ids"]
    labels[labels == tokenizer.pad_token_id] = -100
    model_inputs["labels"] = labels
    return model_inputs


def load_dataset_and_run(example_inputs, example_outputs, tokenizer):
    df = [
        {"input": example_inputs[i], "output": example_outputs[i]}
        for i in range(len(example_inputs))
    ]
    dataset = Dataset.from_pandas(pd.DataFrame(df))
    preprocess_func_with_tokenizer = partial(preprocess_function, tokenizer=tokenizer)
    processed_datasets = dataset.map(
        preprocess_func_with_tokenizer,
        batched=True,
        num_proc=1,
        desc="Running tokenizer on dataset",
    )
    return processed_datasets


def get_score(weights, model, cache, example_dataset):
    # the composed lora state dict: a weighted sum of the cached adapters
    final_state_dict = {}
    lora_module_list = list(cache.keys())
    # all adapters share the same set of keys
    keys = cache[lora_module_list[0]].keys()
    for i, peft_model_id in enumerate(lora_module_list):
        lora_state_dict = cache[peft_model_id]
        if i == 0:
            for key in keys:
                final_state_dict[key] = weights[i] * lora_state_dict[key]
        else:
            for key in keys:
                final_state_dict[key] = (
                    final_state_dict[key] + weights[i] * lora_state_dict[key]
                )
    # reload the model with the composed adapter weights
    set_peft_model_state_dict(model, final_state_dict)

    def get_loss():
        # evaluate the composed model on the few-shot examples
        # (closes over `model` and `example_dataset` from get_score)
        train_dataset = example_dataset
        train_dataloader = DataLoader(
            train_dataset,
            collate_fn=default_data_collator,
            batch_size=len(train_dataset),
            pin_memory=True,
        )
        # use gpu if available
        device = "cuda" if torch.cuda.is_available() else "cpu"
        train_loss = 0
        with torch.no_grad():
            for _, batch in enumerate(train_dataloader):
                batch = {k: v.to(device) for k, v in batch.items()}
                outputs = model(**batch)
                train_loss += outputs.loss.detach().float()
        return float(train_loss) / len(train_dataset["input"])

    # minimize the loss plus an L1 regularization term on the weights
    loss = get_loss()
    l1_term = sum(abs(x) for x in weights) / len(weights)
    metric_val = loss + 0.05 * l1_term
    return metric_val


def get_final_weights(weights, lora_module_list, cache):
    # same weighted combination as in get_score, but returned instead of applied
    final_state_dict = {}
    keys = cache[lora_module_list[0]].keys()
    for i, peft_model_id in enumerate(lora_module_list):
        lora_state_dict = cache[peft_model_id]
        if i == 0:
            for key in keys:
                final_state_dict[key] = weights[i] * lora_state_dict[key]
        else:
            for key in keys:
                final_state_dict[key] = (
                    final_state_dict[key] + weights[i] * lora_state_dict[key]
                )
    return final_state_dict


def lorahub_learning(lora_module_list, text_input, text_output, max_inference_step):
    number_of_loras = len(lora_module_list)
    if number_of_loras == 0:
        return None

    # load the base model, the default adapter, and cache all adapter state dicts
    model, tokenizer, cache = load_base_model_and_lora_modules(lora_module_list)
    # tokenize the few-shot examples (one example per line)
    dataset = load_dataset_and_run(text_input.split("\n"), text_output.split("\n"), tokenizer)
    get_score_partial = partial(get_score, model=model, cache=cache, example_dataset=dataset)

    # constrain each adapter weight to [-1.5, 1.5]
    instrum = ng.p.Array(
        init=[0] * number_of_loras,
        upper=[1.5] * number_of_loras,
        lower=[-1.5] * number_of_loras,
    )
    optimizer = ng.optimizers.NGOpt(parametrization=instrum, budget=max_inference_step)
    print("> Begin to perform gradient-free optimization ...")
    recommendation = optimizer.minimize(get_score_partial, verbosity=1)
    final_lora = get_final_weights(recommendation.value, lora_module_list, cache)
    return recommendation, final_lora
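

if __name__ == "__main__":
    # Minimal usage sketch, not part of the algorithm above. The module IDs
    # below are hypothetical placeholders: substitute PEFT LoRA adapters that
    # all share the same seq2seq base model (e.g. a Flan-T5 checkpoint).
    example_modules = [
        "your-org/lora-module-a",  # placeholder adapter ID
        "your-org/lora-module-b",  # placeholder adapter ID
    ]
    # a handful of few-shot examples, one example per line
    example_inputs = "Translate to French: Hello\nTranslate to French: Goodbye"
    example_outputs = "Bonjour\nAu revoir"

    result = lorahub_learning(
        example_modules,
        example_inputs,
        example_outputs,
        max_inference_step=40,
    )
    if result is not None:
        recommendation, final_lora = result
        print("> Learned adapter weights:", recommendation.value)
        # `final_lora` is a composed LoRA state dict; it can be applied to a
        # PeftModel via set_peft_model_state_dict for downstream inference.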