"""Fine-tune a Mixtral model to find ICD-10 codes.

This script runs well on a single GPU; the goal now is to make sure it also
runs in a multi-GPU environment.
"""

import warnings
warnings.filterwarnings("ignore")

from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig
from datasets import load_dataset
import torch
import transformers
from datetime import datetime
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training , LoraConfig, get_peft_model

fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)  # shards model weights and optimizer state across GPUs in a multi-GPU environment

accelerator = Accelerator(fsdp_plugin=fsdp_plugin)
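
# Multi-GPU note: start the script through the Accelerate launcher rather than
# plain `python`, so each GPU gets its own process (illustrative command):
#   accelerate launch --num_processes 2 main.py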

## Loading the dataset
def Profiler_load_dataset(data_files, field='train'):
    return load_dataset('json', data_files=data_files, field=field)


## High RAM usage here: the whole JSON file is loaded into memory.
## load_dataset returns a DatasetDict (keyed 'train' by default), so index it
## to hand plain Dataset objects to the Trainer below.
train_dataset = Profiler_load_dataset(data_files='/content/prov_data2.jsonl', field='train')['train']
eval_dataset = Profiler_load_dataset(data_files='/content/prov_data2.jsonl', field='test')['train']
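
# Illustrative shape of prov_data2.jsonl (the real file is not in the repo):
# a JSON document whose top-level "train" and "test" fields each hold a list of
# records such as {"Input": "acute appendicitis", "Output": "K35.80"}.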


### The formatting function turns each record into a single instruction-style
### string, which is the form that is easy to use for instruction fine-tuning.
def format_fun(example):
    text = f"The ICD-10 code for {example['Input']} is {example['Output']}"
    return text
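
# Example with a made-up record:
#   format_fun({'Input': 'acute appendicitis', 'Output': 'K35.80'})
#   -> 'The ICD-10 code for acute appendicitis is K35.80'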

# Try out different checkpoints from the Hugging Face Hub. Note that GGUF repos
# such as 'TheBloke/dolphin-2.5-mixtral-8x7b-GGUF' are llama.cpp files and
# cannot be loaded with AutoModelForCausalLM + BitsAndBytesConfig, so a
# transformers-format checkpoint is used here instead.

base_model_id = 'mistralai/Mixtral-8x7B-v0.1'  # this is passed in as arg -> args.model_id

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config, device_map="cuda")
## The 4-bit quantized model loads and runs at this point.
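
# Optional smoke check (added here; not in the original flow): report how much
# memory the 4-bit model occupies, using the standard get_memory_footprint().
print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")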


tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    padding_side="left",
    add_eos_token=True,
    add_bos_token=True,
)
tokenizer.pad_token = tokenizer.eos_token  # Mixtral has no pad token; reuse EOS for padding


max_length = 50  # max number of tokens per tokenized training example (tokens, not words)
def generate_and_tokenize_prompt(prompt):
    result = tokenizer(
        format_fun(prompt), 
        truncation=True,
        max_length=max_length,
        padding="max_length",
    )
    result["labels"] = result["input_ids"].copy() #what this do ?? 
    return result
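
# Quick sanity check with a made-up record: every example should come back
# padded or truncated to exactly max_length tokens.
sample = generate_and_tokenize_prompt({'Input': 'acute appendicitis', 'Output': 'K35.80'})
assert len(sample['input_ids']) == max_length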

tokenized_train_dataset = train_dataset.map(generate_and_tokenize_prompt)
tokenized_val_dataset = eval_dataset.map(generate_and_tokenize_prompt)


#Fine tuning the model 
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "w1",
        "w2",
        "w3",
        "lm_head",
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, config)
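
# PEFT adds print_trainable_parameters(); it should show that only the LoRA
# adapters (a small fraction of the 8x7B weights) are trainable.
model.print_trainable_parameters()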

if torch.cuda.device_count() > 1: # If more than 1 GPU
    model.is_parallelizable = True
    model.model_parallel = True
    

project = "icd-finetune"
base_model_name = "mixtral"
run_name = base_model_name + "-" + project
output_dir = "./" + run_name

trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    args=transformers.TrainingArguments(
        output_dir=output_dir,
        warmup_steps=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=1,
        gradient_checkpointing=True,
        max_steps=300,
        learning_rate=2.5e-5,        # small learning rate for fine-tuning
        bf16=True,                   # match bnb_4bit_compute_dtype=torch.bfloat16 above
        optim="paged_adamw_8bit",
        logging_steps=25,            # report training loss every 25 steps
        logging_dir="./logs",        # directory for storing logs
        save_strategy="steps",       # checkpoint on a step schedule...
        save_steps=25,               # ...every 25 steps
        evaluation_strategy="steps", # evaluate on a step schedule...
        eval_steps=25,               # ...every 25 steps
        do_eval=True,                # run evaluation during training
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

model.config.use_cache = False  # KV cache is incompatible with gradient checkpointing; re-enable for inference
trainer.train()
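
# A quick smoke test after training (an added sketch; the prompt record is made
# up): re-enable the KV cache and generate from the fine-tuned model. A fresh
# tokenizer is used so no EOS token is appended to the prompt.
model.config.use_cache = True
model.eval()
eval_tokenizer = AutoTokenizer.from_pretrained(base_model_id)
test_prompt = "The ICD-10 code for acute appendicitis is"
inputs = eval_tokenizer(test_prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=20)
print(eval_tokenizer.decode(output[0], skip_special_tokens=True))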



# TODO: Implement RAG on the fine-tuned model; a minimal retrieval sketch follows.
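# A minimal sketch of one way to do it, assuming a sentence-transformers
# retriever and an in-line ICD-10 description map supplied by the caller
# (neither is part of the original pipeline): retrieve the closest code
# descriptions and prepend them to the generation prompt.
def retrieve_context(query, icd10_descriptions, k=2):
    """Return the k ICD-10 descriptions most similar to `query`."""
    from sentence_transformers import SentenceTransformer, util
    embedder = SentenceTransformer("all-MiniLM-L6-v2")  # illustrative embedding model
    corpus = list(icd10_descriptions.values())
    corpus_emb = embedder.encode(corpus, convert_to_tensor=True)
    query_emb = embedder.encode(query, convert_to_tensor=True)
    hits = util.cos_sim(query_emb, corpus_emb)[0].topk(min(k, len(corpus)))
    return [corpus[i] for i in hits.indices.tolist()]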





# Final model prepared.
'''
# 1) Make sure the script runs in a multi-GPU environment.
# 2) Verify the dataset loads correctly.
# 3) Add a LangChain layer to oversee prompt generation.
# 4) Also try BERT-style models rather than using Mixtral directly.
# 5) Once the model is trained, copy the checkpoint folder into a local environment.
'''