# randomSpace / main.py
# This repo contains the code for a Mixtral model that finds ICD-10 codes.
# The script runs well on a single GPU; the goal now is to make sure it also runs in a multi-GPU environment.
import warnings
warnings.filterwarnings("ignore")
from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig
from datasets import load_dataset
import torch
import transformers
from datetime import datetime
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)  # FSDP plugin: shards model weights and optimizer state across the multi-GPU environment
accelerator = Accelerator(fsdp_plugin=fsdp_plugin)
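# Note (hedged): FSDP only takes effect when this script is launched with one process per GPU,
# e.g. `accelerate launch --num_processes <num_gpus> main.py`; a plain `python main.py`
# run stays on a single process.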
## Loading the dataset
def Profiler_load_dataset(data_files, field='train'):
    return load_dataset('json', data_files=data_files, field=field)
## high ram used here
train_dataset = Profiler_load_dataset(data_files='/content/prov_data2.jsonl', field='train')
eval_dataset = Profiler_load_dataset(data_files='/content/prov_data2.jsonl', field='test')
### What is the formatting function for?
## It turns each record into a single prompt string for the Mixtral model, which makes it easy to use in an instruction fine-tuning scenario
def format_fun(example):
    text = f" The ICD10 code for {example['Input']} is , {example['Output']} "
    return text
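# Example with a hypothetical record {"Input": "acute appendicitis", "Output": "K35.80"}:
# format_fun(...) -> " The ICD10 code for acute appendicitis is , K35.80 "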
# base_model_id = "mistralai/Mixtral-8x7B-v0.1"
# Try out different models from the Hugging Face Hub (the best would have been the one released by the authors, but that is not quantised, so it likely would not fit here!)
base_model_id = 'TheBloke/dolphin-2.5-mixtral-8x7b-GGUF'  # this is passed in as arg -> args.model_id
# Caveat: this repo serves GGUF files, which cannot be loaded this way with AutoModelForCausalLM + BitsAndBytesConfig;
# a full-weights checkpoint (e.g. the commented-out base Mixtral id above) is needed for 4-bit bnb loading.
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_use_double_quant=True,
bnb_4bit_compute_dtype=torch.bfloat16
)
model = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config, device_map="cuda")
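# For the multi-GPU run, each process may need the model on its own GPU rather than a blanket
# "cuda", e.g. device_map={"": accelerator.process_index} (a hedged suggestion, not verified here).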
## The model got loaded and works !!
tokenizer = AutoTokenizer.from_pretrained(
base_model_id,
padding_side="left",
add_eos_token=True,
add_bos_token=True,
)
tokenizer.pad_token = tokenizer.eos_token
max_length = 50  # max number of tokens per tokenized example (truncation/padding length, not generated words)
def generate_and_tokenize_prompt(prompt):
    result = tokenizer(
        format_fun(prompt),
        truncation=True,
        max_length=max_length,
        padding="max_length",
    )
    result["labels"] = result["input_ids"].copy()  # causal LM: use the input ids as labels so loss is computed over the full sequence
    return result
tokenized_train_dataset = train_dataset.map(generate_and_tokenize_prompt)
tokenized_val_dataset = eval_dataset.map(generate_and_tokenize_prompt)
#Fine tuning the model
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
config = LoraConfig(
r=32,
lora_alpha=64,
target_modules=[
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"w1",
"w2",
"w3",
"lm_head",
],
bias="none",
lora_dropout=0.05, # Conventional
task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
if torch.cuda.device_count() > 1:  # if more than one GPU is visible
    model.is_parallelizable = True
    model.model_parallel = True
project = "icd-finetune"
base_model_name = "mixtral"
run_name = base_model_name + "-" + project
output_dir = "./" + run_name
trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    args=transformers.TrainingArguments(
        output_dir=output_dir,
        warmup_steps=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=1,
        gradient_checkpointing=True,
        max_steps=300,
        learning_rate=2.5e-5,  # want a small lr for fine-tuning
        bf16=True,  # match the bfloat16 compute dtype used for 4-bit loading
        optim="paged_adamw_8bit",
        logging_steps=25,  # log training loss every 25 steps
        logging_dir="./logs",  # directory for storing logs
        save_strategy="steps",  # save a checkpoint every save_steps
        save_steps=25,  # save checkpoints every 25 steps
        evaluation_strategy="steps",  # evaluate every eval_steps
        eval_steps=25,  # evaluate every 25 steps
        do_eval=True,  # run evaluation during training
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False # silence the warnings. Please re-enable for inference!
trainer.train()
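# Optionally persist the final LoRA adapter under output_dir as well (a hedged extra;
# the Trainer already writes intermediate checkpoints every 25 steps):
trainer.save_model(output_dir)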
# Implement RAG on the fine-tuned model
# final model prepared
'''
1) Make sure the script runs in the multi-GPU environment!
2) The dataset is loaded
3) The LangChain implementation to oversee the prompt generation guide
4) Also try BERT models rather than directly using the Mixtral model
5) Once the model is trained, copy the checkpoint folder into a local env (see the inference sketch below)
'''
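# A minimal inference sketch for item 5 (hedged: the checkpoint path below is an assumption
# based on output_dir and max_steps; adjust it to the folder you actually copied):
#
# from peft import PeftModel
#
# base_model = AutoModelForCausalLM.from_pretrained(
#     base_model_id, quantization_config=bnb_config, device_map="auto"
# )
# ft_model = PeftModel.from_pretrained(base_model, "./mixtral-icd-finetune/checkpoint-300")
# ft_model.eval()
# prompt = " The ICD10 code for acute appendicitis is , "  # hypothetical input
# inputs = tokenizer(prompt, return_tensors="pt").to(base_model.device)
# with torch.no_grad():
#     out = ft_model.generate(**inputs, max_new_tokens=20, pad_token_id=tokenizer.eos_token_id)
# print(tokenizer.decode(out[0], skip_special_tokens=True))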