test_trainer / autotrain_llm.py

Upload 3 files

96601f8 verified 5 months ago

4.18 kB

	# -*- coding: utf-8 -*-
	"""AutoTrain_LLM.ipynb
	Automatically generated by Colab.
	Original file is located at
	https://colab.research.google.com/github/huggingface/autotrain-advanced/blob/main/colabs/AutoTrain_LLM.ipynb
	"""

	#@title 🤗 AutoTrain LLM
	#@markdown In order to use this colab
	#@markdown - upload train.csv to a folder named `data/`
	#@markdown - train.csv must contain a `text` column
	#@markdown - choose a project name if you wish
	#@markdown - change model if you wish, you can use most of the text-generation models from Hugging Face Hub
	#@markdown - add huggingface information (token) if you wish to push trained model to huggingface hub
	#@markdown - update hyperparameters if you wish
	#@markdown - click `Runtime > Run all` or run each cell individually
	#@markdown - report issues / feature requests here: https://github.com/huggingface/autotrain-advanced/issues

	import os
	# Install (or upgrade, via -U) the autotrain-advanced package. Only stdout is
	# redirected to install_logs.txt; anything written to stderr still shows up
	# in the cell output.
	!pip install -U autotrain-advanced > install_logs.txt
	# Run autotrain's Colab setup step, again sending stdout to a log file.
	!autotrain setup --colab > setup_logs.txt

	#@markdown ---
	#@markdown #### Project Config
	#@markdown Note: if you are using a restricted/private model, you need to enter your Hugging Face token in the next step.
	# Exported as PROJECT_NAME and passed to `autotrain llm --project-name`.
	project_name = 'my-autotrain-llm' # @param {type:"string"}
	# Exported as MODEL_NAME and passed to `autotrain llm --model`.
	model_name = 'abhishek/llama-2-7b-hf-small-shards' # @param {type:"string"}

	#@markdown ---
	#@markdown #### Push to Hub?
	#@markdown Use these only if you want to push your trained model to a private repo in your Hugging Face Account
	#@markdown If you don't use these, the model will be saved in Google Colab and you will need to download it manually.
	#@markdown Please enter your Hugging Face write token. The trained model will be saved to your Hugging Face account.
	#@markdown You can find your token here: https://huggingface.co/settings/tokens
	# When True, the training command at the end of this script appends
	# `--push-to-hub --token <hf_token>`; when False neither flag is passed.
	push_to_hub = False # @param ["False", "True"] {type:"raw"}
	# Placeholder value; exported as HF_TOKEN and only placed on the command line
	# when push_to_hub is True.
	hf_token = "hf_XXX" #@param {type:"string"}
	# Exported as HF_USERNAME and always passed via `--username`, regardless of
	# push_to_hub.
	hf_username = "abc" #@param {type:"string"}

	#@markdown ---
	#@markdown #### Hyperparameters
	# Training hyperparameters. Each value below is exported as an environment
	# variable and forwarded to the `autotrain llm` command later in this script.
	learning_rate = 2e-4 # @param {type:"number"}
	num_epochs = 1 #@param {type:"number"}
	batch_size = 1 # @param {type:"slider", min:1, max:32, step:1}
	block_size = 1024 # @param {type:"number"}
	# String-valued dropdowns are declared WITHOUT {type:"raw"}: a raw field
	# writes the chosen option back as a Python expression, so picking e.g.
	# "orpo" would yield the bare name `orpo` instead of the string "orpo".
	trainer = "sft" # @param ["default", "sft", "orpo"]
	warmup_ratio = 0.1 # @param {type:"number"}
	weight_decay = 0.01 # @param {type:"number"}
	gradient_accumulation = 4 # @param {type:"number"}
	mixed_precision = "fp16" # @param ["fp16", "bf16", "none"]
	# peft stays a raw dropdown on purpose: its options are the Python booleans
	# False / True, not strings.
	peft = True # @param ["False", "True"] {type:"raw"}
	quantization = "int4" # @param ["int4", "int8", "none"]
	lora_r = 16 #@param {type:"number"}
	lora_alpha = 32 #@param {type:"number"}
	lora_dropout = 0.05 #@param {type:"number"}

	# Publish every notebook setting as an environment variable so the shell
	# command that follows can read it with ${NAME}.
	# Settings that are expected to be strings already are exported unchanged.
	os.environ.update({
		"PROJECT_NAME": project_name,
		"MODEL_NAME": model_name,
		"HF_TOKEN": hf_token,
		"HF_USERNAME": hf_username,
		"TRAINER": trainer,
	})
	# Everything else (numbers, booleans, dropdown values) goes through str(),
	# since environment values must be strings.
	os.environ.update({
		env_name: str(setting)
		for env_name, setting in {
			"PUSH_TO_HUB": push_to_hub,
			"LEARNING_RATE": learning_rate,
			"NUM_EPOCHS": num_epochs,
			"BATCH_SIZE": batch_size,
			"BLOCK_SIZE": block_size,
			"WARMUP_RATIO": warmup_ratio,
			"WEIGHT_DECAY": weight_decay,
			"GRADIENT_ACCUMULATION": gradient_accumulation,
			"MIXED_PRECISION": mixed_precision,
			"PEFT": peft,
			"QUANTIZATION": quantization,
			"LORA_R": lora_r,
			"LORA_ALPHA": lora_alpha,
			"LORA_DROPOUT": lora_dropout,
		}.items()
	})

	!autotrain llm \
	--train \
	--model ${MODEL_NAME} \
	--project-name ${PROJECT_NAME} \
	--data-path data/ \
	--text-column text \
	--lr ${LEARNING_RATE} \
	--batch-size ${BATCH_SIZE} \
	--epochs ${NUM_EPOCHS} \
	--block-size ${BLOCK_SIZE} \
	--warmup-ratio ${WARMUP_RATIO} \
	--lora-r ${LORA_R} \
	--lora-alpha ${LORA_ALPHA} \
	--lora-dropout ${LORA_DROPOUT} \
	--weight-decay ${WEIGHT_DECAY} \
	--gradient-accumulation ${GRADIENT_ACCUMULATION} \
	--quantization ${QUANTIZATION} \
	--mixed-precision ${MIXED_PRECISION} \
	--username ${HF_USERNAME} \
	--trainer ${TRAINER} \
	$( [[ "$PEFT" == "True" ]] && echo "--peft" ) \
	$( [[ "$PUSH_TO_HUB" == "True" ]] && echo "--push-to-hub --token ${HF_TOKEN}" )