# step15000 / first_layer_1 / training_config.yaml
adam_beta1: 0.9
adam_beta2: 0.95
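# Adam betas (0.9, 0.95): the lower second-moment decay of 0.95 is a common choice
# for LLM pretraining; nothing else about the optimizer is modified here.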
adjust_step: 0
autoresume: false
batch_size: 6
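# Presumably the per-device micro-batch size (assumption); the effective batch is
# total_batch_size below, reached via gradient_accumulation.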
clip_grad_norm: 1.0
comment: null
cycle_length: null
dtype: bfloat16
emb_freeze: null
eval_dataset_path: /work01/yanokazuki/fineweb/valid_data_gpt2/
eval_every: 1000
first_attention: false
first_attention_resume: false
first_layer: true
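# Stage flag for the two-stage recipe referenced by training_config below
# (478m_first_layer.yaml); exact effect depends on the training code (assumption).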
gradient_accumulation: 13
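# Gradients are accumulated over 13 micro-batches before each optimizer step.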
keep_checkpoints: null
layer_freeze: null
layer_freeze_2: false
load_optimizer_state_on_resume: true
lr: 0.0004
max_length: 1024
max_train_tokens: null
min_lr_ratio: 0.1
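# Cosine floor, assuming min_lr_ratio scales the peak lr: 0.1 * 4e-4 = 4e-5.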
model_config: model_config/478m.json
model_name_or_path: null
model_revision: null
num_training_steps: 15000
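# Rough token budget, assuming sequences are packed to max_length:
# 15000 steps * 624 sequences * 1024 tokens ≈ 9.6B tokens.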
optimizer: Adam
restart_warmup_steps: null
resume_from: null
run_name: first_layer
save_dir: checkpoints/first_layer
save_every: 1000
scheduler: cosine
seed: 0
shuffle: true
skip_batches: !!set {}
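# '!!set {}' is YAML's explicit tag for an empty set, i.e. no batches are skipped.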
tags:
- 396m-for-680m
total_batch_size: 624
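# 624 = batch_size (6) * gradient_accumulation (13) * 8, which suggests
# 8 data-parallel ranks (assumption; the world size is not stated in this file).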
train_dataset_path: /work01/yanokazuki/fineweb/train_data_gpt2/
training_config: training_config/two_stage/478m_first_layer.yaml
wandb_watch: true
warmed_up_model: null
warmup_steps: 1500
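# Warmup spans the first 10% of the 15000 training steps.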
weight_decay: 0.0
workers: 8