import time
#out_dir = 'out-owt-gpt2mini'
out_dir = 'out-funcom_raw_scratch'
eval_interval = 1000
eval_iters = 40
wandb_log = True # log metrics to Weights & Biases
wandb_project = 'fundats_srcml'
wandb_run_name = 'ft-gpt2-srcml-1' #+ str(time.time())
dataset = 'fundats_srcml'
init_from = 'scratch'
#init_from = 'gpt2-large'
# save a checkpoint after every eval, even if the validation loss does not improve
always_save_checkpoint = True
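# note on init_from (nanoGPT semantics): 'scratch' builds a fresh model from the
# n_layer/n_head/n_embd settings below, 'resume' reloads the latest checkpoint
# from out_dir, and 'gpt2*' names load pretrained weights via GPT.from_pretrained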
# small model
#n_layer = 6
#n_head = 6
#n_embd = 384
#dropout = 0.2
block_size = 1024
# gpt2-large
#n_layer = 36
#n_head = 20
#n_embd = 1280
#dropout = 0.2
# gpt2-medium
n_layer = 24
n_head = 16
n_embd = 1024
dropout = 0.2
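# for reference, a minimal commented-out sketch of how these dims reach the model,
# assuming the standard nanoGPT model.py (train.py already does the equivalent,
# so this is illustrative only):
#   from model import GPTConfig, GPT
#   gptconf = GPTConfig(n_layer=n_layer, n_head=n_head, n_embd=n_embd,
#                       block_size=block_size, dropout=dropout)
#   model = GPT(gptconf)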
# the number of tokens per iteration (per GPU):
# 4 batch_size * 4 grad_accum * 1024 tokens = 16,384 tokens/iter
# shakespeare has 301,966 tokens, so 1 epoch ~= 18.4 iters
# stackoverflow has 10,495,518,108 tokens
# openwebtext has 9,035,582,489 tokens
# funcom_raw has 8,752,695,577 tokens
# fundats_srcml has 48,774,749,459 tokens (see the epoch sanity check after max_iters below)
batch_size = 4
gradient_accumulation_steps = 4
max_iters = 372122 * 10
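# purely informational sanity check of the epoch math; the underscore-prefixed
# names never become config keys in nanoGPT's train.py. The 8-GPU world size is
# an assumption (max_iters above works out to ~10 epochs only at that size);
# adjust _n_gpus to match the actual run.
_tokens_per_iter_per_gpu = batch_size * gradient_accumulation_steps * block_size  # 4 * 4 * 1024 = 16,384
_n_gpus = 8  # assumed DDP world size
_fundats_tokens = 48_774_749_459
_iters_per_epoch = _fundats_tokens / (_tokens_per_iter_per_gpu * _n_gpus)  # ~372,122
_epochs = max_iters / _iters_per_epoch  # ~10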
# train at a constant, finetune-style learning rate
learning_rate = 3e-5
decay_lr = False
#weight_decay = 1e-1
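# note: with decay_lr = False, nanoGPT's train.py skips the warmup/cosine schedule
# and feeds the optimizer learning_rate on every step; leaving weight_decay
# commented out means train.py's default value is used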