Spaces:

teachyourselfcoding
/

chatlawv1

Runtime error

App Files Files Community

chatlawv1 / trlx /configs /test_config.yml

teachyourselfcoding

Upload 245 files

fa6856c 11 months ago

raw

history blame

No virus

1.92 kB

	train:
	seq_length: 64 # Size of LM context
	epochs: 100 # Train for max(epochs, total_steps)
	total_steps: 1000 # Train for max(epochs, total_steps)
	batch_size: 16 # batch size

	checkpoint_interval: 10000 # checkpoint interval
	eval_interval: 128 # eval interval

	pipeline: "PromptPipeline" # prompt pipeline to load
	trainer: "AcceleratePPOTrainer" # Name of model trainer to load

	model:
	model_path: "lvwerra/gpt2-imdb" # Name of hf model to load
	num_layers_unfrozen: 2 # Number of bottom layers to freeze during training

	tokenizer:
	tokenizer_path: "gpt2" # Name of hf tokenizer to load
	truncation_side: "right" # Trim this side of samples if they are longer than LM context

	optimizer:
	name: "adamw" # Name of optimizer to load
	kwargs:
	lr: 1.412e-4 # Learning rate
	betas: [0.9, 0.95] # Adam betas
	eps: 1.0e-8 # Adam eps
	weight_decay: 1.0e-6 # Weight decay param

	scheduler:
	name: "cosine_annealing" # Name of learning rate scheduler
	kwargs:
	T_max: 10000 # Maximum number of steps
	eta_min: 1.412e-4 # Minimum learning rate

	method:
	name: "ppoconfig" # Name of RL method config
	num_rollouts: 128 # Number of rollouts to collect per epoch
	chunk_size: 128 # Number of rollouts to collect in one loop
	ppo_epochs: 4 # Number of ppo epochs
	init_kl_coef: 0.2 # init kl coefficient
	target: 6 # target kl coefficient, set None for fixed kl coef
	horizon: 10000 # PPO horizon
	gamma: 0.99 # PPO discount
	lam: 0.95 # PPO lambda
	cliprange: 0.2 # clip range
	cliprange_value: 0.2 # clip range
	vf_coef: 1.0 # value term weight
	scale_reward: "running" # False\|"ref"\|"running" estimate against which to scale rewards
	cliprange_reward: 10
	ref_mean: null
	ref_std: null
	gen_kwargs:
	max_length: 48 # LM max sample gen length
	min_length: 48 # LM min sample gen length
	top_k: 0.0 # top k
	top_p: 1.0 # top p
	do_sample: True # sample