# Page-header residue from the hosting site (not part of the config):
# Spaces: Runtime error
# Training-loop settings.
train:
  seq_length: 64  # Size of LM context
  epochs: 100  # Train for max(epochs, total_steps)
  total_steps: 1000  # Train for max(epochs, total_steps)
  batch_size: 16  # Batch size
  # NOTE(review): this is larger than total_steps above, so an interval-based
  # checkpoint may never trigger during a run; confirm against the trainer.
  checkpoint_interval: 10000  # Checkpoint interval
  eval_interval: 128  # Eval interval
  pipeline: "PromptPipeline"  # Prompt pipeline to load
  trainer: "AcceleratePPOTrainer"  # Name of model trainer to load
# Policy model to fine-tune.
model:
  model_path: "lvwerra/gpt2-imdb"  # Name of hf model to load
  # Number of layers left unfrozen (trainable) during training; the remaining
  # layers are frozen.
  num_layers_unfrozen: 2
# Tokenizer used to encode prompts and samples.
tokenizer:
  tokenizer_path: "gpt2"  # Name of hf tokenizer to load
  # Trim this side of samples if they are longer than the LM context.
  truncation_side: "right"
# Optimizer selection; `kwargs` holds the optimizer's hyperparameters.
optimizer:
  name: "adamw"  # Name of optimizer to load
  kwargs:
    lr: 1.412e-4  # Learning rate
    betas: [0.9, 0.95]  # Adam betas
    eps: 1.0e-8  # Adam eps
    weight_decay: 1.0e-6  # Weight decay param
# Learning-rate scheduler selection; `kwargs` holds the scheduler's settings.
scheduler:
  name: "cosine_annealing"  # Name of learning rate scheduler
  kwargs:
    T_max: 10000  # Maximum number of steps
    # Minimum learning rate.
    # NOTE(review): this equals the optimizer's `lr` (1.412e-4), so the
    # annealing range is zero and the learning rate stays constant; confirm
    # that a constant rate is intended.
    eta_min: 1.412e-4
# RL method (PPO) hyperparameters; `gen_kwargs` holds sampling settings used
# when generating rollouts.
method:
  name: "ppoconfig"  # Name of RL method config
  num_rollouts: 128  # Number of rollouts to collect per epoch
  chunk_size: 128  # Number of rollouts to collect in one loop
  ppo_epochs: 4  # Number of ppo epochs
  init_kl_coef: 0.2  # Init kl coefficient
  target: 6  # Target kl coefficient, set null for fixed kl coef
  horizon: 10000  # PPO horizon
  gamma: 0.99  # PPO discount
  lam: 0.95  # PPO lambda
  cliprange: 0.2  # Clip range
  cliprange_value: 0.2  # Clip range
  vf_coef: 1.0  # Value term weight
  # Estimate against which to scale rewards: false | "ref" | "running".
  scale_reward: "running"
  cliprange_reward: 10
  ref_mean: null
  ref_std: null
  gen_kwargs:
    max_length: 48  # LM max sample gen length
    min_length: 48  # LM min sample gen length
    top_k: 0.0  # Top k
    top_p: 1.0  # Top p
    do_sample: true  # Sample