# File: chatlawv1/trlx/configs/sweeps/ppo_sweep.yml
# (Hugging Face file-viewer residue: uploaded by teachyourselfcoding,
# "Upload 245 files", commit fa6856c, no virus flagged, 697 Bytes)
# Sweep-level settings (as opposed to the hyperparameters being searched):
# optimize the "reward/mean" metric in "max" mode, drawing 32 samples with
# the "random" search algorithm and the "fifo" scheduler.
# NOTE(review): presumably forwarded to Ray Tune by the sweep loader — confirm.
tune_config:
  mode: "max"
  metric: "reward/mean"
  search_alg: "random"
  scheduler: "fifo"
  num_samples: 32
# Search space. Each top-level key names one swept parameter (it appears to be
# a dotted path into the training config — confirm against the loader) and
# maps to a sampling `strategy` plus the `values` handed to that strategy.
# Strategy reference:
# https://docs.ray.io/en/latest/tune/api_docs/search_space.html#tune-sample-docs
optimizer.kwargs.lr:
  strategy: "loguniform"
  values: [0.000001, 0.001]  # presumably [lower, upper] bounds — confirm
method.init_kl_coef:
  strategy: "loguniform"
  values: [0.0001, 0.2]  # presumably [lower, upper] bounds — confirm
model.num_layers_unfrozen:
  strategy: "choice"
  values: [-1, 2, 6]  # NOTE(review): -1 looks like an "all layers" sentinel — confirm
method.num_rollouts:
  strategy: "choice"
  values: [32, 128, 512]
method.target:
  strategy: "choice"
  values: [null, 1]  # null is a real YAML null here, not the string "null"
# Disable checkpointing to save storage. Both entries use "choice" over a
# single value, so every sample receives the same setting.
# NOTE(review): 10000000 is presumably an interval larger than any run, so a
# periodic checkpoint never triggers — confirm against the trainer.
train.checkpoint_interval:
  strategy: "choice"
  values: [10000000]
train.save_best:
  strategy: "choice"
  values: [false]