# Hyperparameter sweep configuration.
# `tune_config` holds the sweep-level settings; every other top-level key
# describes one swept parameter as a `strategy` plus its `values`.
# NOTE(review): dotted keys presumably address nested fields of the training
# config (e.g. `optimizer.kwargs.lr`) - confirm against the sweep loader.
tune_config:
  # Presumably the direction in which `metric` is optimized - TODO confirm.
  mode: "max"
  metric: "reward/mean"
  search_alg: "random"
  scheduler: "fifo"
  # Presumably the number of trials sampled from the search space - TODO confirm.
  num_samples: 32

# Search space; see the Ray Tune sampling docs for the available strategies:
# https://docs.ray.io/en/latest/tune/api_docs/search_space.html#tune-sample-docs
# NOTE(review): for "loguniform" the two values look like [lower, upper]
# bounds; for "choice" they look like the candidate set - confirm.
optimizer.kwargs.lr:
  strategy: "loguniform"
  values: [0.000001, 0.001]

method.init_kl_coef:
  strategy: "loguniform"
  values: [0.0001, 0.2]

model.num_layers_unfrozen:
  strategy: "choice"
  values: [-1, 2, 6]

method.num_rollouts:
  strategy: "choice"
  values: [32, 128, 512]

method.target:
  strategy: "choice"
  values: [null, 1]

# Disable checkpointing to save storage: single-value "choice" entries pin
# these settings to one value for every trial.
train.checkpoint_interval:
  strategy: "choice"
  values: [10000000]

train.save_best:
  strategy: "choice"
  values: [false]