{
  "help": false,
  "algo": "APPO",
  "env": "atari_wizardofwor",
  "experiment": "atari_wizardofwor_APPO",
  "train_dir": "./train_atari",
  "restart_behavior": "resume",
  "device": "gpu",
  "seed": 1234,
  "num_policies": 2,
  "async_rl": true,
  "serial_mode": false,
  "batched_sampling": true,
  "num_batches_to_accumulate": 2,
  "worker_num_splits": 2,
  "policy_workers_per_policy": 1,
  "max_policy_lag": 1000,
  "num_workers": 16,
  "num_envs_per_worker": 8,
  "batch_size": 1024,
  "num_batches_per_epoch": 8,
  "num_epochs": 4,
  "rollout": 128,
  "recurrence": 1,
  "shuffle_minibatches": false,
  "gamma": 0.99,
  "reward_scale": 1.0,
  "reward_clip": 1000.0,
  "value_bootstrap": false,
  "normalize_returns": true,
  "exploration_loss_coeff": 0.0004677351413,
  "value_loss_coeff": 0.5,
  "kl_loss_coeff": 0.0,
  "exploration_loss": "entropy",
  "gae_lambda": 0.95,
  "ppo_clip_ratio": 0.1,
  "ppo_clip_value": 1.0,
  "with_vtrace": false,
  "vtrace_rho": 1.0,
  "vtrace_c": 1.0,
  "optimizer": "adam",
  "adam_eps": 1e-05,
  "adam_beta1": 0.9,
  "adam_beta2": 0.999,
  "max_grad_norm": 0.0,
  "learning_rate": 0.0003033891184,
  "lr_schedule": "linear_decay",
  "lr_schedule_kl_threshold": 0.008,
  "lr_adaptive_min": 1e-06,
  "lr_adaptive_max": 0.01,
  "obs_subtract_mean": 0.0,
  "obs_scale": 255.0,
  "normalize_input": true,
  "normalize_input_keys": [
    "obs"
  ],
  "decorrelate_experience_max_seconds": 0,
  "decorrelate_envs_on_one_worker": true,
  "actor_worker_gpus": [],
  "set_workers_cpu_affinity": true,
  "force_envs_single_thread": false,
  "default_niceness": 0,
  "log_to_file": true,
  "experiment_summaries_interval": 3,
  "flush_summaries_interval": 30,
  "stats_avg": 100,
  "summaries_use_frameskip": false,
  "heartbeat_interval": 10,
  "heartbeat_reporting_interval": 60,
  "train_for_env_steps": 500000000,
  "train_for_seconds": 10000000000,
  "save_every_sec": 120,
  "keep_checkpoints": 2,
  "load_checkpoint_kind": "latest",
  "save_milestones_sec": 1200,
  "save_best_every_sec": 5,
  "save_best_metric": "reward",
  "save_best_after": 100000,
  "benchmark": false,
  "encoder_mlp_layers": [
    512,
    512
  ],
  "encoder_conv_architecture": "convnet_atari",
  "encoder_conv_mlp_layers": [
    512
  ],
  "use_rnn": false,
  "rnn_size": 512,
  "rnn_type": "gru",
  "rnn_num_layers": 1,
  "decoder_mlp_layers": [],
  "nonlinearity": "relu",
  "policy_initialization": "orthogonal",
  "policy_init_gain": 1.0,
  "actor_critic_share_weights": true,
  "adaptive_stddev": false,
  "continuous_tanh_scale": 0.0,
  "initial_stddev": 1.0,
  "use_env_info_cache": false,
  "env_gpu_actions": false,
  "env_gpu_observations": true,
  "env_frameskip": 4,
  "env_framestack": 4,
  "pixel_format": "CHW",
  "use_record_episode_statistics": true,
  "with_wandb": true,
  "wandb_user": "matt-stammers",
  "wandb_project": "atari_APPO",
  "wandb_group": "atari_wizardofwor",
  "wandb_job_type": "SF",
  "wandb_tags": [
    "atari"
  ],
  "with_pbt": false,
  "pbt_mix_policies_in_one_env": true,
  "pbt_period_env_steps": 5000000,
  "pbt_start_mutation": 20000000,
  "pbt_replace_fraction": 0.3,
  "pbt_mutation_rate": 0.15,
  "pbt_replace_reward_gap": 0.1,
  "pbt_replace_reward_gap_absolute": 1e-06,
  "pbt_optimize_gamma": false,
  "pbt_target_objective": "true_objective",
  "pbt_perturb_min": 1.1,
  "pbt_perturb_max": 1.5,
  "command_line": "--algo=APPO --env=atari_wizardofwor --experiment=atari_wizardofwor_APPO --num_policies=2 --restart_behavior=resume --train_dir=./train_atari --train_for_env_steps=500000000 --seed=1234 --num_workers=16 --num_envs_per_worker=8 --num_batches_per_epoch=8 --worker_num_splits=2 --async_rl=true --batched_sampling=true --batch_size=1024 --max_grad_norm=0 --learning_rate=0.0003033891184 --heartbeat_interval=10 --heartbeat_reporting_interval=60 --save_milestones_sec=1200 --num_epochs=4 --exploration_loss_coeff=0.0004677351413 --summaries_use_frameskip=False --with_wandb=true --wandb_user=matt-stammers --wandb_project=atari_APPO --wandb_group=atari_wizardofwor --wandb_job_type=SF --wandb_tags=atari",
  "cli_args": {
    "algo": "APPO",
    "env": "atari_wizardofwor",
    "experiment": "atari_wizardofwor_APPO",
    "train_dir": "./train_atari",
    "restart_behavior": "resume",
    "seed": 1234,
    "num_policies": 2,
    "async_rl": true,
    "batched_sampling": true,
    "worker_num_splits": 2,
    "num_workers": 16,
    "num_envs_per_worker": 8,
    "batch_size": 1024,
    "num_batches_per_epoch": 8,
    "num_epochs": 4,
    "exploration_loss_coeff": 0.0004677351413,
    "max_grad_norm": 0.0,
    "learning_rate": 0.0003033891184,
    "summaries_use_frameskip": false,
    "heartbeat_interval": 10,
    "heartbeat_reporting_interval": 60,
    "train_for_env_steps": 500000000,
    "save_milestones_sec": 1200,
    "with_wandb": true,
    "wandb_user": "matt-stammers",
    "wandb_project": "atari_APPO",
    "wandb_group": "atari_wizardofwor",
    "wandb_job_type": "SF",
    "wandb_tags": [
      "atari"
    ]
  },
  "git_hash": "5fff97c2f535da5987d358cdbe6927cccd43621e",
  "git_repo_name": "not a git repository",
  "wandb_unique_id": "atari_wizardofwor_APPO_20231214_002345_411434"
}