Upload . with huggingface_hub
Browse files- .summary/0/events.out.tfevents.1677113085.f54eb2240718 +3 -0
- README.md +1 -1
- checkpoint_p0/checkpoint_000002447_10022912.pth +3 -0
- config.json +1 -1
- replay.mp4 +2 -2
- sf_log.txt +563 -0
.summary/0/events.out.tfevents.1677113085.f54eb2240718
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc55a4c1084efc746a5ed4cf48120476b155daceb4bb870a4e15b9cf6c660c1d
|
3 |
+
size 2683
|
README.md
CHANGED
@@ -15,7 +15,7 @@ model-index:
|
|
15 |
type: doom_health_gathering_supreme
|
16 |
metrics:
|
17 |
- type: mean_reward
|
18 |
-
value: 4.
|
19 |
name: mean_reward
|
20 |
verified: false
|
21 |
---
|
|
|
15 |
type: doom_health_gathering_supreme
|
16 |
metrics:
|
17 |
- type: mean_reward
|
18 |
+
value: 4.19 +/- 0.56
|
19 |
name: mean_reward
|
20 |
verified: false
|
21 |
---
|
checkpoint_p0/checkpoint_000002447_10022912.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0788c774d56aa22ced225fdc77679ab03a5f12285c48857899a0cb39e9935424
|
3 |
+
size 34929220
|
config.json
CHANGED
@@ -23,7 +23,7 @@
|
|
23 |
"rollout": 32,
|
24 |
"recurrence": 32,
|
25 |
"shuffle_minibatches": false,
|
26 |
-
"gamma": 0.
|
27 |
"reward_scale": 1.0,
|
28 |
"reward_clip": 1000.0,
|
29 |
"value_bootstrap": false,
|
|
|
23 |
"rollout": 32,
|
24 |
"recurrence": 32,
|
25 |
"shuffle_minibatches": false,
|
26 |
+
"gamma": 0.97,
|
27 |
"reward_scale": 1.0,
|
28 |
"reward_clip": 1000.0,
|
29 |
"value_bootstrap": false,
|
replay.mp4
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa318558a81f3083a89ea6e50af8629d4ddd3569b8c1d9991c6bc53794fa001e
|
3 |
+
size 5366056
|
sf_log.txt
CHANGED
@@ -3024,3 +3024,566 @@ main_loop: 38.4461
|
|
3024 |
[2023-02-23 00:39:49,195][05631] Avg episode rewards: #0: 4.500, true rewards: #0: 4.000
|
3025 |
[2023-02-23 00:39:49,198][05631] Avg episode reward: 4.500, avg true_objective: 4.000
|
3026 |
[2023-02-23 00:40:09,146][05631] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3024 |
[2023-02-23 00:39:49,195][05631] Avg episode rewards: #0: 4.500, true rewards: #0: 4.000
|
3025 |
[2023-02-23 00:39:49,198][05631] Avg episode reward: 4.500, avg true_objective: 4.000
|
3026 |
[2023-02-23 00:40:09,146][05631] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
|
3027 |
+
[2023-02-23 00:40:14,495][05631] The model has been pushed to https://huggingface.co/pittawat/rl_course_vizdoom_health_gathering_supreme
|
3028 |
+
[2023-02-23 00:44:45,344][05631] Environment doom_basic already registered, overwriting...
|
3029 |
+
[2023-02-23 00:44:45,347][05631] Environment doom_two_colors_easy already registered, overwriting...
|
3030 |
+
[2023-02-23 00:44:45,351][05631] Environment doom_two_colors_hard already registered, overwriting...
|
3031 |
+
[2023-02-23 00:44:45,352][05631] Environment doom_dm already registered, overwriting...
|
3032 |
+
[2023-02-23 00:44:45,353][05631] Environment doom_dwango5 already registered, overwriting...
|
3033 |
+
[2023-02-23 00:44:45,356][05631] Environment doom_my_way_home_flat_actions already registered, overwriting...
|
3034 |
+
[2023-02-23 00:44:45,357][05631] Environment doom_defend_the_center_flat_actions already registered, overwriting...
|
3035 |
+
[2023-02-23 00:44:45,360][05631] Environment doom_my_way_home already registered, overwriting...
|
3036 |
+
[2023-02-23 00:44:45,361][05631] Environment doom_deadly_corridor already registered, overwriting...
|
3037 |
+
[2023-02-23 00:44:45,362][05631] Environment doom_defend_the_center already registered, overwriting...
|
3038 |
+
[2023-02-23 00:44:45,366][05631] Environment doom_defend_the_line already registered, overwriting...
|
3039 |
+
[2023-02-23 00:44:45,367][05631] Environment doom_health_gathering already registered, overwriting...
|
3040 |
+
[2023-02-23 00:44:45,368][05631] Environment doom_health_gathering_supreme already registered, overwriting...
|
3041 |
+
[2023-02-23 00:44:45,369][05631] Environment doom_battle already registered, overwriting...
|
3042 |
+
[2023-02-23 00:44:45,371][05631] Environment doom_battle2 already registered, overwriting...
|
3043 |
+
[2023-02-23 00:44:45,373][05631] Environment doom_duel_bots already registered, overwriting...
|
3044 |
+
[2023-02-23 00:44:45,376][05631] Environment doom_deathmatch_bots already registered, overwriting...
|
3045 |
+
[2023-02-23 00:44:45,377][05631] Environment doom_duel already registered, overwriting...
|
3046 |
+
[2023-02-23 00:44:45,378][05631] Environment doom_deathmatch_full already registered, overwriting...
|
3047 |
+
[2023-02-23 00:44:45,381][05631] Environment doom_benchmark already registered, overwriting...
|
3048 |
+
[2023-02-23 00:44:45,382][05631] register_encoder_factory: <function make_vizdoom_encoder at 0x7f0a330101f0>
|
3049 |
+
[2023-02-23 00:44:45,417][05631] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
|
3050 |
+
[2023-02-23 00:44:45,419][05631] Overriding arg 'gamma' with value 0.97 passed from command line
|
3051 |
+
[2023-02-23 00:44:45,426][05631] Experiment dir /content/train_dir/default_experiment already exists!
|
3052 |
+
[2023-02-23 00:44:45,428][05631] Resuming existing experiment from /content/train_dir/default_experiment...
|
3053 |
+
[2023-02-23 00:44:45,429][05631] Weights and Biases integration disabled
|
3054 |
+
[2023-02-23 00:44:45,438][05631] Environment var CUDA_VISIBLE_DEVICES is 0
|
3055 |
+
|
3056 |
+
[2023-02-23 00:44:47,591][05631] Starting experiment with the following configuration:
|
3057 |
+
help=False
|
3058 |
+
algo=APPO
|
3059 |
+
env=doom_health_gathering_supreme
|
3060 |
+
experiment=default_experiment
|
3061 |
+
train_dir=/content/train_dir
|
3062 |
+
restart_behavior=resume
|
3063 |
+
device=gpu
|
3064 |
+
seed=None
|
3065 |
+
num_policies=1
|
3066 |
+
async_rl=True
|
3067 |
+
serial_mode=False
|
3068 |
+
batched_sampling=False
|
3069 |
+
num_batches_to_accumulate=2
|
3070 |
+
worker_num_splits=2
|
3071 |
+
policy_workers_per_policy=1
|
3072 |
+
max_policy_lag=1000
|
3073 |
+
num_workers=8
|
3074 |
+
num_envs_per_worker=4
|
3075 |
+
batch_size=1024
|
3076 |
+
num_batches_per_epoch=1
|
3077 |
+
num_epochs=1
|
3078 |
+
rollout=32
|
3079 |
+
recurrence=32
|
3080 |
+
shuffle_minibatches=False
|
3081 |
+
gamma=0.97
|
3082 |
+
reward_scale=1.0
|
3083 |
+
reward_clip=1000.0
|
3084 |
+
value_bootstrap=False
|
3085 |
+
normalize_returns=True
|
3086 |
+
exploration_loss_coeff=0.001
|
3087 |
+
value_loss_coeff=0.5
|
3088 |
+
kl_loss_coeff=0.0
|
3089 |
+
exploration_loss=symmetric_kl
|
3090 |
+
gae_lambda=0.95
|
3091 |
+
ppo_clip_ratio=0.1
|
3092 |
+
ppo_clip_value=0.2
|
3093 |
+
with_vtrace=False
|
3094 |
+
vtrace_rho=1.0
|
3095 |
+
vtrace_c=1.0
|
3096 |
+
optimizer=adam
|
3097 |
+
adam_eps=1e-06
|
3098 |
+
adam_beta1=0.9
|
3099 |
+
adam_beta2=0.999
|
3100 |
+
max_grad_norm=4.0
|
3101 |
+
learning_rate=0.0001
|
3102 |
+
lr_schedule=constant
|
3103 |
+
lr_schedule_kl_threshold=0.008
|
3104 |
+
lr_adaptive_min=1e-06
|
3105 |
+
lr_adaptive_max=0.01
|
3106 |
+
obs_subtract_mean=0.0
|
3107 |
+
obs_scale=255.0
|
3108 |
+
normalize_input=True
|
3109 |
+
normalize_input_keys=None
|
3110 |
+
decorrelate_experience_max_seconds=0
|
3111 |
+
decorrelate_envs_on_one_worker=True
|
3112 |
+
actor_worker_gpus=[]
|
3113 |
+
set_workers_cpu_affinity=True
|
3114 |
+
force_envs_single_thread=False
|
3115 |
+
default_niceness=0
|
3116 |
+
log_to_file=True
|
3117 |
+
experiment_summaries_interval=10
|
3118 |
+
flush_summaries_interval=30
|
3119 |
+
stats_avg=100
|
3120 |
+
summaries_use_frameskip=True
|
3121 |
+
heartbeat_interval=20
|
3122 |
+
heartbeat_reporting_interval=600
|
3123 |
+
train_for_env_steps=10000000
|
3124 |
+
train_for_seconds=10000000000
|
3125 |
+
save_every_sec=120
|
3126 |
+
keep_checkpoints=2
|
3127 |
+
load_checkpoint_kind=latest
|
3128 |
+
save_milestones_sec=-1
|
3129 |
+
save_best_every_sec=5
|
3130 |
+
save_best_metric=reward
|
3131 |
+
save_best_after=100000
|
3132 |
+
benchmark=False
|
3133 |
+
encoder_mlp_layers=[512, 512]
|
3134 |
+
encoder_conv_architecture=convnet_simple
|
3135 |
+
encoder_conv_mlp_layers=[512]
|
3136 |
+
use_rnn=True
|
3137 |
+
rnn_size=512
|
3138 |
+
rnn_type=gru
|
3139 |
+
rnn_num_layers=1
|
3140 |
+
decoder_mlp_layers=[]
|
3141 |
+
nonlinearity=elu
|
3142 |
+
policy_initialization=orthogonal
|
3143 |
+
policy_init_gain=1.0
|
3144 |
+
actor_critic_share_weights=True
|
3145 |
+
adaptive_stddev=True
|
3146 |
+
continuous_tanh_scale=0.0
|
3147 |
+
initial_stddev=1.0
|
3148 |
+
use_env_info_cache=False
|
3149 |
+
env_gpu_actions=False
|
3150 |
+
env_gpu_observations=True
|
3151 |
+
env_frameskip=4
|
3152 |
+
env_framestack=1
|
3153 |
+
pixel_format=CHW
|
3154 |
+
use_record_episode_statistics=False
|
3155 |
+
with_wandb=False
|
3156 |
+
wandb_user=None
|
3157 |
+
wandb_project=sample_factory
|
3158 |
+
wandb_group=None
|
3159 |
+
wandb_job_type=SF
|
3160 |
+
wandb_tags=[]
|
3161 |
+
with_pbt=False
|
3162 |
+
pbt_mix_policies_in_one_env=True
|
3163 |
+
pbt_period_env_steps=5000000
|
3164 |
+
pbt_start_mutation=20000000
|
3165 |
+
pbt_replace_fraction=0.3
|
3166 |
+
pbt_mutation_rate=0.15
|
3167 |
+
pbt_replace_reward_gap=0.1
|
3168 |
+
pbt_replace_reward_gap_absolute=1e-06
|
3169 |
+
pbt_optimize_gamma=False
|
3170 |
+
pbt_target_objective=true_objective
|
3171 |
+
pbt_perturb_min=1.1
|
3172 |
+
pbt_perturb_max=1.5
|
3173 |
+
num_agents=-1
|
3174 |
+
num_humans=0
|
3175 |
+
num_bots=-1
|
3176 |
+
start_bot_difficulty=None
|
3177 |
+
timelimit=None
|
3178 |
+
res_w=128
|
3179 |
+
res_h=72
|
3180 |
+
wide_aspect_ratio=False
|
3181 |
+
eval_env_frameskip=1
|
3182 |
+
fps=35
|
3183 |
+
command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000
|
3184 |
+
cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000}
|
3185 |
+
git_hash=unknown
|
3186 |
+
git_repo_name=not a git repository
|
3187 |
+
[2023-02-23 00:44:47,598][05631] Saving configuration to /content/train_dir/default_experiment/config.json...
|
3188 |
+
[2023-02-23 00:44:47,602][05631] Rollout worker 0 uses device cpu
|
3189 |
+
[2023-02-23 00:44:47,603][05631] Rollout worker 1 uses device cpu
|
3190 |
+
[2023-02-23 00:44:47,606][05631] Rollout worker 2 uses device cpu
|
3191 |
+
[2023-02-23 00:44:47,608][05631] Rollout worker 3 uses device cpu
|
3192 |
+
[2023-02-23 00:44:47,609][05631] Rollout worker 4 uses device cpu
|
3193 |
+
[2023-02-23 00:44:47,610][05631] Rollout worker 5 uses device cpu
|
3194 |
+
[2023-02-23 00:44:47,612][05631] Rollout worker 6 uses device cpu
|
3195 |
+
[2023-02-23 00:44:47,613][05631] Rollout worker 7 uses device cpu
|
3196 |
+
[2023-02-23 00:44:47,732][05631] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
3197 |
+
[2023-02-23 00:44:47,734][05631] InferenceWorker_p0-w0: min num requests: 2
|
3198 |
+
[2023-02-23 00:44:47,890][05631] Starting all processes...
|
3199 |
+
[2023-02-23 00:44:47,893][05631] Starting process learner_proc0
|
3200 |
+
[2023-02-23 00:44:48,025][05631] Starting all processes...
|
3201 |
+
[2023-02-23 00:44:48,033][05631] Starting process inference_proc0-0
|
3202 |
+
[2023-02-23 00:44:48,033][05631] Starting process rollout_proc0
|
3203 |
+
[2023-02-23 00:44:48,035][05631] Starting process rollout_proc1
|
3204 |
+
[2023-02-23 00:44:48,035][05631] Starting process rollout_proc2
|
3205 |
+
[2023-02-23 00:44:48,035][05631] Starting process rollout_proc3
|
3206 |
+
[2023-02-23 00:44:48,035][05631] Starting process rollout_proc4
|
3207 |
+
[2023-02-23 00:44:48,035][05631] Starting process rollout_proc5
|
3208 |
+
[2023-02-23 00:44:48,035][05631] Starting process rollout_proc6
|
3209 |
+
[2023-02-23 00:44:48,035][05631] Starting process rollout_proc7
|
3210 |
+
[2023-02-23 00:44:56,168][39829] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
3211 |
+
[2023-02-23 00:44:56,168][39829] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
|
3212 |
+
[2023-02-23 00:44:56,202][39829] Num visible devices: 1
|
3213 |
+
[2023-02-23 00:44:56,239][39829] Starting seed is not provided
|
3214 |
+
[2023-02-23 00:44:56,240][39829] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
3215 |
+
[2023-02-23 00:44:56,241][39829] Initializing actor-critic model on device cuda:0
|
3216 |
+
[2023-02-23 00:44:56,242][39829] RunningMeanStd input shape: (3, 72, 128)
|
3217 |
+
[2023-02-23 00:44:56,245][39829] RunningMeanStd input shape: (1,)
|
3218 |
+
[2023-02-23 00:44:56,288][39829] ConvEncoder: input_channels=3
|
3219 |
+
[2023-02-23 00:44:57,277][39829] Conv encoder output size: 512
|
3220 |
+
[2023-02-23 00:44:57,280][39829] Policy head output size: 512
|
3221 |
+
[2023-02-23 00:44:57,450][39829] Created Actor Critic model with architecture:
|
3222 |
+
[2023-02-23 00:44:57,468][39829] ActorCriticSharedWeights(
|
3223 |
+
(obs_normalizer): ObservationNormalizer(
|
3224 |
+
(running_mean_std): RunningMeanStdDictInPlace(
|
3225 |
+
(running_mean_std): ModuleDict(
|
3226 |
+
(obs): RunningMeanStdInPlace()
|
3227 |
+
)
|
3228 |
+
)
|
3229 |
+
)
|
3230 |
+
(returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
|
3231 |
+
(encoder): VizdoomEncoder(
|
3232 |
+
(basic_encoder): ConvEncoder(
|
3233 |
+
(enc): RecursiveScriptModule(
|
3234 |
+
original_name=ConvEncoderImpl
|
3235 |
+
(conv_head): RecursiveScriptModule(
|
3236 |
+
original_name=Sequential
|
3237 |
+
(0): RecursiveScriptModule(original_name=Conv2d)
|
3238 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
3239 |
+
(2): RecursiveScriptModule(original_name=Conv2d)
|
3240 |
+
(3): RecursiveScriptModule(original_name=ELU)
|
3241 |
+
(4): RecursiveScriptModule(original_name=Conv2d)
|
3242 |
+
(5): RecursiveScriptModule(original_name=ELU)
|
3243 |
+
)
|
3244 |
+
(mlp_layers): RecursiveScriptModule(
|
3245 |
+
original_name=Sequential
|
3246 |
+
(0): RecursiveScriptModule(original_name=Linear)
|
3247 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
3248 |
+
)
|
3249 |
+
)
|
3250 |
+
)
|
3251 |
+
)
|
3252 |
+
(core): ModelCoreRNN(
|
3253 |
+
(core): GRU(512, 512)
|
3254 |
+
)
|
3255 |
+
(decoder): MlpDecoder(
|
3256 |
+
(mlp): Identity()
|
3257 |
+
)
|
3258 |
+
(critic_linear): Linear(in_features=512, out_features=1, bias=True)
|
3259 |
+
(action_parameterization): ActionParameterizationDefault(
|
3260 |
+
(distribution_linear): Linear(in_features=512, out_features=5, bias=True)
|
3261 |
+
)
|
3262 |
+
)
|
3263 |
+
[2023-02-23 00:44:57,531][39843] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
3264 |
+
[2023-02-23 00:44:57,537][39843] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
|
3265 |
+
[2023-02-23 00:44:57,689][39843] Num visible devices: 1
|
3266 |
+
[2023-02-23 00:44:58,288][39844] Worker 1 uses CPU cores [1]
|
3267 |
+
[2023-02-23 00:44:58,437][39850] Worker 0 uses CPU cores [0]
|
3268 |
+
[2023-02-23 00:44:58,552][39852] Worker 3 uses CPU cores [1]
|
3269 |
+
[2023-02-23 00:44:58,941][39854] Worker 2 uses CPU cores [0]
|
3270 |
+
[2023-02-23 00:44:59,465][39864] Worker 5 uses CPU cores [1]
|
3271 |
+
[2023-02-23 00:44:59,470][39857] Worker 4 uses CPU cores [0]
|
3272 |
+
[2023-02-23 00:44:59,564][39860] Worker 6 uses CPU cores [0]
|
3273 |
+
[2023-02-23 00:44:59,766][39866] Worker 7 uses CPU cores [1]
|
3274 |
+
[2023-02-23 00:45:02,236][39829] Using optimizer <class 'torch.optim.adam.Adam'>
|
3275 |
+
[2023-02-23 00:45:02,236][39829] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002445_10014720.pth...
|
3276 |
+
[2023-02-23 00:45:02,270][39829] Loading model from checkpoint
|
3277 |
+
[2023-02-23 00:45:02,274][39829] Loaded experiment state at self.train_step=2445, self.env_steps=10014720
|
3278 |
+
[2023-02-23 00:45:02,275][39829] Initialized policy 0 weights for model version 2445
|
3279 |
+
[2023-02-23 00:45:02,278][39829] LearnerWorker_p0 finished initialization!
|
3280 |
+
[2023-02-23 00:45:02,280][39829] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
3281 |
+
[2023-02-23 00:45:02,390][39843] RunningMeanStd input shape: (3, 72, 128)
|
3282 |
+
[2023-02-23 00:45:02,392][39843] RunningMeanStd input shape: (1,)
|
3283 |
+
[2023-02-23 00:45:02,405][39843] ConvEncoder: input_channels=3
|
3284 |
+
[2023-02-23 00:45:02,507][39843] Conv encoder output size: 512
|
3285 |
+
[2023-02-23 00:45:02,507][39843] Policy head output size: 512
|
3286 |
+
[2023-02-23 00:45:04,694][05631] Inference worker 0-0 is ready!
|
3287 |
+
[2023-02-23 00:45:04,696][05631] All inference workers are ready! Signal rollout workers to start!
|
3288 |
+
[2023-02-23 00:45:04,798][39844] Doom resolution: 160x120, resize resolution: (128, 72)
|
3289 |
+
[2023-02-23 00:45:04,799][39852] Doom resolution: 160x120, resize resolution: (128, 72)
|
3290 |
+
[2023-02-23 00:45:04,790][39857] Doom resolution: 160x120, resize resolution: (128, 72)
|
3291 |
+
[2023-02-23 00:45:04,792][39860] Doom resolution: 160x120, resize resolution: (128, 72)
|
3292 |
+
[2023-02-23 00:45:04,797][39866] Doom resolution: 160x120, resize resolution: (128, 72)
|
3293 |
+
[2023-02-23 00:45:04,795][39854] Doom resolution: 160x120, resize resolution: (128, 72)
|
3294 |
+
[2023-02-23 00:45:04,799][39850] Doom resolution: 160x120, resize resolution: (128, 72)
|
3295 |
+
[2023-02-23 00:45:04,795][39864] Doom resolution: 160x120, resize resolution: (128, 72)
|
3296 |
+
[2023-02-23 00:45:05,438][05631] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 10014720. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
3297 |
+
[2023-02-23 00:45:05,630][39864] Decorrelating experience for 0 frames...
|
3298 |
+
[2023-02-23 00:45:05,633][39866] Decorrelating experience for 0 frames...
|
3299 |
+
[2023-02-23 00:45:05,940][39854] Decorrelating experience for 0 frames...
|
3300 |
+
[2023-02-23 00:45:05,942][39860] Decorrelating experience for 0 frames...
|
3301 |
+
[2023-02-23 00:45:05,945][39857] Decorrelating experience for 0 frames...
|
3302 |
+
[2023-02-23 00:45:06,265][39844] Decorrelating experience for 0 frames...
|
3303 |
+
[2023-02-23 00:45:06,275][39860] Decorrelating experience for 32 frames...
|
3304 |
+
[2023-02-23 00:45:06,729][39854] Decorrelating experience for 32 frames...
|
3305 |
+
[2023-02-23 00:45:07,336][39864] Decorrelating experience for 32 frames...
|
3306 |
+
[2023-02-23 00:45:07,354][39866] Decorrelating experience for 32 frames...
|
3307 |
+
[2023-02-23 00:45:07,407][39852] Decorrelating experience for 0 frames...
|
3308 |
+
[2023-02-23 00:45:07,509][39860] Decorrelating experience for 64 frames...
|
3309 |
+
[2023-02-23 00:45:07,686][39854] Decorrelating experience for 64 frames...
|
3310 |
+
[2023-02-23 00:45:07,711][39844] Decorrelating experience for 32 frames...
|
3311 |
+
[2023-02-23 00:45:07,725][05631] Heartbeat connected on Batcher_0
|
3312 |
+
[2023-02-23 00:45:07,728][05631] Heartbeat connected on LearnerWorker_p0
|
3313 |
+
[2023-02-23 00:45:07,759][05631] Heartbeat connected on InferenceWorker_p0-w0
|
3314 |
+
[2023-02-23 00:45:08,275][39850] Decorrelating experience for 0 frames...
|
3315 |
+
[2023-02-23 00:45:08,280][39857] Decorrelating experience for 32 frames...
|
3316 |
+
[2023-02-23 00:45:08,857][39852] Decorrelating experience for 32 frames...
|
3317 |
+
[2023-02-23 00:45:09,043][39866] Decorrelating experience for 64 frames...
|
3318 |
+
[2023-02-23 00:45:09,122][39860] Decorrelating experience for 96 frames...
|
3319 |
+
[2023-02-23 00:45:09,189][39864] Decorrelating experience for 64 frames...
|
3320 |
+
[2023-02-23 00:45:09,271][39857] Decorrelating experience for 64 frames...
|
3321 |
+
[2023-02-23 00:45:09,351][05631] Heartbeat connected on RolloutWorker_w6
|
3322 |
+
[2023-02-23 00:45:09,472][39844] Decorrelating experience for 64 frames...
|
3323 |
+
[2023-02-23 00:45:09,955][39850] Decorrelating experience for 32 frames...
|
3324 |
+
[2023-02-23 00:45:10,106][39854] Decorrelating experience for 96 frames...
|
3325 |
+
[2023-02-23 00:45:10,365][05631] Heartbeat connected on RolloutWorker_w2
|
3326 |
+
[2023-02-23 00:45:10,438][05631] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 10014720. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
3327 |
+
[2023-02-23 00:45:10,596][39852] Decorrelating experience for 64 frames...
|
3328 |
+
[2023-02-23 00:45:10,741][39864] Decorrelating experience for 96 frames...
|
3329 |
+
[2023-02-23 00:45:10,986][05631] Heartbeat connected on RolloutWorker_w5
|
3330 |
+
[2023-02-23 00:45:11,290][39844] Decorrelating experience for 96 frames...
|
3331 |
+
[2023-02-23 00:45:11,683][05631] Heartbeat connected on RolloutWorker_w1
|
3332 |
+
[2023-02-23 00:45:12,330][39857] Decorrelating experience for 96 frames...
|
3333 |
+
[2023-02-23 00:45:12,849][05631] Heartbeat connected on RolloutWorker_w4
|
3334 |
+
[2023-02-23 00:45:13,241][39852] Decorrelating experience for 96 frames...
|
3335 |
+
[2023-02-23 00:45:13,738][05631] Heartbeat connected on RolloutWorker_w3
|
3336 |
+
[2023-02-23 00:45:15,355][39866] Decorrelating experience for 96 frames...
|
3337 |
+
[2023-02-23 00:45:15,439][05631] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 10014720. Throughput: 0: 137.8. Samples: 1378. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
3338 |
+
[2023-02-23 00:45:15,444][05631] Avg episode reward: [(0, '2.665')]
|
3339 |
+
[2023-02-23 00:45:16,721][05631] Heartbeat connected on RolloutWorker_w7
|
3340 |
+
[2023-02-23 00:45:16,893][39829] Signal inference workers to stop experience collection...
|
3341 |
+
[2023-02-23 00:45:16,943][39843] InferenceWorker_p0-w0: stopping experience collection
|
3342 |
+
[2023-02-23 00:45:17,440][39850] Decorrelating experience for 64 frames...
|
3343 |
+
[2023-02-23 00:45:18,212][39850] Decorrelating experience for 96 frames...
|
3344 |
+
[2023-02-23 00:45:18,373][05631] Heartbeat connected on RolloutWorker_w0
|
3345 |
+
[2023-02-23 00:45:18,423][39829] Signal inference workers to resume experience collection...
|
3346 |
+
[2023-02-23 00:45:18,424][39843] InferenceWorker_p0-w0: resuming experience collection
|
3347 |
+
[2023-02-23 00:45:18,435][39829] Stopping Batcher_0...
|
3348 |
+
[2023-02-23 00:45:18,436][39829] Loop batcher_evt_loop terminating...
|
3349 |
+
[2023-02-23 00:45:18,437][05631] Component Batcher_0 stopped!
|
3350 |
+
[2023-02-23 00:45:18,467][05631] Component RolloutWorker_w7 stopped!
|
3351 |
+
[2023-02-23 00:45:18,489][05631] Component RolloutWorker_w1 stopped!
|
3352 |
+
[2023-02-23 00:45:18,472][39866] Stopping RolloutWorker_w7...
|
3353 |
+
[2023-02-23 00:45:18,496][39866] Loop rollout_proc7_evt_loop terminating...
|
3354 |
+
[2023-02-23 00:45:18,494][39844] Stopping RolloutWorker_w1...
|
3355 |
+
[2023-02-23 00:45:18,505][05631] Component RolloutWorker_w5 stopped!
|
3356 |
+
[2023-02-23 00:45:18,511][39864] Stopping RolloutWorker_w5...
|
3357 |
+
[2023-02-23 00:45:18,511][39864] Loop rollout_proc5_evt_loop terminating...
|
3358 |
+
[2023-02-23 00:45:18,517][05631] Component RolloutWorker_w3 stopped!
|
3359 |
+
[2023-02-23 00:45:18,522][39852] Stopping RolloutWorker_w3...
|
3360 |
+
[2023-02-23 00:45:18,523][39852] Loop rollout_proc3_evt_loop terminating...
|
3361 |
+
[2023-02-23 00:45:18,526][39844] Loop rollout_proc1_evt_loop terminating...
|
3362 |
+
[2023-02-23 00:45:18,534][39857] Stopping RolloutWorker_w4...
|
3363 |
+
[2023-02-23 00:45:18,534][05631] Component RolloutWorker_w4 stopped!
|
3364 |
+
[2023-02-23 00:45:18,545][39857] Loop rollout_proc4_evt_loop terminating...
|
3365 |
+
[2023-02-23 00:45:18,557][39850] Stopping RolloutWorker_w0...
|
3366 |
+
[2023-02-23 00:45:18,557][39850] Loop rollout_proc0_evt_loop terminating...
|
3367 |
+
[2023-02-23 00:45:18,557][05631] Component RolloutWorker_w0 stopped!
|
3368 |
+
[2023-02-23 00:45:18,585][39854] Stopping RolloutWorker_w2...
|
3369 |
+
[2023-02-23 00:45:18,585][05631] Component RolloutWorker_w2 stopped!
|
3370 |
+
[2023-02-23 00:45:18,591][39860] Stopping RolloutWorker_w6...
|
3371 |
+
[2023-02-23 00:45:18,592][39860] Loop rollout_proc6_evt_loop terminating...
|
3372 |
+
[2023-02-23 00:45:18,586][39854] Loop rollout_proc2_evt_loop terminating...
|
3373 |
+
[2023-02-23 00:45:18,591][05631] Component RolloutWorker_w6 stopped!
|
3374 |
+
[2023-02-23 00:45:18,613][39843] Weights refcount: 2 0
|
3375 |
+
[2023-02-23 00:45:18,629][05631] Component InferenceWorker_p0-w0 stopped!
|
3376 |
+
[2023-02-23 00:45:18,633][39843] Stopping InferenceWorker_p0-w0...
|
3377 |
+
[2023-02-23 00:45:18,633][39843] Loop inference_proc0-0_evt_loop terminating...
|
3378 |
+
[2023-02-23 00:45:21,008][39829] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002447_10022912.pth...
|
3379 |
+
[2023-02-23 00:45:21,173][39829] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth
|
3380 |
+
[2023-02-23 00:45:21,183][39829] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002447_10022912.pth...
|
3381 |
+
[2023-02-23 00:45:21,420][39829] Stopping LearnerWorker_p0...
|
3382 |
+
[2023-02-23 00:45:21,421][05631] Component LearnerWorker_p0 stopped!
|
3383 |
+
[2023-02-23 00:45:21,421][39829] Loop learner_proc0_evt_loop terminating...
|
3384 |
+
[2023-02-23 00:45:21,424][05631] Waiting for process learner_proc0 to stop...
|
3385 |
+
[2023-02-23 00:45:22,957][05631] Waiting for process inference_proc0-0 to join...
|
3386 |
+
[2023-02-23 00:45:22,959][05631] Waiting for process rollout_proc0 to join...
|
3387 |
+
[2023-02-23 00:45:22,962][05631] Waiting for process rollout_proc1 to join...
|
3388 |
+
[2023-02-23 00:45:23,075][05631] Waiting for process rollout_proc2 to join...
|
3389 |
+
[2023-02-23 00:45:23,077][05631] Waiting for process rollout_proc3 to join...
|
3390 |
+
[2023-02-23 00:45:23,083][05631] Waiting for process rollout_proc4 to join...
|
3391 |
+
[2023-02-23 00:45:23,085][05631] Waiting for process rollout_proc5 to join...
|
3392 |
+
[2023-02-23 00:45:23,088][05631] Waiting for process rollout_proc6 to join...
|
3393 |
+
[2023-02-23 00:45:23,089][05631] Waiting for process rollout_proc7 to join...
|
3394 |
+
[2023-02-23 00:45:23,093][05631] Batcher 0 profile tree view:
|
3395 |
+
batching: 0.0454, releasing_batches: 0.0004
|
3396 |
+
[2023-02-23 00:45:23,095][05631] InferenceWorker_p0-w0 profile tree view:
|
3397 |
+
wait_policy: 0.0051
|
3398 |
+
wait_policy_total: 8.5334
|
3399 |
+
update_model: 0.0442
|
3400 |
+
weight_update: 0.0259
|
3401 |
+
one_step: 0.1178
|
3402 |
+
handle_policy_step: 3.6239
|
3403 |
+
deserialize: 0.0496, stack: 0.0089, obs_to_device_normalize: 0.3427, forward: 2.7631, send_messages: 0.1124
|
3404 |
+
prepare_outputs: 0.2459
|
3405 |
+
to_cpu: 0.1354
|
3406 |
+
[2023-02-23 00:45:23,097][05631] Learner 0 profile tree view:
|
3407 |
+
misc: 0.0000, prepare_batch: 6.3831
|
3408 |
+
train: 0.7580
|
3409 |
+
epoch_init: 0.0000, minibatch_init: 0.0000, losses_postprocess: 0.0004, kl_divergence: 0.0005, after_optimizer: 0.0041
|
3410 |
+
calculate_losses: 0.1495
|
3411 |
+
losses_init: 0.0000, forward_head: 0.1162, bptt_initial: 0.0202, tail: 0.0017, advantages_returns: 0.0010, losses: 0.0060
|
3412 |
+
bptt: 0.0038
|
3413 |
+
bptt_forward_core: 0.0037
|
3414 |
+
update: 0.6026
|
3415 |
+
clip: 0.0081
|
3416 |
+
[2023-02-23 00:45:23,101][05631] RolloutWorker_w0 profile tree view:
|
3417 |
+
wait_for_trajectories: 0.0003, enqueue_policy_requests: 0.0006
|
3418 |
+
[2023-02-23 00:45:23,104][05631] RolloutWorker_w7 profile tree view:
|
3419 |
+
wait_for_trajectories: 0.0003, enqueue_policy_requests: 0.0175, env_step: 0.2402, overhead: 0.0012, complete_rollouts: 0.0000
|
3420 |
+
save_policy_outputs: 0.0009
|
3421 |
+
split_output_tensors: 0.0004
|
3422 |
+
[2023-02-23 00:45:23,105][05631] Loop Runner_EvtLoop terminating...
|
3423 |
+
[2023-02-23 00:45:23,107][05631] Runner profile tree view:
|
3424 |
+
main_loop: 35.2166
|
3425 |
+
[2023-02-23 00:45:23,108][05631] Collected {0: 10022912}, FPS: 232.6
|
3426 |
+
[2023-02-23 00:51:07,960][05631] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
|
3427 |
+
[2023-02-23 00:51:07,965][05631] Overriding arg 'num_workers' with value 1 passed from command line
|
3428 |
+
[2023-02-23 00:51:07,967][05631] Adding new argument 'no_render'=True that is not in the saved config file!
|
3429 |
+
[2023-02-23 00:51:07,970][05631] Adding new argument 'save_video'=True that is not in the saved config file!
|
3430 |
+
[2023-02-23 00:51:07,972][05631] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
|
3431 |
+
[2023-02-23 00:51:07,974][05631] Adding new argument 'video_name'=None that is not in the saved config file!
|
3432 |
+
[2023-02-23 00:51:07,976][05631] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
|
3433 |
+
[2023-02-23 00:51:07,977][05631] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
|
3434 |
+
[2023-02-23 00:51:07,978][05631] Adding new argument 'push_to_hub'=False that is not in the saved config file!
|
3435 |
+
[2023-02-23 00:51:07,979][05631] Adding new argument 'hf_repository'=None that is not in the saved config file!
|
3436 |
+
[2023-02-23 00:51:07,980][05631] Adding new argument 'policy_index'=0 that is not in the saved config file!
|
3437 |
+
[2023-02-23 00:51:07,981][05631] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
|
3438 |
+
[2023-02-23 00:51:07,983][05631] Adding new argument 'train_script'=None that is not in the saved config file!
|
3439 |
+
[2023-02-23 00:51:07,984][05631] Adding new argument 'enjoy_script'=None that is not in the saved config file!
|
3440 |
+
[2023-02-23 00:51:07,985][05631] Using frameskip 1 and render_action_repeat=4 for evaluation
|
3441 |
+
[2023-02-23 00:51:08,013][05631] RunningMeanStd input shape: (3, 72, 128)
|
3442 |
+
[2023-02-23 00:51:08,016][05631] RunningMeanStd input shape: (1,)
|
3443 |
+
[2023-02-23 00:51:08,038][05631] ConvEncoder: input_channels=3
|
3444 |
+
[2023-02-23 00:51:08,096][05631] Conv encoder output size: 512
|
3445 |
+
[2023-02-23 00:51:08,100][05631] Policy head output size: 512
|
3446 |
+
[2023-02-23 00:51:08,130][05631] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002447_10022912.pth...
|
3447 |
+
[2023-02-23 00:51:08,807][05631] Num frames 100...
|
3448 |
+
[2023-02-23 00:51:08,941][05631] Num frames 200...
|
3449 |
+
[2023-02-23 00:51:09,054][05631] Num frames 300...
|
3450 |
+
[2023-02-23 00:51:09,207][05631] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
|
3451 |
+
[2023-02-23 00:51:09,209][05631] Avg episode reward: 3.840, avg true_objective: 3.840
|
3452 |
+
[2023-02-23 00:51:09,233][05631] Num frames 400...
|
3453 |
+
[2023-02-23 00:51:09,366][05631] Num frames 500...
|
3454 |
+
[2023-02-23 00:51:09,487][05631] Num frames 600...
|
3455 |
+
[2023-02-23 00:51:09,611][05631] Num frames 700...
|
3456 |
+
[2023-02-23 00:51:09,743][05631] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
|
3457 |
+
[2023-02-23 00:51:09,746][05631] Avg episode reward: 3.840, avg true_objective: 3.840
|
3458 |
+
[2023-02-23 00:51:09,788][05631] Num frames 800...
|
3459 |
+
[2023-02-23 00:51:09,912][05631] Num frames 900...
|
3460 |
+
[2023-02-23 00:51:10,031][05631] Num frames 1000...
|
3461 |
+
[2023-02-23 00:51:10,159][05631] Num frames 1100...
|
3462 |
+
[2023-02-23 00:51:10,318][05631] Avg episode rewards: #0: 4.280, true rewards: #0: 3.947
|
3463 |
+
[2023-02-23 00:51:10,322][05631] Avg episode reward: 4.280, avg true_objective: 3.947
|
3464 |
+
[2023-02-23 00:51:10,352][05631] Num frames 1200...
|
3465 |
+
[2023-02-23 00:51:10,477][05631] Num frames 1300...
|
3466 |
+
[2023-02-23 00:51:10,596][05631] Num frames 1400...
|
3467 |
+
[2023-02-23 00:51:10,710][05631] Num frames 1500...
|
3468 |
+
[2023-02-23 00:51:10,824][05631] Num frames 1600...
|
3469 |
+
[2023-02-23 00:51:10,877][05631] Avg episode rewards: #0: 4.500, true rewards: #0: 4.000
|
3470 |
+
[2023-02-23 00:51:10,879][05631] Avg episode reward: 4.500, avg true_objective: 4.000
|
3471 |
+
[2023-02-23 00:51:11,019][05631] Num frames 1700...
|
3472 |
+
[2023-02-23 00:51:11,141][05631] Num frames 1800...
|
3473 |
+
[2023-02-23 00:51:11,273][05631] Avg episode rewards: #0: 4.112, true rewards: #0: 3.712
|
3474 |
+
[2023-02-23 00:51:11,275][05631] Avg episode reward: 4.112, avg true_objective: 3.712
|
3475 |
+
[2023-02-23 00:51:11,332][05631] Num frames 1900...
|
3476 |
+
[2023-02-23 00:51:11,459][05631] Num frames 2000...
|
3477 |
+
[2023-02-23 00:51:11,582][05631] Num frames 2100...
|
3478 |
+
[2023-02-23 00:51:11,713][05631] Num frames 2200...
|
3479 |
+
[2023-02-23 00:51:11,817][05631] Avg episode rewards: #0: 4.067, true rewards: #0: 3.733
|
3480 |
+
[2023-02-23 00:51:11,820][05631] Avg episode reward: 4.067, avg true_objective: 3.733
|
3481 |
+
[2023-02-23 00:51:11,896][05631] Num frames 2300...
|
3482 |
+
[2023-02-23 00:51:12,021][05631] Num frames 2400...
|
3483 |
+
[2023-02-23 00:51:12,141][05631] Num frames 2500...
|
3484 |
+
[2023-02-23 00:51:12,259][05631] Num frames 2600...
|
3485 |
+
[2023-02-23 00:51:12,342][05631] Avg episode rewards: #0: 4.034, true rewards: #0: 3.749
|
3486 |
+
[2023-02-23 00:51:12,344][05631] Avg episode reward: 4.034, avg true_objective: 3.749
|
3487 |
+
[2023-02-23 00:51:12,452][05631] Num frames 2700...
|
3488 |
+
[2023-02-23 00:51:12,567][05631] Num frames 2800...
|
3489 |
+
[2023-02-23 00:51:12,689][05631] Num frames 2900...
|
3490 |
+
[2023-02-23 00:51:12,803][05631] Num frames 3000...
|
3491 |
+
[2023-02-23 00:51:12,932][05631] Num frames 3100...
|
3492 |
+
[2023-02-23 00:51:12,993][05631] Avg episode rewards: #0: 4.255, true rewards: #0: 3.880
|
3493 |
+
[2023-02-23 00:51:12,995][05631] Avg episode reward: 4.255, avg true_objective: 3.880
|
3494 |
+
[2023-02-23 00:51:13,113][05631] Num frames 3200...
|
3495 |
+
[2023-02-23 00:51:13,236][05631] Num frames 3300...
|
3496 |
+
[2023-02-23 00:51:13,369][05631] Num frames 3400...
|
3497 |
+
[2023-02-23 00:51:13,526][05631] Avg episode rewards: #0: 4.209, true rewards: #0: 3.876
|
3498 |
+
[2023-02-23 00:51:13,528][05631] Avg episode reward: 4.209, avg true_objective: 3.876
|
3499 |
+
[2023-02-23 00:51:13,554][05631] Num frames 3500...
|
3500 |
+
[2023-02-23 00:51:13,678][05631] Num frames 3600...
|
3501 |
+
[2023-02-23 00:51:13,801][05631] Num frames 3700...
|
3502 |
+
[2023-02-23 00:51:13,924][05631] Num frames 3800...
|
3503 |
+
[2023-02-23 00:51:14,038][05631] Num frames 3900...
|
3504 |
+
[2023-02-23 00:51:14,135][05631] Avg episode rewards: #0: 4.336, true rewards: #0: 3.936
|
3505 |
+
[2023-02-23 00:51:14,137][05631] Avg episode reward: 4.336, avg true_objective: 3.936
|
3506 |
+
[2023-02-23 00:51:35,049][05631] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
|
3507 |
+
[2023-02-23 00:51:35,237][05631] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
|
3508 |
+
[2023-02-23 00:51:35,239][05631] Overriding arg 'num_workers' with value 1 passed from command line
|
3509 |
+
[2023-02-23 00:51:35,246][05631] Adding new argument 'no_render'=True that is not in the saved config file!
|
3510 |
+
[2023-02-23 00:51:35,248][05631] Adding new argument 'save_video'=True that is not in the saved config file!
|
3511 |
+
[2023-02-23 00:51:35,250][05631] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
|
3512 |
+
[2023-02-23 00:51:35,252][05631] Adding new argument 'video_name'=None that is not in the saved config file!
|
3513 |
+
[2023-02-23 00:51:35,257][05631] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
|
3514 |
+
[2023-02-23 00:51:35,258][05631] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
|
3515 |
+
[2023-02-23 00:51:35,259][05631] Adding new argument 'push_to_hub'=True that is not in the saved config file!
|
3516 |
+
[2023-02-23 00:51:35,261][05631] Adding new argument 'hf_repository'='pittawat/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
|
3517 |
+
[2023-02-23 00:51:35,262][05631] Adding new argument 'policy_index'=0 that is not in the saved config file!
|
3518 |
+
[2023-02-23 00:51:35,263][05631] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
|
3519 |
+
[2023-02-23 00:51:35,269][05631] Adding new argument 'train_script'=None that is not in the saved config file!
|
3520 |
+
[2023-02-23 00:51:35,270][05631] Adding new argument 'enjoy_script'=None that is not in the saved config file!
|
3521 |
+
[2023-02-23 00:51:35,271][05631] Using frameskip 1 and render_action_repeat=4 for evaluation
|
3522 |
+
[2023-02-23 00:51:35,306][05631] RunningMeanStd input shape: (3, 72, 128)
|
3523 |
+
[2023-02-23 00:51:35,308][05631] RunningMeanStd input shape: (1,)
|
3524 |
+
[2023-02-23 00:51:35,336][05631] ConvEncoder: input_channels=3
|
3525 |
+
[2023-02-23 00:51:35,400][05631] Conv encoder output size: 512
|
3526 |
+
[2023-02-23 00:51:35,402][05631] Policy head output size: 512
|
3527 |
+
[2023-02-23 00:51:35,434][05631] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002447_10022912.pth...
|
3528 |
+
[2023-02-23 00:51:36,290][05631] Num frames 100...
|
3529 |
+
[2023-02-23 00:51:36,467][05631] Num frames 200...
|
3530 |
+
[2023-02-23 00:51:36,651][05631] Num frames 300...
|
3531 |
+
[2023-02-23 00:51:36,865][05631] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
|
3532 |
+
[2023-02-23 00:51:36,868][05631] Avg episode reward: 3.840, avg true_objective: 3.840
|
3533 |
+
[2023-02-23 00:51:36,908][05631] Num frames 400...
|
3534 |
+
[2023-02-23 00:51:37,124][05631] Num frames 500...
|
3535 |
+
[2023-02-23 00:51:37,321][05631] Num frames 600...
|
3536 |
+
[2023-02-23 00:51:37,519][05631] Num frames 700...
|
3537 |
+
[2023-02-23 00:51:37,712][05631] Num frames 800...
|
3538 |
+
[2023-02-23 00:51:37,896][05631] Num frames 900...
|
3539 |
+
[2023-02-23 00:51:38,009][05631] Avg episode rewards: #0: 5.640, true rewards: #0: 4.640
|
3540 |
+
[2023-02-23 00:51:38,012][05631] Avg episode reward: 5.640, avg true_objective: 4.640
|
3541 |
+
[2023-02-23 00:51:38,141][05631] Num frames 1000...
|
3542 |
+
[2023-02-23 00:51:38,301][05631] Num frames 1100...
|
3543 |
+
[2023-02-23 00:51:38,469][05631] Num frames 1200...
|
3544 |
+
[2023-02-23 00:51:38,603][05631] Num frames 1300...
|
3545 |
+
[2023-02-23 00:51:38,674][05631] Avg episode rewards: #0: 5.040, true rewards: #0: 4.373
|
3546 |
+
[2023-02-23 00:51:38,675][05631] Avg episode reward: 5.040, avg true_objective: 4.373
|
3547 |
+
[2023-02-23 00:51:38,785][05631] Num frames 1400...
|
3548 |
+
[2023-02-23 00:51:38,906][05631] Num frames 1500...
|
3549 |
+
[2023-02-23 00:51:39,030][05631] Num frames 1600...
|
3550 |
+
[2023-02-23 00:51:39,208][05631] Avg episode rewards: #0: 4.740, true rewards: #0: 4.240
|
3551 |
+
[2023-02-23 00:51:39,210][05631] Avg episode reward: 4.740, avg true_objective: 4.240
|
3552 |
+
[2023-02-23 00:51:39,221][05631] Num frames 1700...
|
3553 |
+
[2023-02-23 00:51:39,334][05631] Num frames 1800...
|
3554 |
+
[2023-02-23 00:51:39,452][05631] Num frames 1900...
|
3555 |
+
[2023-02-23 00:51:39,574][05631] Num frames 2000...
|
3556 |
+
[2023-02-23 00:51:39,721][05631] Avg episode rewards: #0: 4.560, true rewards: #0: 4.160
|
3557 |
+
[2023-02-23 00:51:39,723][05631] Avg episode reward: 4.560, avg true_objective: 4.160
|
3558 |
+
[2023-02-23 00:51:39,752][05631] Num frames 2100...
|
3559 |
+
[2023-02-23 00:51:39,874][05631] Num frames 2200...
|
3560 |
+
[2023-02-23 00:51:39,995][05631] Num frames 2300...
|
3561 |
+
[2023-02-23 00:51:40,113][05631] Num frames 2400...
|
3562 |
+
[2023-02-23 00:51:40,283][05631] Avg episode rewards: #0: 4.493, true rewards: #0: 4.160
|
3563 |
+
[2023-02-23 00:51:40,286][05631] Avg episode reward: 4.493, avg true_objective: 4.160
|
3564 |
+
[2023-02-23 00:51:40,295][05631] Num frames 2500...
|
3565 |
+
[2023-02-23 00:51:40,408][05631] Num frames 2600...
|
3566 |
+
[2023-02-23 00:51:40,526][05631] Num frames 2700...
|
3567 |
+
[2023-02-23 00:51:40,641][05631] Num frames 2800...
|
3568 |
+
[2023-02-23 00:51:40,787][05631] Avg episode rewards: #0: 4.400, true rewards: #0: 4.114
|
3569 |
+
[2023-02-23 00:51:40,789][05631] Avg episode reward: 4.400, avg true_objective: 4.114
|
3570 |
+
[2023-02-23 00:51:40,818][05631] Num frames 2900...
|
3571 |
+
[2023-02-23 00:51:40,943][05631] Num frames 3000...
|
3572 |
+
[2023-02-23 00:51:41,058][05631] Num frames 3100...
|
3573 |
+
[2023-02-23 00:51:41,172][05631] Num frames 3200...
|
3574 |
+
[2023-02-23 00:51:41,286][05631] Num frames 3300...
|
3575 |
+
[2023-02-23 00:51:41,458][05631] Avg episode rewards: #0: 4.740, true rewards: #0: 4.240
|
3576 |
+
[2023-02-23 00:51:41,459][05631] Avg episode reward: 4.740, avg true_objective: 4.240
|
3577 |
+
[2023-02-23 00:51:41,473][05631] Num frames 3400...
|
3578 |
+
[2023-02-23 00:51:41,594][05631] Num frames 3500...
|
3579 |
+
[2023-02-23 00:51:41,710][05631] Num frames 3600...
|
3580 |
+
[2023-02-23 00:51:41,824][05631] Num frames 3700...
|
3581 |
+
[2023-02-23 00:51:41,944][05631] Num frames 3800...
|
3582 |
+
[2023-02-23 00:51:42,010][05631] Avg episode rewards: #0: 4.787, true rewards: #0: 4.231
|
3583 |
+
[2023-02-23 00:51:42,013][05631] Avg episode reward: 4.787, avg true_objective: 4.231
|
3584 |
+
[2023-02-23 00:51:42,125][05631] Num frames 3900...
|
3585 |
+
[2023-02-23 00:51:42,253][05631] Num frames 4000...
|
3586 |
+
[2023-02-23 00:51:42,370][05631] Num frames 4100...
|
3587 |
+
[2023-02-23 00:51:42,528][05631] Avg episode rewards: #0: 4.692, true rewards: #0: 4.192
|
3588 |
+
[2023-02-23 00:51:42,532][05631] Avg episode reward: 4.692, avg true_objective: 4.192
|
3589 |
+
[2023-02-23 00:52:02,869][05631] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
|