Upload . with huggingface_hub
Browse files- .summary/0/events.out.tfevents.1677112709.f54eb2240718 +3 -0
- README.md +1 -1
- checkpoint_p0/checkpoint_000002445_10014720.pth +3 -0
- config.json +1 -1
- replay.mp4 +2 -2
- sf_log.txt +565 -0
.summary/0/events.out.tfevents.1677112709.f54eb2240718
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41a402a1f4c0d94882b613aea62f44328064236c87352c850bf7f95b86eccbff
|
3 |
+
size 2343
|
README.md
CHANGED
@@ -15,7 +15,7 @@ model-index:
|
|
15 |
type: doom_health_gathering_supreme
|
16 |
metrics:
|
17 |
- type: mean_reward
|
18 |
-
value: 4.
|
19 |
name: mean_reward
|
20 |
verified: false
|
21 |
---
|
|
|
15 |
type: doom_health_gathering_supreme
|
16 |
metrics:
|
17 |
- type: mean_reward
|
18 |
+
value: 4.00 +/- 0.68
|
19 |
name: mean_reward
|
20 |
verified: false
|
21 |
---
|
checkpoint_p0/checkpoint_000002445_10014720.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e39d4dd75fee0ec62008470cbd583761a51ab57a687a270bc53bb6034e566d0
|
3 |
+
size 34929220
|
config.json
CHANGED
@@ -23,7 +23,7 @@
|
|
23 |
"rollout": 32,
|
24 |
"recurrence": 32,
|
25 |
"shuffle_minibatches": false,
|
26 |
-
"gamma": 0.
|
27 |
"reward_scale": 1.0,
|
28 |
"reward_clip": 1000.0,
|
29 |
"value_bootstrap": false,
|
|
|
23 |
"rollout": 32,
|
24 |
"recurrence": 32,
|
25 |
"shuffle_minibatches": false,
|
26 |
+
"gamma": 0.98,
|
27 |
"reward_scale": 1.0,
|
28 |
"reward_clip": 1000.0,
|
29 |
"value_bootstrap": false,
|
replay.mp4
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70258433234a67ba882f938454e050765928c1bce517d1c7bb922800bacacd92
|
3 |
+
size 4994059
|
sf_log.txt
CHANGED
@@ -2459,3 +2459,568 @@ main_loop: 1813.1331
|
|
2459 |
[2023-02-23 00:26:52,988][05631] Avg episode rewards: #0: 5.020, true rewards: #0: 4.320
|
2460 |
[2023-02-23 00:26:52,991][05631] Avg episode reward: 5.020, avg true_objective: 4.320
|
2461 |
[2023-02-23 00:27:13,979][05631] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2459 |
[2023-02-23 00:26:52,988][05631] Avg episode rewards: #0: 5.020, true rewards: #0: 4.320
|
2460 |
[2023-02-23 00:26:52,991][05631] Avg episode reward: 5.020, avg true_objective: 4.320
|
2461 |
[2023-02-23 00:27:13,979][05631] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
|
2462 |
+
[2023-02-23 00:27:19,636][05631] The model has been pushed to https://huggingface.co/pittawat/rl_course_vizdoom_health_gathering_supreme
|
2463 |
+
[2023-02-23 00:38:29,240][05631] Environment doom_basic already registered, overwriting...
|
2464 |
+
[2023-02-23 00:38:29,244][05631] Environment doom_two_colors_easy already registered, overwriting...
|
2465 |
+
[2023-02-23 00:38:29,246][05631] Environment doom_two_colors_hard already registered, overwriting...
|
2466 |
+
[2023-02-23 00:38:29,248][05631] Environment doom_dm already registered, overwriting...
|
2467 |
+
[2023-02-23 00:38:29,250][05631] Environment doom_dwango5 already registered, overwriting...
|
2468 |
+
[2023-02-23 00:38:29,251][05631] Environment doom_my_way_home_flat_actions already registered, overwriting...
|
2469 |
+
[2023-02-23 00:38:29,253][05631] Environment doom_defend_the_center_flat_actions already registered, overwriting...
|
2470 |
+
[2023-02-23 00:38:29,254][05631] Environment doom_my_way_home already registered, overwriting...
|
2471 |
+
[2023-02-23 00:38:29,256][05631] Environment doom_deadly_corridor already registered, overwriting...
|
2472 |
+
[2023-02-23 00:38:29,258][05631] Environment doom_defend_the_center already registered, overwriting...
|
2473 |
+
[2023-02-23 00:38:29,259][05631] Environment doom_defend_the_line already registered, overwriting...
|
2474 |
+
[2023-02-23 00:38:29,261][05631] Environment doom_health_gathering already registered, overwriting...
|
2475 |
+
[2023-02-23 00:38:29,262][05631] Environment doom_health_gathering_supreme already registered, overwriting...
|
2476 |
+
[2023-02-23 00:38:29,263][05631] Environment doom_battle already registered, overwriting...
|
2477 |
+
[2023-02-23 00:38:29,265][05631] Environment doom_battle2 already registered, overwriting...
|
2478 |
+
[2023-02-23 00:38:29,266][05631] Environment doom_duel_bots already registered, overwriting...
|
2479 |
+
[2023-02-23 00:38:29,268][05631] Environment doom_deathmatch_bots already registered, overwriting...
|
2480 |
+
[2023-02-23 00:38:29,269][05631] Environment doom_duel already registered, overwriting...
|
2481 |
+
[2023-02-23 00:38:29,271][05631] Environment doom_deathmatch_full already registered, overwriting...
|
2482 |
+
[2023-02-23 00:38:29,272][05631] Environment doom_benchmark already registered, overwriting...
|
2483 |
+
[2023-02-23 00:38:29,274][05631] register_encoder_factory: <function make_vizdoom_encoder at 0x7f0a330101f0>
|
2484 |
+
[2023-02-23 00:38:29,308][05631] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
|
2485 |
+
[2023-02-23 00:38:29,311][05631] Overriding arg 'gamma' with value 0.98 passed from command line
|
2486 |
+
[2023-02-23 00:38:29,317][05631] Experiment dir /content/train_dir/default_experiment already exists!
|
2487 |
+
[2023-02-23 00:38:29,319][05631] Resuming existing experiment from /content/train_dir/default_experiment...
|
2488 |
+
[2023-02-23 00:38:29,322][05631] Weights and Biases integration disabled
|
2489 |
+
[2023-02-23 00:38:29,326][05631] Environment var CUDA_VISIBLE_DEVICES is 0
|
2490 |
+
|
2491 |
+
[2023-02-23 00:38:32,154][05631] Starting experiment with the following configuration:
|
2492 |
+
help=False
|
2493 |
+
algo=APPO
|
2494 |
+
env=doom_health_gathering_supreme
|
2495 |
+
experiment=default_experiment
|
2496 |
+
train_dir=/content/train_dir
|
2497 |
+
restart_behavior=resume
|
2498 |
+
device=gpu
|
2499 |
+
seed=None
|
2500 |
+
num_policies=1
|
2501 |
+
async_rl=True
|
2502 |
+
serial_mode=False
|
2503 |
+
batched_sampling=False
|
2504 |
+
num_batches_to_accumulate=2
|
2505 |
+
worker_num_splits=2
|
2506 |
+
policy_workers_per_policy=1
|
2507 |
+
max_policy_lag=1000
|
2508 |
+
num_workers=8
|
2509 |
+
num_envs_per_worker=4
|
2510 |
+
batch_size=1024
|
2511 |
+
num_batches_per_epoch=1
|
2512 |
+
num_epochs=1
|
2513 |
+
rollout=32
|
2514 |
+
recurrence=32
|
2515 |
+
shuffle_minibatches=False
|
2516 |
+
gamma=0.98
|
2517 |
+
reward_scale=1.0
|
2518 |
+
reward_clip=1000.0
|
2519 |
+
value_bootstrap=False
|
2520 |
+
normalize_returns=True
|
2521 |
+
exploration_loss_coeff=0.001
|
2522 |
+
value_loss_coeff=0.5
|
2523 |
+
kl_loss_coeff=0.0
|
2524 |
+
exploration_loss=symmetric_kl
|
2525 |
+
gae_lambda=0.95
|
2526 |
+
ppo_clip_ratio=0.1
|
2527 |
+
ppo_clip_value=0.2
|
2528 |
+
with_vtrace=False
|
2529 |
+
vtrace_rho=1.0
|
2530 |
+
vtrace_c=1.0
|
2531 |
+
optimizer=adam
|
2532 |
+
adam_eps=1e-06
|
2533 |
+
adam_beta1=0.9
|
2534 |
+
adam_beta2=0.999
|
2535 |
+
max_grad_norm=4.0
|
2536 |
+
learning_rate=0.0001
|
2537 |
+
lr_schedule=constant
|
2538 |
+
lr_schedule_kl_threshold=0.008
|
2539 |
+
lr_adaptive_min=1e-06
|
2540 |
+
lr_adaptive_max=0.01
|
2541 |
+
obs_subtract_mean=0.0
|
2542 |
+
obs_scale=255.0
|
2543 |
+
normalize_input=True
|
2544 |
+
normalize_input_keys=None
|
2545 |
+
decorrelate_experience_max_seconds=0
|
2546 |
+
decorrelate_envs_on_one_worker=True
|
2547 |
+
actor_worker_gpus=[]
|
2548 |
+
set_workers_cpu_affinity=True
|
2549 |
+
force_envs_single_thread=False
|
2550 |
+
default_niceness=0
|
2551 |
+
log_to_file=True
|
2552 |
+
experiment_summaries_interval=10
|
2553 |
+
flush_summaries_interval=30
|
2554 |
+
stats_avg=100
|
2555 |
+
summaries_use_frameskip=True
|
2556 |
+
heartbeat_interval=20
|
2557 |
+
heartbeat_reporting_interval=600
|
2558 |
+
train_for_env_steps=10000000
|
2559 |
+
train_for_seconds=10000000000
|
2560 |
+
save_every_sec=120
|
2561 |
+
keep_checkpoints=2
|
2562 |
+
load_checkpoint_kind=latest
|
2563 |
+
save_milestones_sec=-1
|
2564 |
+
save_best_every_sec=5
|
2565 |
+
save_best_metric=reward
|
2566 |
+
save_best_after=100000
|
2567 |
+
benchmark=False
|
2568 |
+
encoder_mlp_layers=[512, 512]
|
2569 |
+
encoder_conv_architecture=convnet_simple
|
2570 |
+
encoder_conv_mlp_layers=[512]
|
2571 |
+
use_rnn=True
|
2572 |
+
rnn_size=512
|
2573 |
+
rnn_type=gru
|
2574 |
+
rnn_num_layers=1
|
2575 |
+
decoder_mlp_layers=[]
|
2576 |
+
nonlinearity=elu
|
2577 |
+
policy_initialization=orthogonal
|
2578 |
+
policy_init_gain=1.0
|
2579 |
+
actor_critic_share_weights=True
|
2580 |
+
adaptive_stddev=True
|
2581 |
+
continuous_tanh_scale=0.0
|
2582 |
+
initial_stddev=1.0
|
2583 |
+
use_env_info_cache=False
|
2584 |
+
env_gpu_actions=False
|
2585 |
+
env_gpu_observations=True
|
2586 |
+
env_frameskip=4
|
2587 |
+
env_framestack=1
|
2588 |
+
pixel_format=CHW
|
2589 |
+
use_record_episode_statistics=False
|
2590 |
+
with_wandb=False
|
2591 |
+
wandb_user=None
|
2592 |
+
wandb_project=sample_factory
|
2593 |
+
wandb_group=None
|
2594 |
+
wandb_job_type=SF
|
2595 |
+
wandb_tags=[]
|
2596 |
+
with_pbt=False
|
2597 |
+
pbt_mix_policies_in_one_env=True
|
2598 |
+
pbt_period_env_steps=5000000
|
2599 |
+
pbt_start_mutation=20000000
|
2600 |
+
pbt_replace_fraction=0.3
|
2601 |
+
pbt_mutation_rate=0.15
|
2602 |
+
pbt_replace_reward_gap=0.1
|
2603 |
+
pbt_replace_reward_gap_absolute=1e-06
|
2604 |
+
pbt_optimize_gamma=False
|
2605 |
+
pbt_target_objective=true_objective
|
2606 |
+
pbt_perturb_min=1.1
|
2607 |
+
pbt_perturb_max=1.5
|
2608 |
+
num_agents=-1
|
2609 |
+
num_humans=0
|
2610 |
+
num_bots=-1
|
2611 |
+
start_bot_difficulty=None
|
2612 |
+
timelimit=None
|
2613 |
+
res_w=128
|
2614 |
+
res_h=72
|
2615 |
+
wide_aspect_ratio=False
|
2616 |
+
eval_env_frameskip=1
|
2617 |
+
fps=35
|
2618 |
+
command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000
|
2619 |
+
cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000}
|
2620 |
+
git_hash=unknown
|
2621 |
+
git_repo_name=not a git repository
|
2622 |
+
[2023-02-23 00:38:32,160][05631] Saving configuration to /content/train_dir/default_experiment/config.json...
|
2623 |
+
[2023-02-23 00:38:32,167][05631] Rollout worker 0 uses device cpu
|
2624 |
+
[2023-02-23 00:38:32,172][05631] Rollout worker 1 uses device cpu
|
2625 |
+
[2023-02-23 00:38:32,174][05631] Rollout worker 2 uses device cpu
|
2626 |
+
[2023-02-23 00:38:32,177][05631] Rollout worker 3 uses device cpu
|
2627 |
+
[2023-02-23 00:38:32,180][05631] Rollout worker 4 uses device cpu
|
2628 |
+
[2023-02-23 00:38:32,182][05631] Rollout worker 5 uses device cpu
|
2629 |
+
[2023-02-23 00:38:32,184][05631] Rollout worker 6 uses device cpu
|
2630 |
+
[2023-02-23 00:38:32,193][05631] Rollout worker 7 uses device cpu
|
2631 |
+
[2023-02-23 00:38:32,318][05631] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
2632 |
+
[2023-02-23 00:38:32,323][05631] InferenceWorker_p0-w0: min num requests: 2
|
2633 |
+
[2023-02-23 00:38:32,354][05631] Starting all processes...
|
2634 |
+
[2023-02-23 00:38:32,359][05631] Starting process learner_proc0
|
2635 |
+
[2023-02-23 00:38:32,505][05631] Starting all processes...
|
2636 |
+
[2023-02-23 00:38:32,516][05631] Starting process inference_proc0-0
|
2637 |
+
[2023-02-23 00:38:32,517][05631] Starting process rollout_proc0
|
2638 |
+
[2023-02-23 00:38:32,521][05631] Starting process rollout_proc1
|
2639 |
+
[2023-02-23 00:38:32,521][05631] Starting process rollout_proc2
|
2640 |
+
[2023-02-23 00:38:32,521][05631] Starting process rollout_proc3
|
2641 |
+
[2023-02-23 00:38:32,521][05631] Starting process rollout_proc4
|
2642 |
+
[2023-02-23 00:38:32,521][05631] Starting process rollout_proc5
|
2643 |
+
[2023-02-23 00:38:32,521][05631] Starting process rollout_proc6
|
2644 |
+
[2023-02-23 00:38:32,521][05631] Starting process rollout_proc7
|
2645 |
+
[2023-02-23 00:38:44,016][34379] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
2646 |
+
[2023-02-23 00:38:44,017][34379] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
|
2647 |
+
[2023-02-23 00:38:44,094][34379] Num visible devices: 1
|
2648 |
+
[2023-02-23 00:38:44,127][34379] Starting seed is not provided
|
2649 |
+
[2023-02-23 00:38:44,127][34379] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
2650 |
+
[2023-02-23 00:38:44,128][34379] Initializing actor-critic model on device cuda:0
|
2651 |
+
[2023-02-23 00:38:44,129][34379] RunningMeanStd input shape: (3, 72, 128)
|
2652 |
+
[2023-02-23 00:38:44,136][34379] RunningMeanStd input shape: (1,)
|
2653 |
+
[2023-02-23 00:38:44,260][34379] ConvEncoder: input_channels=3
|
2654 |
+
[2023-02-23 00:38:44,635][34394] Worker 0 uses CPU cores [0]
|
2655 |
+
[2023-02-23 00:38:44,821][34393] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
2656 |
+
[2023-02-23 00:38:44,822][34393] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
|
2657 |
+
[2023-02-23 00:38:44,894][34393] Num visible devices: 1
|
2658 |
+
[2023-02-23 00:38:45,181][34379] Conv encoder output size: 512
|
2659 |
+
[2023-02-23 00:38:45,182][34379] Policy head output size: 512
|
2660 |
+
[2023-02-23 00:38:45,290][34379] Created Actor Critic model with architecture:
|
2661 |
+
[2023-02-23 00:38:45,292][34395] Worker 2 uses CPU cores [0]
|
2662 |
+
[2023-02-23 00:38:45,291][34379] ActorCriticSharedWeights(
|
2663 |
+
(obs_normalizer): ObservationNormalizer(
|
2664 |
+
(running_mean_std): RunningMeanStdDictInPlace(
|
2665 |
+
(running_mean_std): ModuleDict(
|
2666 |
+
(obs): RunningMeanStdInPlace()
|
2667 |
+
)
|
2668 |
+
)
|
2669 |
+
)
|
2670 |
+
(returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
|
2671 |
+
(encoder): VizdoomEncoder(
|
2672 |
+
(basic_encoder): ConvEncoder(
|
2673 |
+
(enc): RecursiveScriptModule(
|
2674 |
+
original_name=ConvEncoderImpl
|
2675 |
+
(conv_head): RecursiveScriptModule(
|
2676 |
+
original_name=Sequential
|
2677 |
+
(0): RecursiveScriptModule(original_name=Conv2d)
|
2678 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
2679 |
+
(2): RecursiveScriptModule(original_name=Conv2d)
|
2680 |
+
(3): RecursiveScriptModule(original_name=ELU)
|
2681 |
+
(4): RecursiveScriptModule(original_name=Conv2d)
|
2682 |
+
(5): RecursiveScriptModule(original_name=ELU)
|
2683 |
+
)
|
2684 |
+
(mlp_layers): RecursiveScriptModule(
|
2685 |
+
original_name=Sequential
|
2686 |
+
(0): RecursiveScriptModule(original_name=Linear)
|
2687 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
2688 |
+
)
|
2689 |
+
)
|
2690 |
+
)
|
2691 |
+
)
|
2692 |
+
(core): ModelCoreRNN(
|
2693 |
+
(core): GRU(512, 512)
|
2694 |
+
)
|
2695 |
+
(decoder): MlpDecoder(
|
2696 |
+
(mlp): Identity()
|
2697 |
+
)
|
2698 |
+
(critic_linear): Linear(in_features=512, out_features=1, bias=True)
|
2699 |
+
(action_parameterization): ActionParameterizationDefault(
|
2700 |
+
(distribution_linear): Linear(in_features=512, out_features=5, bias=True)
|
2701 |
+
)
|
2702 |
+
)
|
2703 |
+
[2023-02-23 00:38:45,460][34404] Worker 4 uses CPU cores [0]
|
2704 |
+
[2023-02-23 00:38:45,507][34400] Worker 1 uses CPU cores [1]
|
2705 |
+
[2023-02-23 00:38:45,837][34406] Worker 3 uses CPU cores [1]
|
2706 |
+
[2023-02-23 00:38:45,918][34415] Worker 5 uses CPU cores [1]
|
2707 |
+
[2023-02-23 00:38:46,026][34408] Worker 6 uses CPU cores [0]
|
2708 |
+
[2023-02-23 00:38:46,052][34416] Worker 7 uses CPU cores [1]
|
2709 |
+
[2023-02-23 00:38:49,594][34379] Using optimizer <class 'torch.optim.adam.Adam'>
|
2710 |
+
[2023-02-23 00:38:49,595][34379] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth...
|
2711 |
+
[2023-02-23 00:38:49,629][34379] Loading model from checkpoint
|
2712 |
+
[2023-02-23 00:38:49,634][34379] Loaded experiment state at self.train_step=2443, self.env_steps=10006528
|
2713 |
+
[2023-02-23 00:38:49,635][34379] Initialized policy 0 weights for model version 2443
|
2714 |
+
[2023-02-23 00:38:49,637][34379] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
2715 |
+
[2023-02-23 00:38:49,644][34379] LearnerWorker_p0 finished initialization!
|
2716 |
+
[2023-02-23 00:38:49,753][34393] RunningMeanStd input shape: (3, 72, 128)
|
2717 |
+
[2023-02-23 00:38:49,754][34393] RunningMeanStd input shape: (1,)
|
2718 |
+
[2023-02-23 00:38:49,767][34393] ConvEncoder: input_channels=3
|
2719 |
+
[2023-02-23 00:38:49,868][34393] Conv encoder output size: 512
|
2720 |
+
[2023-02-23 00:38:49,869][34393] Policy head output size: 512
|
2721 |
+
[2023-02-23 00:38:52,129][05631] Inference worker 0-0 is ready!
|
2722 |
+
[2023-02-23 00:38:52,131][05631] All inference workers are ready! Signal rollout workers to start!
|
2723 |
+
[2023-02-23 00:38:52,234][34406] Doom resolution: 160x120, resize resolution: (128, 72)
|
2724 |
+
[2023-02-23 00:38:52,236][34408] Doom resolution: 160x120, resize resolution: (128, 72)
|
2725 |
+
[2023-02-23 00:38:52,235][34400] Doom resolution: 160x120, resize resolution: (128, 72)
|
2726 |
+
[2023-02-23 00:38:52,237][34404] Doom resolution: 160x120, resize resolution: (128, 72)
|
2727 |
+
[2023-02-23 00:38:52,232][34416] Doom resolution: 160x120, resize resolution: (128, 72)
|
2728 |
+
[2023-02-23 00:38:52,237][34394] Doom resolution: 160x120, resize resolution: (128, 72)
|
2729 |
+
[2023-02-23 00:38:52,232][34395] Doom resolution: 160x120, resize resolution: (128, 72)
|
2730 |
+
[2023-02-23 00:38:52,239][34415] Doom resolution: 160x120, resize resolution: (128, 72)
|
2731 |
+
[2023-02-23 00:38:52,310][05631] Heartbeat connected on Batcher_0
|
2732 |
+
[2023-02-23 00:38:52,315][05631] Heartbeat connected on LearnerWorker_p0
|
2733 |
+
[2023-02-23 00:38:52,366][05631] Heartbeat connected on InferenceWorker_p0-w0
|
2734 |
+
[2023-02-23 00:38:52,742][34406] Decorrelating experience for 0 frames...
|
2735 |
+
[2023-02-23 00:38:53,295][34415] Decorrelating experience for 0 frames...
|
2736 |
+
[2023-02-23 00:38:53,847][34395] Decorrelating experience for 0 frames...
|
2737 |
+
[2023-02-23 00:38:53,851][34408] Decorrelating experience for 0 frames...
|
2738 |
+
[2023-02-23 00:38:53,857][34404] Decorrelating experience for 0 frames...
|
2739 |
+
[2023-02-23 00:38:53,859][34394] Decorrelating experience for 0 frames...
|
2740 |
+
[2023-02-23 00:38:54,328][05631] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 10006528. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
2741 |
+
[2023-02-23 00:38:55,102][34404] Decorrelating experience for 32 frames...
|
2742 |
+
[2023-02-23 00:38:55,113][34394] Decorrelating experience for 32 frames...
|
2743 |
+
[2023-02-23 00:38:55,146][34395] Decorrelating experience for 32 frames...
|
2744 |
+
[2023-02-23 00:38:55,194][34400] Decorrelating experience for 0 frames...
|
2745 |
+
[2023-02-23 00:38:55,186][34416] Decorrelating experience for 0 frames...
|
2746 |
+
[2023-02-23 00:38:56,646][34408] Decorrelating experience for 32 frames...
|
2747 |
+
[2023-02-23 00:38:56,994][34416] Decorrelating experience for 32 frames...
|
2748 |
+
[2023-02-23 00:38:57,000][34400] Decorrelating experience for 32 frames...
|
2749 |
+
[2023-02-23 00:38:57,003][34406] Decorrelating experience for 32 frames...
|
2750 |
+
[2023-02-23 00:38:57,016][34394] Decorrelating experience for 64 frames...
|
2751 |
+
[2023-02-23 00:38:57,018][34395] Decorrelating experience for 64 frames...
|
2752 |
+
[2023-02-23 00:38:58,783][34404] Decorrelating experience for 64 frames...
|
2753 |
+
[2023-02-23 00:38:58,802][34415] Decorrelating experience for 32 frames...
|
2754 |
+
[2023-02-23 00:38:59,076][34408] Decorrelating experience for 64 frames...
|
2755 |
+
[2023-02-23 00:38:59,141][34406] Decorrelating experience for 64 frames...
|
2756 |
+
[2023-02-23 00:38:59,160][34400] Decorrelating experience for 64 frames...
|
2757 |
+
[2023-02-23 00:38:59,261][34395] Decorrelating experience for 96 frames...
|
2758 |
+
[2023-02-23 00:38:59,275][34394] Decorrelating experience for 96 frames...
|
2759 |
+
[2023-02-23 00:38:59,327][05631] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 10006528. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
2760 |
+
[2023-02-23 00:38:59,623][05631] Heartbeat connected on RolloutWorker_w2
|
2761 |
+
[2023-02-23 00:38:59,671][05631] Heartbeat connected on RolloutWorker_w0
|
2762 |
+
[2023-02-23 00:39:00,510][34404] Decorrelating experience for 96 frames...
|
2763 |
+
[2023-02-23 00:39:00,708][05631] Heartbeat connected on RolloutWorker_w4
|
2764 |
+
[2023-02-23 00:39:00,726][34408] Decorrelating experience for 96 frames...
|
2765 |
+
[2023-02-23 00:39:00,911][05631] Heartbeat connected on RolloutWorker_w6
|
2766 |
+
[2023-02-23 00:39:01,609][34415] Decorrelating experience for 64 frames...
|
2767 |
+
[2023-02-23 00:39:01,635][34416] Decorrelating experience for 64 frames...
|
2768 |
+
[2023-02-23 00:39:01,747][34406] Decorrelating experience for 96 frames...
|
2769 |
+
[2023-02-23 00:39:01,758][34400] Decorrelating experience for 96 frames...
|
2770 |
+
[2023-02-23 00:39:02,043][05631] Heartbeat connected on RolloutWorker_w3
|
2771 |
+
[2023-02-23 00:39:02,062][05631] Heartbeat connected on RolloutWorker_w1
|
2772 |
+
[2023-02-23 00:39:04,220][34415] Decorrelating experience for 96 frames...
|
2773 |
+
[2023-02-23 00:39:04,249][34416] Decorrelating experience for 96 frames...
|
2774 |
+
[2023-02-23 00:39:04,327][05631] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 10006528. Throughput: 0: 166.6. Samples: 1666. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
2775 |
+
[2023-02-23 00:39:04,329][05631] Avg episode reward: [(0, '1.925')]
|
2776 |
+
[2023-02-23 00:39:04,908][05631] Heartbeat connected on RolloutWorker_w5
|
2777 |
+
[2023-02-23 00:39:04,955][05631] Heartbeat connected on RolloutWorker_w7
|
2778 |
+
[2023-02-23 00:39:05,985][34379] Signal inference workers to stop experience collection...
|
2779 |
+
[2023-02-23 00:39:06,010][34393] InferenceWorker_p0-w0: stopping experience collection
|
2780 |
+
[2023-02-23 00:39:07,046][34379] Signal inference workers to resume experience collection...
|
2781 |
+
[2023-02-23 00:39:07,049][34393] InferenceWorker_p0-w0: resuming experience collection
|
2782 |
+
[2023-02-23 00:39:07,049][34379] Stopping Batcher_0...
|
2783 |
+
[2023-02-23 00:39:07,052][34379] Loop batcher_evt_loop terminating...
|
2784 |
+
[2023-02-23 00:39:07,050][05631] Component Batcher_0 stopped!
|
2785 |
+
[2023-02-23 00:39:07,084][34406] Stopping RolloutWorker_w3...
|
2786 |
+
[2023-02-23 00:39:07,085][34406] Loop rollout_proc3_evt_loop terminating...
|
2787 |
+
[2023-02-23 00:39:07,084][05631] Component RolloutWorker_w3 stopped!
|
2788 |
+
[2023-02-23 00:39:07,091][34415] Stopping RolloutWorker_w5...
|
2789 |
+
[2023-02-23 00:39:07,092][34400] Stopping RolloutWorker_w1...
|
2790 |
+
[2023-02-23 00:39:07,093][34415] Loop rollout_proc5_evt_loop terminating...
|
2791 |
+
[2023-02-23 00:39:07,094][34400] Loop rollout_proc1_evt_loop terminating...
|
2792 |
+
[2023-02-23 00:39:07,091][05631] Component RolloutWorker_w5 stopped!
|
2793 |
+
[2023-02-23 00:39:07,097][34416] Stopping RolloutWorker_w7...
|
2794 |
+
[2023-02-23 00:39:07,098][34416] Loop rollout_proc7_evt_loop terminating...
|
2795 |
+
[2023-02-23 00:39:07,098][05631] Component RolloutWorker_w1 stopped!
|
2796 |
+
[2023-02-23 00:39:07,105][05631] Component RolloutWorker_w7 stopped!
|
2797 |
+
[2023-02-23 00:39:07,143][05631] Component RolloutWorker_w0 stopped!
|
2798 |
+
[2023-02-23 00:39:07,150][34394] Stopping RolloutWorker_w0...
|
2799 |
+
[2023-02-23 00:39:07,150][34394] Loop rollout_proc0_evt_loop terminating...
|
2800 |
+
[2023-02-23 00:39:07,160][05631] Component RolloutWorker_w4 stopped!
|
2801 |
+
[2023-02-23 00:39:07,166][34404] Stopping RolloutWorker_w4...
|
2802 |
+
[2023-02-23 00:39:07,166][34404] Loop rollout_proc4_evt_loop terminating...
|
2803 |
+
[2023-02-23 00:39:07,159][34393] Weights refcount: 2 0
|
2804 |
+
[2023-02-23 00:39:07,182][05631] Component InferenceWorker_p0-w0 stopped!
|
2805 |
+
[2023-02-23 00:39:07,191][34408] Stopping RolloutWorker_w6...
|
2806 |
+
[2023-02-23 00:39:07,191][05631] Component RolloutWorker_w6 stopped!
|
2807 |
+
[2023-02-23 00:39:07,193][34393] Stopping InferenceWorker_p0-w0...
|
2808 |
+
[2023-02-23 00:39:07,194][34393] Loop inference_proc0-0_evt_loop terminating...
|
2809 |
+
[2023-02-23 00:39:07,200][34408] Loop rollout_proc6_evt_loop terminating...
|
2810 |
+
[2023-02-23 00:39:07,204][05631] Component RolloutWorker_w2 stopped!
|
2811 |
+
[2023-02-23 00:39:07,206][34395] Stopping RolloutWorker_w2...
|
2812 |
+
[2023-02-23 00:39:07,208][34395] Loop rollout_proc2_evt_loop terminating...
|
2813 |
+
[2023-02-23 00:39:09,166][34379] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002445_10014720.pth...
|
2814 |
+
[2023-02-23 00:39:09,278][34379] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002434_9969664.pth
|
2815 |
+
[2023-02-23 00:39:09,286][34379] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002445_10014720.pth...
|
2816 |
+
[2023-02-23 00:39:09,425][05631] Component LearnerWorker_p0 stopped!
|
2817 |
+
[2023-02-23 00:39:09,431][05631] Waiting for process learner_proc0 to stop...
|
2818 |
+
[2023-02-23 00:39:09,426][34379] Stopping LearnerWorker_p0...
|
2819 |
+
[2023-02-23 00:39:09,436][34379] Loop learner_proc0_evt_loop terminating...
|
2820 |
+
[2023-02-23 00:39:10,754][05631] Waiting for process inference_proc0-0 to join...
|
2821 |
+
[2023-02-23 00:39:10,758][05631] Waiting for process rollout_proc0 to join...
|
2822 |
+
[2023-02-23 00:39:10,763][05631] Waiting for process rollout_proc1 to join...
|
2823 |
+
[2023-02-23 00:39:10,765][05631] Waiting for process rollout_proc2 to join...
|
2824 |
+
[2023-02-23 00:39:10,770][05631] Waiting for process rollout_proc3 to join...
|
2825 |
+
[2023-02-23 00:39:10,774][05631] Waiting for process rollout_proc4 to join...
|
2826 |
+
[2023-02-23 00:39:10,778][05631] Waiting for process rollout_proc5 to join...
|
2827 |
+
[2023-02-23 00:39:10,781][05631] Waiting for process rollout_proc6 to join...
|
2828 |
+
[2023-02-23 00:39:10,783][05631] Waiting for process rollout_proc7 to join...
|
2829 |
+
[2023-02-23 00:39:10,787][05631] Batcher 0 profile tree view:
|
2830 |
+
batching: 0.0630, releasing_batches: 0.0022
|
2831 |
+
[2023-02-23 00:39:10,793][05631] InferenceWorker_p0-w0 profile tree view:
|
2832 |
+
update_model: 0.0221
|
2833 |
+
wait_policy: 0.0000
|
2834 |
+
wait_policy_total: 9.8845
|
2835 |
+
one_step: 0.0029
|
2836 |
+
handle_policy_step: 3.6858
|
2837 |
+
deserialize: 0.0487, stack: 0.0120, obs_to_device_normalize: 0.3783, forward: 2.8836, send_messages: 0.0690
|
2838 |
+
prepare_outputs: 0.2014
|
2839 |
+
to_cpu: 0.1193
|
2840 |
+
[2023-02-23 00:39:10,795][05631] Learner 0 profile tree view:
|
2841 |
+
misc: 0.0000, prepare_batch: 5.0141
|
2842 |
+
train: 0.6550
|
2843 |
+
epoch_init: 0.0000, minibatch_init: 0.0000, losses_postprocess: 0.0004, kl_divergence: 0.0004, after_optimizer: 0.0048
|
2844 |
+
calculate_losses: 0.1413
|
2845 |
+
losses_init: 0.0000, forward_head: 0.1155, bptt_initial: 0.0169, tail: 0.0014, advantages_returns: 0.0011, losses: 0.0034
|
2846 |
+
bptt: 0.0026
|
2847 |
+
bptt_forward_core: 0.0025
|
2848 |
+
update: 0.5072
|
2849 |
+
clip: 0.0054
|
2850 |
+
[2023-02-23 00:39:10,796][05631] RolloutWorker_w0 profile tree view:
|
2851 |
+
wait_for_trajectories: 0.0233, enqueue_policy_requests: 0.9685, env_step: 3.1668, overhead: 0.0812, complete_rollouts: 0.0138
|
2852 |
+
save_policy_outputs: 0.0673
|
2853 |
+
split_output_tensors: 0.0201
|
2854 |
+
[2023-02-23 00:39:10,797][05631] RolloutWorker_w7 profile tree view:
|
2855 |
+
wait_for_trajectories: 0.0003, enqueue_policy_requests: 0.1870, env_step: 0.7674, overhead: 0.0097, complete_rollouts: 0.0001
|
2856 |
+
save_policy_outputs: 0.0074
|
2857 |
+
split_output_tensors: 0.0037
|
2858 |
+
[2023-02-23 00:39:10,799][05631] Loop Runner_EvtLoop terminating...
|
2859 |
+
[2023-02-23 00:39:10,800][05631] Runner profile tree view:
|
2860 |
+
main_loop: 38.4461
|
2861 |
+
[2023-02-23 00:39:10,801][05631] Collected {0: 10014720}, FPS: 213.1
|
2862 |
+
[2023-02-23 00:39:10,846][05631] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
|
2863 |
+
[2023-02-23 00:39:10,850][05631] Overriding arg 'num_workers' with value 1 passed from command line
|
2864 |
+
[2023-02-23 00:39:10,854][05631] Adding new argument 'no_render'=True that is not in the saved config file!
|
2865 |
+
[2023-02-23 00:39:10,856][05631] Adding new argument 'save_video'=True that is not in the saved config file!
|
2866 |
+
[2023-02-23 00:39:10,858][05631] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
|
2867 |
+
[2023-02-23 00:39:10,859][05631] Adding new argument 'video_name'=None that is not in the saved config file!
|
2868 |
+
[2023-02-23 00:39:10,865][05631] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
|
2869 |
+
[2023-02-23 00:39:10,867][05631] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
|
2870 |
+
[2023-02-23 00:39:10,872][05631] Adding new argument 'push_to_hub'=False that is not in the saved config file!
|
2871 |
+
[2023-02-23 00:39:10,874][05631] Adding new argument 'hf_repository'=None that is not in the saved config file!
|
2872 |
+
[2023-02-23 00:39:10,875][05631] Adding new argument 'policy_index'=0 that is not in the saved config file!
|
2873 |
+
[2023-02-23 00:39:10,877][05631] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
|
2874 |
+
[2023-02-23 00:39:10,878][05631] Adding new argument 'train_script'=None that is not in the saved config file!
|
2875 |
+
[2023-02-23 00:39:10,880][05631] Adding new argument 'enjoy_script'=None that is not in the saved config file!
|
2876 |
+
[2023-02-23 00:39:10,883][05631] Using frameskip 1 and render_action_repeat=4 for evaluation
|
2877 |
+
[2023-02-23 00:39:10,923][05631] RunningMeanStd input shape: (3, 72, 128)
|
2878 |
+
[2023-02-23 00:39:10,926][05631] RunningMeanStd input shape: (1,)
|
2879 |
+
[2023-02-23 00:39:10,947][05631] ConvEncoder: input_channels=3
|
2880 |
+
[2023-02-23 00:39:11,007][05631] Conv encoder output size: 512
|
2881 |
+
[2023-02-23 00:39:11,009][05631] Policy head output size: 512
|
2882 |
+
[2023-02-23 00:39:11,044][05631] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002445_10014720.pth...
|
2883 |
+
[2023-02-23 00:39:11,890][05631] Num frames 100...
|
2884 |
+
[2023-02-23 00:39:12,066][05631] Num frames 200...
|
2885 |
+
[2023-02-23 00:39:12,256][05631] Num frames 300...
|
2886 |
+
[2023-02-23 00:39:12,465][05631] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
|
2887 |
+
[2023-02-23 00:39:12,468][05631] Avg episode reward: 3.840, avg true_objective: 3.840
|
2888 |
+
[2023-02-23 00:39:12,504][05631] Num frames 400...
|
2889 |
+
[2023-02-23 00:39:12,689][05631] Num frames 500...
|
2890 |
+
[2023-02-23 00:39:12,859][05631] Num frames 600...
|
2891 |
+
[2023-02-23 00:39:13,033][05631] Num frames 700...
|
2892 |
+
[2023-02-23 00:39:13,213][05631] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
|
2893 |
+
[2023-02-23 00:39:13,216][05631] Avg episode reward: 3.840, avg true_objective: 3.840
|
2894 |
+
[2023-02-23 00:39:13,278][05631] Num frames 800...
|
2895 |
+
[2023-02-23 00:39:13,449][05631] Num frames 900...
|
2896 |
+
[2023-02-23 00:39:13,626][05631] Num frames 1000...
|
2897 |
+
[2023-02-23 00:39:13,797][05631] Num frames 1100...
|
2898 |
+
[2023-02-23 00:39:13,944][05631] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
|
2899 |
+
[2023-02-23 00:39:13,947][05631] Avg episode reward: 3.840, avg true_objective: 3.840
|
2900 |
+
[2023-02-23 00:39:14,040][05631] Num frames 1200...
|
2901 |
+
[2023-02-23 00:39:14,212][05631] Num frames 1300...
|
2902 |
+
[2023-02-23 00:39:14,396][05631] Num frames 1400...
|
2903 |
+
[2023-02-23 00:39:14,571][05631] Num frames 1500...
|
2904 |
+
[2023-02-23 00:39:14,703][05631] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
|
2905 |
+
[2023-02-23 00:39:14,706][05631] Avg episode reward: 3.840, avg true_objective: 3.840
|
2906 |
+
[2023-02-23 00:39:14,832][05631] Num frames 1600...
|
2907 |
+
[2023-02-23 00:39:14,955][05631] Num frames 1700...
|
2908 |
+
[2023-02-23 00:39:15,076][05631] Num frames 1800...
|
2909 |
+
[2023-02-23 00:39:15,196][05631] Num frames 1900...
|
2910 |
+
[2023-02-23 00:39:15,312][05631] Num frames 2000...
|
2911 |
+
[2023-02-23 00:39:15,443][05631] Num frames 2100...
|
2912 |
+
[2023-02-23 00:39:15,570][05631] Num frames 2200...
|
2913 |
+
[2023-02-23 00:39:15,676][05631] Avg episode rewards: #0: 5.280, true rewards: #0: 4.480
|
2914 |
+
[2023-02-23 00:39:15,678][05631] Avg episode reward: 5.280, avg true_objective: 4.480
|
2915 |
+
[2023-02-23 00:39:15,760][05631] Num frames 2300...
|
2916 |
+
[2023-02-23 00:39:15,888][05631] Num frames 2400...
|
2917 |
+
[2023-02-23 00:39:16,008][05631] Num frames 2500...
|
2918 |
+
[2023-02-23 00:39:16,129][05631] Num frames 2600...
|
2919 |
+
[2023-02-23 00:39:16,262][05631] Avg episode rewards: #0: 5.260, true rewards: #0: 4.427
|
2920 |
+
[2023-02-23 00:39:16,264][05631] Avg episode reward: 5.260, avg true_objective: 4.427
|
2921 |
+
[2023-02-23 00:39:16,324][05631] Num frames 2700...
|
2922 |
+
[2023-02-23 00:39:16,453][05631] Num frames 2800...
|
2923 |
+
[2023-02-23 00:39:16,577][05631] Num frames 2900...
|
2924 |
+
[2023-02-23 00:39:16,695][05631] Num frames 3000...
|
2925 |
+
[2023-02-23 00:39:16,797][05631] Avg episode rewards: #0: 5.057, true rewards: #0: 4.343
|
2926 |
+
[2023-02-23 00:39:16,799][05631] Avg episode reward: 5.057, avg true_objective: 4.343
|
2927 |
+
[2023-02-23 00:39:16,880][05631] Num frames 3100...
|
2928 |
+
[2023-02-23 00:39:17,017][05631] Num frames 3200...
|
2929 |
+
[2023-02-23 00:39:17,146][05631] Num frames 3300...
|
2930 |
+
[2023-02-23 00:39:17,277][05631] Num frames 3400...
|
2931 |
+
[2023-02-23 00:39:17,451][05631] Avg episode rewards: #0: 5.110, true rewards: #0: 4.360
|
2932 |
+
[2023-02-23 00:39:17,454][05631] Avg episode reward: 5.110, avg true_objective: 4.360
|
2933 |
+
[2023-02-23 00:39:17,476][05631] Num frames 3500...
|
2934 |
+
[2023-02-23 00:39:17,595][05631] Num frames 3600...
|
2935 |
+
[2023-02-23 00:39:17,733][05631] Num frames 3700...
|
2936 |
+
[2023-02-23 00:39:17,839][05631] Avg episode rewards: #0: 4.827, true rewards: #0: 4.160
|
2937 |
+
[2023-02-23 00:39:17,842][05631] Avg episode reward: 4.827, avg true_objective: 4.160
|
2938 |
+
[2023-02-23 00:39:17,915][05631] Num frames 3800...
|
2939 |
+
[2023-02-23 00:39:18,037][05631] Num frames 3900...
|
2940 |
+
[2023-02-23 00:39:18,159][05631] Num frames 4000...
|
2941 |
+
[2023-02-23 00:39:18,282][05631] Num frames 4100...
|
2942 |
+
[2023-02-23 00:39:18,412][05631] Avg episode rewards: #0: 4.860, true rewards: #0: 4.160
|
2943 |
+
[2023-02-23 00:39:18,415][05631] Avg episode reward: 4.860, avg true_objective: 4.160
|
2944 |
+
[2023-02-23 00:39:41,642][05631] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
|
2945 |
+
[2023-02-23 00:39:41,852][05631] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
|
2946 |
+
[2023-02-23 00:39:41,854][05631] Overriding arg 'num_workers' with value 1 passed from command line
|
2947 |
+
[2023-02-23 00:39:41,857][05631] Adding new argument 'no_render'=True that is not in the saved config file!
|
2948 |
+
[2023-02-23 00:39:41,859][05631] Adding new argument 'save_video'=True that is not in the saved config file!
|
2949 |
+
[2023-02-23 00:39:41,865][05631] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
|
2950 |
+
[2023-02-23 00:39:41,866][05631] Adding new argument 'video_name'=None that is not in the saved config file!
|
2951 |
+
[2023-02-23 00:39:41,868][05631] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
|
2952 |
+
[2023-02-23 00:39:41,869][05631] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
|
2953 |
+
[2023-02-23 00:39:41,870][05631] Adding new argument 'push_to_hub'=True that is not in the saved config file!
|
2954 |
+
[2023-02-23 00:39:41,871][05631] Adding new argument 'hf_repository'='pittawat/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
|
2955 |
+
[2023-02-23 00:39:41,873][05631] Adding new argument 'policy_index'=0 that is not in the saved config file!
|
2956 |
+
[2023-02-23 00:39:41,877][05631] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
|
2957 |
+
[2023-02-23 00:39:41,878][05631] Adding new argument 'train_script'=None that is not in the saved config file!
|
2958 |
+
[2023-02-23 00:39:41,880][05631] Adding new argument 'enjoy_script'=None that is not in the saved config file!
|
2959 |
+
[2023-02-23 00:39:41,881][05631] Using frameskip 1 and render_action_repeat=4 for evaluation
|
2960 |
+
[2023-02-23 00:39:41,906][05631] RunningMeanStd input shape: (3, 72, 128)
|
2961 |
+
[2023-02-23 00:39:41,909][05631] RunningMeanStd input shape: (1,)
|
2962 |
+
[2023-02-23 00:39:41,928][05631] ConvEncoder: input_channels=3
|
2963 |
+
[2023-02-23 00:39:41,987][05631] Conv encoder output size: 512
|
2964 |
+
[2023-02-23 00:39:41,990][05631] Policy head output size: 512
|
2965 |
+
[2023-02-23 00:39:42,019][05631] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002445_10014720.pth...
|
2966 |
+
[2023-02-23 00:39:42,823][05631] Num frames 100...
|
2967 |
+
[2023-02-23 00:39:43,007][05631] Num frames 200...
|
2968 |
+
[2023-02-23 00:39:43,184][05631] Num frames 300...
|
2969 |
+
[2023-02-23 00:39:43,391][05631] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
|
2970 |
+
[2023-02-23 00:39:43,394][05631] Avg episode reward: 3.840, avg true_objective: 3.840
|
2971 |
+
[2023-02-23 00:39:43,429][05631] Num frames 400...
|
2972 |
+
[2023-02-23 00:39:43,622][05631] Num frames 500...
|
2973 |
+
[2023-02-23 00:39:43,820][05631] Num frames 600...
|
2974 |
+
[2023-02-23 00:39:44,016][05631] Num frames 700...
|
2975 |
+
[2023-02-23 00:39:44,226][05631] Num frames 800...
|
2976 |
+
[2023-02-23 00:39:44,433][05631] Num frames 900...
|
2977 |
+
[2023-02-23 00:39:44,553][05631] Avg episode rewards: #0: 5.640, true rewards: #0: 4.640
|
2978 |
+
[2023-02-23 00:39:44,555][05631] Avg episode reward: 5.640, avg true_objective: 4.640
|
2979 |
+
[2023-02-23 00:39:44,709][05631] Num frames 1000...
|
2980 |
+
[2023-02-23 00:39:44,901][05631] Num frames 1100...
|
2981 |
+
[2023-02-23 00:39:45,072][05631] Num frames 1200...
|
2982 |
+
[2023-02-23 00:39:45,252][05631] Num frames 1300...
|
2983 |
+
[2023-02-23 00:39:45,337][05631] Avg episode rewards: #0: 5.040, true rewards: #0: 4.373
|
2984 |
+
[2023-02-23 00:39:45,340][05631] Avg episode reward: 5.040, avg true_objective: 4.373
|
2985 |
+
[2023-02-23 00:39:45,500][05631] Num frames 1400...
|
2986 |
+
[2023-02-23 00:39:45,630][05631] Num frames 1500...
|
2987 |
+
[2023-02-23 00:39:45,751][05631] Num frames 1600...
|
2988 |
+
[2023-02-23 00:39:45,867][05631] Num frames 1700...
|
2989 |
+
[2023-02-23 00:39:45,955][05631] Avg episode rewards: #0: 5.070, true rewards: #0: 4.320
|
2990 |
+
[2023-02-23 00:39:45,963][05631] Avg episode reward: 5.070, avg true_objective: 4.320
|
2991 |
+
[2023-02-23 00:39:46,059][05631] Num frames 1800...
|
2992 |
+
[2023-02-23 00:39:46,186][05631] Num frames 1900...
|
2993 |
+
[2023-02-23 00:39:46,324][05631] Num frames 2000...
|
2994 |
+
[2023-02-23 00:39:46,443][05631] Num frames 2100...
|
2995 |
+
[2023-02-23 00:39:46,551][05631] Avg episode rewards: #0: 5.088, true rewards: #0: 4.288
|
2996 |
+
[2023-02-23 00:39:46,553][05631] Avg episode reward: 5.088, avg true_objective: 4.288
|
2997 |
+
[2023-02-23 00:39:46,628][05631] Num frames 2200...
|
2998 |
+
[2023-02-23 00:39:46,751][05631] Num frames 2300...
|
2999 |
+
[2023-02-23 00:39:46,870][05631] Num frames 2400...
|
3000 |
+
[2023-02-23 00:39:46,990][05631] Num frames 2500...
|
3001 |
+
[2023-02-23 00:39:47,080][05631] Avg episode rewards: #0: 4.880, true rewards: #0: 4.213
|
3002 |
+
[2023-02-23 00:39:47,082][05631] Avg episode reward: 4.880, avg true_objective: 4.213
|
3003 |
+
[2023-02-23 00:39:47,182][05631] Num frames 2600...
|
3004 |
+
[2023-02-23 00:39:47,318][05631] Num frames 2700...
|
3005 |
+
[2023-02-23 00:39:47,455][05631] Num frames 2800...
|
3006 |
+
[2023-02-23 00:39:47,581][05631] Num frames 2900...
|
3007 |
+
[2023-02-23 00:39:47,727][05631] Avg episode rewards: #0: 4.966, true rewards: #0: 4.251
|
3008 |
+
[2023-02-23 00:39:47,729][05631] Avg episode reward: 4.966, avg true_objective: 4.251
|
3009 |
+
[2023-02-23 00:39:47,764][05631] Num frames 3000...
|
3010 |
+
[2023-02-23 00:39:47,890][05631] Num frames 3100...
|
3011 |
+
[2023-02-23 00:39:48,018][05631] Num frames 3200...
|
3012 |
+
[2023-02-23 00:39:48,138][05631] Num frames 3300...
|
3013 |
+
[2023-02-23 00:39:48,265][05631] Avg episode rewards: #0: 4.825, true rewards: #0: 4.200
|
3014 |
+
[2023-02-23 00:39:48,267][05631] Avg episode reward: 4.825, avg true_objective: 4.200
|
3015 |
+
[2023-02-23 00:39:48,322][05631] Num frames 3400...
|
3016 |
+
[2023-02-23 00:39:48,453][05631] Num frames 3500...
|
3017 |
+
[2023-02-23 00:39:48,573][05631] Num frames 3600...
|
3018 |
+
[2023-02-23 00:39:48,693][05631] Num frames 3700...
|
3019 |
+
[2023-02-23 00:39:48,808][05631] Avg episode rewards: #0: 4.716, true rewards: #0: 4.160
|
3020 |
+
[2023-02-23 00:39:48,811][05631] Avg episode reward: 4.716, avg true_objective: 4.160
|
3021 |
+
[2023-02-23 00:39:48,888][05631] Num frames 3800...
|
3022 |
+
[2023-02-23 00:39:49,016][05631] Num frames 3900...
|
3023 |
+
[2023-02-23 00:39:49,139][05631] Num frames 4000...
|
3024 |
+
[2023-02-23 00:39:49,195][05631] Avg episode rewards: #0: 4.500, true rewards: #0: 4.000
|
3025 |
+
[2023-02-23 00:39:49,198][05631] Avg episode reward: 4.500, avg true_objective: 4.000
|
3026 |
+
[2023-02-23 00:40:09,146][05631] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
|