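# SheepRL PPO configuration for the DIAMBRA "doapp" (Dead or Alive++) environment.
# Global run settings: single-threaded, seeded, with cuDNN benchmarking enabled.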
num_threads: 1
float32_matmul_precision: high
dry_run: false
seed: 42
torch_use_deterministic_algorithms: false
torch_backends_cudnn_benchmark: true
torch_backends_cudnn_deterministic: false
cublas_workspace_config: null
exp_name: ppo_doapp
run_name: 2024-04-15_15-25-55_ppo_doapp_42
root_dir: ppo/doapp
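# PPO algorithm hyperparameters, observation keys, and network sizes.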
algo:
  name: ppo
  total_steps: 1024
  per_rank_batch_size: 16
  run_test: true
  cnn_keys:
    encoder:
    - frame
  mlp_keys:
    encoder:
    - own_character
    - own_health
    - own_side
    - own_wins
    - opp_character
    - opp_health
    - opp_side
    - opp_wins
    - stage
    - timer
    - action
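  # Optimizer, instantiated via Hydra's _target_ mechanism.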
  optimizer:
    _target_: torch.optim.Adam
    lr: 0.005
    eps: 1.0e-06
    weight_decay: 0
    betas:
    - 0.9
    - 0.999
  anneal_lr: false
  gamma: 0.99
  gae_lambda: 0.95
  update_epochs: 1
  loss_reduction: mean
  normalize_advantages: true
  clip_coef: 0.2
  anneal_clip_coef: false
  clip_vloss: false
  ent_coef: 0.0
  anneal_ent_coef: false
  vf_coef: 1.0
  rollout_steps: 32
  dense_units: 16
  mlp_layers: 1
  dense_act: torch.nn.Tanh
  layer_norm: false
  max_grad_norm: 1.0
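  # Sub-network sizes: feature encoder plus the actor and critic heads.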
  encoder:
    cnn_features_dim: 128
    mlp_features_dim: 32
    dense_units: 16
    mlp_layers: 1
    dense_act: torch.nn.Tanh
    layer_norm: false
  actor:
    dense_units: 16
    mlp_layers: 1
    dense_act: torch.nn.Tanh
    layer_norm: false
  critic:
    dense_units: 16
    mlp_layers: 1
    dense_act: torch.nn.Tanh
    layer_norm: false
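# Rollout buffer; its size matches algo.rollout_steps and it is memory-mapped to disk.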
buffer:
  size: 32
  memmap: true
  validate_args: false
  from_numpy: false
  share_data: false
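# Checkpoint frequency and retention.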
checkpoint:
  every: 100
  resume_from: null
  save_last: true
  keep_last: 5
distribution:
  validate_args: false
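# DIAMBRA doapp environment: a single synchronous instance with video capture enabled.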
env:
  id: doapp
  num_envs: 1
  frame_stack: 1
  sync_env: true
  screen_size: 64
  action_repeat: 1
  grayscale: false
  clip_rewards: false
  capture_video: true
  frame_stack_dilation: 1
  max_episode_steps: null
  reward_as_observation: false
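  # Wrapper adapting the DIAMBRA Arena environment to SheepRL.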
  wrapper:
    _target_: sheeprl.envs.diambra.DiambraWrapper
    id: doapp
    action_space: DISCRETE
    screen_size: 64
    grayscale: false
    repeat_action: 1
    rank: null
    log_level: 0
    increase_performance: true
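    # Settings forwarded to the DIAMBRA Arena engine.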
    diambra_settings:
      role: P1
      step_ratio: 6
      difficulty: 4
      continue_game: 0.0
      show_final: false
      outfits: 2
      splash_screen: false
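    # Options for DIAMBRA Arena's built-in wrappers.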
    diambra_wrappers:
      stack_actions: 1
      no_op_max: 0
      no_attack_buttons_combinations: false
      add_last_action: true
      scale: false
      exclude_image_scaling: false
      process_discrete_binary: false
      role_relative: true
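# Lightning Fabric launcher: one CPU device, full float32 precision.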
fabric:
  _target_: lightning.fabric.Fabric
  devices: 1
  num_nodes: 1
  strategy: auto
  accelerator: cpu
  precision: 32-true
  callbacks:
  - _target_: sheeprl.utils.callback.CheckpointCallback
    keep_last: 5
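# Metric logging and aggregation.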
metric:
  log_every: 5000
  disable_timer: false
  log_level: 1
  sync_on_compute: false
  aggregator:
    _target_: sheeprl.utils.metric.MetricAggregator
    raise_on_missing: false
    metrics:
      Rewards/rew_avg:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Game/ep_len_avg:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
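  # TensorBoard logger writing under logs/runs/ppo/doapp.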
  logger:
    _target_: lightning.fabric.loggers.TensorBoardLogger
    name: 2024-04-15_15-25-55_ppo_doapp_42
    root_dir: logs/runs/ppo/doapp
    version: null
    default_hp_metric: true
    prefix: ''
    sub_dir: null
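# Model manager (model registry) disabled; no models tracked for this run.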
model_manager:
  disabled: true
  models: {}