num_threads: 1
float32_matmul_precision: high
dry_run: false
seed: 42
torch_use_deterministic_algorithms: false
torch_backends_cudnn_benchmark: true
torch_backends_cudnn_deterministic: false
cublas_workspace_config: null
exp_name: ppo_doapp
run_name: 2024-04-15_15-25-55_ppo_doapp_42
root_dir: ppo/doapp
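
# PPO algorithm: encoder keys, optimizer, loss coefficients, and network sizes.
# The `frame` observation feeds the CNN encoder, the scalar RAM observations feed
# the MLP encoder. Data is collected for `rollout_steps` steps per environment,
# then optimized for `update_epochs` epoch(s) in minibatches of `per_rank_batch_size`.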
algo:
  name: ppo
  total_steps: 1024
  per_rank_batch_size: 16
  run_test: true
  cnn_keys:
    encoder:
    - frame
  mlp_keys:
    encoder:
    - own_character
    - own_health
    - own_side
    - own_wins
    - opp_character
    - opp_health
    - opp_side
    - opp_wins
    - stage
    - timer
    - action
  optimizer:
    _target_: torch.optim.Adam
    lr: 0.005
    eps: 1.0e-06
    weight_decay: 0
    betas:
    - 0.9
    - 0.999
  anneal_lr: false
  gamma: 0.99
  gae_lambda: 0.95
  update_epochs: 1
  loss_reduction: mean
  normalize_advantages: true
  clip_coef: 0.2
  anneal_clip_coef: false
  clip_vloss: false
  ent_coef: 0.0
  anneal_ent_coef: false
  vf_coef: 1.0
  rollout_steps: 32
  dense_units: 16
  mlp_layers: 1
  dense_act: torch.nn.Tanh
  layer_norm: false
  max_grad_norm: 1.0
  encoder:
    cnn_features_dim: 128
    mlp_features_dim: 32
    dense_units: 16
    mlp_layers: 1
    dense_act: torch.nn.Tanh
    layer_norm: false
  actor:
    dense_units: 16
    mlp_layers: 1
    dense_act: torch.nn.Tanh
    layer_norm: false
  critic:
    dense_units: 16
    mlp_layers: 1
    dense_act: torch.nn.Tanh
    layer_norm: false
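
# Rollout buffer: sized to one rollout (algo.rollout_steps); memmap keeps the
# collected data on disk instead of in RAM.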
buffer:
  size: 32
  memmap: true
  validate_args: false
  from_numpy: false
  share_data: false
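
# Checkpointing: save a checkpoint every 100 policy steps, always save the final
# one, and retain only the 5 most recent checkpoints.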
checkpoint:
  every: 100
  resume_from: null
  save_last: true
  keep_last: 5
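
# Torch distributions: skip argument validation for speed.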
distribution:
  validate_args: false
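
# DIAMBRA environment (doapp = Dead Or Alive ++): a single synchronous environment,
# 64x64 RGB frames, no frame stacking, discrete action space, played as P1 at
# difficulty 4, with episode videos captured.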
env:
  id: doapp
  num_envs: 1
  frame_stack: 1
  sync_env: true
  screen_size: 64
  action_repeat: 1
  grayscale: false
  clip_rewards: false
  capture_video: true
  frame_stack_dilation: 1
  max_episode_steps: null
  reward_as_observation: false
  wrapper:
    _target_: sheeprl.envs.diambra.DiambraWrapper
    id: doapp
    action_space: DISCRETE
    screen_size: 64
    grayscale: false
    repeat_action: 1
    rank: null
    log_level: 0
    increase_performance: true
    diambra_settings:
      role: P1
      step_ratio: 6
      difficulty: 4
      continue_game: 0.0
      show_final: false
      outfits: 2
      splash_screen: false
    diambra_wrappers:
      stack_actions: 1
      no_op_max: 0
      no_attack_buttons_combinations: false
      add_last_action: true
      scale: false
      exclude_image_scaling: false
      process_discrete_binary: false
      role_relative: true
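
# Lightning Fabric: a single process on CPU with full float32 precision; the
# CheckpointCallback handles checkpoint saving.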
fabric:
  _target_: lightning.fabric.Fabric
  devices: 1
  num_nodes: 1
  strategy: auto
  accelerator: cpu
  precision: 32-true
  callbacks:
  - _target_: sheeprl.utils.callback.CheckpointCallback
    keep_last: 5
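
# Logging: average reward and episode length are aggregated with torchmetrics and
# written to TensorBoard under logs/runs/ppo/doapp every 5000 policy steps.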
metric:
  log_every: 5000
  disable_timer: false
  log_level: 1
  sync_on_compute: false
  aggregator:
    _target_: sheeprl.utils.metric.MetricAggregator
    raise_on_missing: false
    metrics:
      Rewards/rew_avg:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Game/ep_len_avg:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
  logger:
    _target_: lightning.fabric.loggers.TensorBoardLogger
    name: 2024-04-15_15-25-55_ppo_doapp_42
    root_dir: logs/runs/ppo/doapp
    version: null
    default_hp_metric: true
    prefix: ''
    sub_dir: null
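
# Model manager: disabled, so no trained models are registered at the end of
# training (SheepRL's model manager is MLflow-based when enabled).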
model_manager:
  disabled: true
  models: {}
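
The `_target_` entries above are Hydra-style nodes: at runtime SheepRL resolves each one into the named class, passing the sibling keys as constructor arguments. The sketch below is a minimal, hand-written illustration of that mechanism, not SheepRL's own training code; it assumes the config above is saved as `config.yaml` and that `hydra-core`, `omegaconf`, and `torch` are installed, and the `Linear` module is only a hypothetical stand-in for the PPO agent.

# Minimal sketch (not SheepRL code): resolve the `_target_` nodes by hand.
import torch
from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.load("config.yaml")  # the config shown above

# Hypothetical stand-in for the PPO agent's parameters.
model = torch.nn.Linear(cfg.algo.encoder.mlp_features_dim, 1)

# algo.optimizer -> torch.optim.Adam(lr=0.005, eps=1e-06, ...); the model
# parameters are supplied as an extra keyword argument at instantiation time.
optimizer = instantiate(cfg.algo.optimizer, params=model.parameters())
print(optimizer)

# Plain values are read directly; here the buffer is sized to one rollout.
assert cfg.buffer.size == cfg.algo.rollout_steps  # both 32 in this run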