# Michele Milesi
# feat: added dv3
# 6b39341
# NOTE(review): this document arrived with all indentation stripped, which
# flattened every section into one mapping with many duplicate keys. The
# nesting below is reconstructed from key grouping and repeated values;
# confirm against the schema of the tool that consumes this file.

# Process-wide settings.
num_threads: 1
float32_matmul_precision: high
dry_run: false
seed: 42
torch_use_deterministic_algorithms: false
torch_backends_cudnn_benchmark: true
torch_backends_cudnn_deterministic: false
cublas_workspace_config: null

# Run identification; run_name is reused as metric.logger.name below.
exp_name: ppo_doapp
run_name: 2024-04-15_15-25-55_ppo_doapp_42
root_dir: ppo/doapp

algo:
  name: ppo
  total_steps: 1024
  per_rank_batch_size: 16
  run_test: true
  # Observation keys, split by the encoder branch they are listed under.
  cnn_keys:
    encoder:
      - frame
  mlp_keys:
    encoder:
      - own_character
      - own_health
      - own_side
      - own_wins
      - opp_character
      - opp_health
      - opp_side
      - opp_wins
      - stage
      - timer
      - action
  optimizer:
    _target_: torch.optim.Adam
    lr: 0.005
    eps: 1.0e-06
    weight_decay: 0
    betas:
      - 0.9
      - 0.999
  anneal_lr: false
  gamma: 0.99
  gae_lambda: 0.95
  update_epochs: 1
  loss_reduction: mean
  normalize_advantages: true
  clip_coef: 0.2
  anneal_clip_coef: false
  clip_vloss: false
  ent_coef: 0.0
  anneal_ent_coef: false
  vf_coef: 1.0
  rollout_steps: 32
  # Algorithm-level network defaults; the encoder/actor/critic sections below
  # currently carry the same four values.
  dense_units: 16
  mlp_layers: 1
  dense_act: torch.nn.Tanh
  layer_norm: false
  max_grad_norm: 1.0
  encoder:
    cnn_features_dim: 128
    mlp_features_dim: 32
    dense_units: 16
    mlp_layers: 1
    dense_act: torch.nn.Tanh
    layer_norm: false
  actor:
    dense_units: 16
    mlp_layers: 1
    dense_act: torch.nn.Tanh
    layer_norm: false
  critic:
    dense_units: 16
    mlp_layers: 1
    dense_act: torch.nn.Tanh
    layer_norm: false

buffer:
  # Equal to algo.rollout_steps (32) in this run.
  size: 32
  memmap: true
  validate_args: false
  from_numpy: false
  share_data: false

checkpoint:
  every: 100
  resume_from: null
  save_last: true
  keep_last: 5

distribution:
  validate_args: false

env:
  id: doapp
  num_envs: 1
  frame_stack: 1
  sync_env: true
  screen_size: 64
  action_repeat: 1
  grayscale: false
  clip_rewards: false
  capture_video: true
  frame_stack_dilation: 1
  max_episode_steps: null
  reward_as_observation: false
  wrapper:
    _target_: sheeprl.envs.diambra.DiambraWrapper
    # id, screen_size and grayscale repeat the env-level values above;
    # repeat_action carries the same value as env.action_repeat.
    id: doapp
    action_space: DISCRETE
    screen_size: 64
    grayscale: false
    repeat_action: 1
    rank: null
    log_level: 0
    increase_performance: true
    diambra_settings:
      role: P1
      step_ratio: 6
      difficulty: 4
      continue_game: 0.0
      show_final: false
      outfits: 2
      splash_screen: false
    diambra_wrappers:
      stack_actions: 1
      no_op_max: 0
      no_attack_buttons_combinations: false
      add_last_action: true
      scale: false
      exclude_image_scaling: false
      process_discrete_binary: false
      role_relative: true

fabric:
  _target_: lightning.fabric.Fabric
  devices: 1
  num_nodes: 1
  strategy: auto
  accelerator: cpu
  precision: 32-true
  callbacks:
    # keep_last repeats checkpoint.keep_last (5).
    - _target_: sheeprl.utils.callback.CheckpointCallback
      keep_last: 5

metric:
  log_every: 5000
  disable_timer: false
  log_level: 1
  sync_on_compute: false
  aggregator:
    _target_: sheeprl.utils.metric.MetricAggregator
    raise_on_missing: false
    metrics:
      Rewards/rew_avg:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Game/ep_len_avg:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
  # NOTE(review): logger is placed under metric by inference — confirm.
  logger:
    _target_: lightning.fabric.loggers.TensorBoardLogger
    name: 2024-04-15_15-25-55_ppo_doapp_42
    root_dir: logs/runs/ppo/doapp
    version: null
    default_hp_metric: true
    prefix: ''
    sub_dir: null

model_manager:
  disabled: true
  models: {}