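# Resolved run configuration for sheeprl's DreamerV3 on the DIAMBRA "doapp" environment.
# Top-level keys control threading, seeding, determinism, and matmul precision.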
num_threads: 1
float32_matmul_precision: high
dry_run: false
seed: 42
torch_use_deterministic_algorithms: false
torch_backends_cudnn_benchmark: true
torch_backends_cudnn_deterministic: false
cublas_workspace_config: null
exp_name: dreamer_v3_doapp
run_name: 2024-04-16_17-34-17_dreamer_v3_doapp_42
root_dir: dreamer_v3/doapp
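# DreamerV3 algorithm hyperparameters. The tiny network sizes (dense_units: 8,
# discrete/stochastic size: 4) and total_steps: 1024 suggest a short smoke-test run
# rather than a full training configuration.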
algo:
  name: dreamer_v3
  total_steps: 1024
  per_rank_batch_size: 2
  run_test: false
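  # Observation keys: image-like observations go through the CNN encoder/decoder,
  # vector observations through the MLP encoder/decoder.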
  cnn_keys:
    encoder:
    - frame
    decoder:
    - frame
  mlp_keys:
    encoder:
    - own_character
    - own_health
    - own_side
    - own_wins
    - opp_character
    - opp_health
    - opp_side
    - opp_wins
    - stage
    - timer
    - action
    decoder:
    - own_character
    - own_health
    - own_side
    - own_wins
    - opp_character
    - opp_health
    - opp_side
    - opp_wins
    - stage
    - timer
    - action
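  # World model (RSSM): optimizer, KL-balancing terms, and latent-state sizes,
  # followed by the per-component networks.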
  world_model:
    optimizer:
      _target_: torch.optim.Adam
      lr: 0.0001
      eps: 1.0e-08
      weight_decay: 0
      betas:
      - 0.9
      - 0.999
    discrete_size: 4
    stochastic_size: 4
    kl_dynamic: 0.5
    kl_representation: 0.1
    kl_free_nats: 1.0
    kl_regularizer: 1.0
    continue_scale_factor: 1.0
    clip_gradients: 1000.0
    decoupled_rssm: false
    learnable_initial_recurrent_state: true
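    # Component networks: observation encoder, recurrent model, transition (prior)
    # and representation (posterior) models, observation decoder, reward head, and
    # continue/discount head.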
    encoder:
      cnn_channels_multiplier: 2
      cnn_act: torch.nn.SiLU
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      cnn_layer_norm:
        cls: sheeprl.models.models.LayerNormChannelLast
        kw:
          eps: 0.001
      mlp_layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
    recurrent_model:
      recurrent_state_size: 8
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
    transition_model:
      hidden_size: 8
      dense_act: torch.nn.SiLU
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
    representation_model:
      hidden_size: 8
      dense_act: torch.nn.SiLU
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
    observation_model:
      cnn_channels_multiplier: 2
      cnn_act: torch.nn.SiLU
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      cnn_layer_norm:
        cls: sheeprl.models.models.LayerNormChannelLast
        kw:
          eps: 0.001
      mlp_layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
    reward_model:
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
      bins: 255
    discount_model:
      learnable: true
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
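  # Actor: stochastic policy trained on imagined rollouts; `moments` tracks the
  # return-normalization percentiles used by DreamerV3.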
  actor:
    optimizer:
      _target_: torch.optim.Adam
      lr: 8.0e-05
      eps: 1.0e-05
      weight_decay: 0
      betas:
      - 0.9
      - 0.999
    cls: sheeprl.algos.dreamer_v3.agent.Actor
    ent_coef: 0.0003
    min_std: 0.1
    max_std: 1.0
    init_std: 2.0
    dense_act: torch.nn.SiLU
    mlp_layers: 1
    layer_norm:
      cls: sheeprl.models.models.LayerNorm
      kw:
        eps: 0.001
    dense_units: 8
    clip_gradients: 100.0
    unimix: 0.01
    action_clip: 1.0
    moments:
      decay: 0.99
      max: 1.0
      percentile:
        low: 0.05
        high: 0.95
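  # Critic: discretized value head (255 bins) with a target network updated
  # with coefficient `tau`.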
  critic:
    optimizer:
      _target_: torch.optim.Adam
      lr: 8.0e-05
      eps: 1.0e-05
      weight_decay: 0
      betas:
      - 0.9
      - 0.999
    dense_act: torch.nn.SiLU
    mlp_layers: 1
    layer_norm:
      cls: sheeprl.models.models.LayerNorm
      kw:
        eps: 0.001
    dense_units: 8
    per_rank_target_network_update_freq: 1
    tau: 0.02
    bins: 255
    clip_gradients: 100.0
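  # Shared algorithm-level settings: discount and lambda-return parameters,
  # imagination horizon, replay ratio, and default layer sizes/activations
  # shared with the sub-models above.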
  gamma: 0.996996996996997
  lmbda: 0.95
  horizon: 15
  replay_ratio: 0.0625
  learning_starts: 1024
  per_rank_pretrain_steps: 0
  per_rank_sequence_length: 64
  cnn_layer_norm:
    cls: sheeprl.models.models.LayerNormChannelLast
    kw:
      eps: 0.001
  mlp_layer_norm:
    cls: sheeprl.models.models.LayerNorm
    kw:
      eps: 0.001
  dense_units: 8
  mlp_layers: 1
  dense_act: torch.nn.SiLU
  cnn_act: torch.nn.SiLU
  unimix: 0.01
  hafner_initialization: true
  player:
    discrete_size: 4
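# Replay buffer: capacity (in steps), memory-mapped to disk, and saved with checkpoints.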
buffer:
  size: 1024
  memmap: true
  validate_args: false
  from_numpy: false
  checkpoint: true
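# Checkpointing: save frequency and how many checkpoints to keep.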
checkpoint:
  every: 10000
  resume_from: null
  save_last: true
  keep_last: 5
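# Options applied to the torch.distributions objects built by the agent.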
distribution:
  validate_args: false
  type: auto
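# Environment: a single DIAMBRA "doapp" (Dead or Alive++) instance with 64x64 RGB frames.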
env:
  id: doapp
  num_envs: 1
  frame_stack: -1
  sync_env: true
  screen_size: 64
  action_repeat: 1
  grayscale: false
  clip_rewards: false
  capture_video: true
  frame_stack_dilation: 1
  max_episode_steps: null
  reward_as_observation: false
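  # DIAMBRA wrapper settings: difficulty, player role, action space, and
  # engine-level wrappers (action stacking, last-action observation, etc.).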
  wrapper:
    _target_: sheeprl.envs.diambra.DiambraWrapper
    id: doapp
    action_space: DISCRETE
    screen_size: 64
    grayscale: false
    repeat_action: 1
    rank: null
    log_level: 0
    increase_performance: true
    diambra_settings:
      role: P1
      step_ratio: 6
      difficulty: 4
      continue_game: 0.0
      show_final: false
      outfits: 2
      splash_screen: false
    diambra_wrappers:
      stack_actions: 1
      no_op_max: 0
      no_attack_buttons_combinations: false
      add_last_action: true
      scale: false
      exclude_image_scaling: false
      process_discrete_binary: false
      role_relative: true
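# Lightning Fabric launcher: single CPU device, full 32-bit precision.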
fabric:
  _target_: lightning.fabric.Fabric
  devices: 1
  num_nodes: 1
  strategy: auto
  accelerator: cpu
  precision: 32-true
  callbacks:
  - _target_: sheeprl.utils.callback.CheckpointCallback
    keep_last: 5
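# Metrics: logging frequency, the MetricAggregator with its tracked metrics,
# and the TensorBoard logger destination.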
metric:
  log_every: 5000
  disable_timer: false
  log_level: 1
  sync_on_compute: false
  aggregator:
    _target_: sheeprl.utils.metric.MetricAggregator
    raise_on_missing: false
    metrics:
      Rewards/rew_avg:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Game/ep_len_avg:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/world_model_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/value_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/policy_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/observation_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/reward_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/state_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/continue_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      State/kl:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      State/post_entropy:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      State/prior_entropy:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Grads/world_model:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Grads/actor:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Grads/critic:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
  logger:
    _target_: lightning.fabric.loggers.TensorBoardLogger
    name: 2024-04-16_17-34-17_dreamer_v3_doapp_42
    root_dir: logs/runs/dreamer_v3/doapp
    version: null
    default_hp_metric: true
    prefix: ''
    sub_dir: null
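# Model manager is disabled for this run; the entries below name the artifacts
# (world model, actor, critic, target critic, moments) that would otherwise be registered.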
model_manager:
  disabled: true
  models:
    world_model:
      model_name: dreamer_v3_doapp_world_model
      description: DreamerV3 World Model used in doapp Environment
      tags: {}
    actor:
      model_name: dreamer_v3_doapp_actor
      description: DreamerV3 Actor used in doapp Environment
      tags: {}
    critic:
      model_name: dreamer_v3_doapp_critic
      description: DreamerV3 Critic used in doapp Environment
      tags: {}
    target_critic:
      model_name: dreamer_v3_doapp_target_critic
      description: DreamerV3 Target Critic used in doapp Environment
      tags: {}
    moments:
      model_name: dreamer_v3_doapp_moments
      description: DreamerV3 Moments used in doapp Environment
      tags: {}