# Top-level run settings for experiment `dreamer_v3_doapp`.
# NOTE(review): `run_name` carries a timestamp, so this file looks like a
# resolved snapshot of one specific run rather than a reusable template —
# confirm before hand-editing values.
num_threads: 1
float32_matmul_precision: high
dry_run: false
seed: 42
# The three switches below are set to the non-deterministic combination
# (deterministic: false, benchmark: true). Presumably they map onto the
# PyTorch settings of the same names — verify against the consumer.
torch_use_deterministic_algorithms: false
torch_backends_cudnn_benchmark: true
torch_backends_cudnn_deterministic: false
cublas_workspace_config: null
# `run_name` = timestamp + `exp_name` + `seed`; the identical string is
# repeated as `metric.logger.name`, and `root_dir` is the tail of
# `metric.logger.root_dir` (`logs/runs/dreamer_v3/doapp`).
exp_name: dreamer_v3_doapp
run_name: 2024-04-16_17-34-17_dreamer_v3_doapp_42
root_dir: dreamer_v3/doapp
# Algorithm section: step budget, batch size and the observation keys that
# are routed to the CNN and MLP encoders/decoders.
algo:
  name: dreamer_v3
  # NOTE(review): `total_steps` (1024) equals `learning_starts` further down
  # in this mapping and `buffer.size`; together with the 8-unit layers this
  # looks like a smoke-test-sized run — confirm before using it as a
  # training baseline.
  total_steps: 1024
  per_rank_batch_size: 2
  run_test: false
  # Image-like observations: the single key `frame` is both encoded and
  # decoded.
  cnn_keys:
    encoder:
    - frame
    decoder:
    - frame
  # Vector observations: the encoder and decoder lists are identical
  # (eleven keys each). If one list is edited, check whether the other
  # should change too.
  # NOTE(review): `action` presumably exists because
  # `env.wrapper.diambra_wrappers.add_last_action` is true — confirm.
  mlp_keys:
    encoder:
    - own_character
    - own_health
    - own_side
    - own_wins
    - opp_character
    - opp_health
    - opp_side
    - opp_wins
    - stage
    - timer
    - action
    decoder:
    - own_character
    - own_health
    - own_side
    - own_wins
    - opp_character
    - opp_health
    - opp_side
    - opp_wins
    - stage
    - timer
    - action
  # World-model settings: one optimizer, the KL / gradient-clipping scalars,
  # and seven sub-model mappings (encoder, recurrent_model, transition_model,
  # representation_model, observation_model, reward_model, discount_model).
  # Every width in this mapping (`dense_units`, `hidden_size`,
  # `recurrent_state_size`) is 8 and every layer-norm `eps` is 0.001, the
  # same values as the `algo`-level `dense_units` / `*_layer_norm` entries.
  world_model:
    # `_target_` holds a dotted class path; the sibling keys are presumably
    # passed to it as keyword arguments by the config loader — confirm.
    optimizer:
      _target_: torch.optim.Adam
      lr: 0.0001
      eps: 1.0e-08
      weight_decay: 0
      betas:
      - 0.9
      - 0.999
    # `discrete_size` is repeated as `algo.player.discrete_size` (also 4);
    # keep the two in sync.
    discrete_size: 4
    stochastic_size: 4
    kl_dynamic: 0.5
    kl_representation: 0.1
    kl_free_nats: 1.0
    kl_regularizer: 1.0
    continue_scale_factor: 1.0
    # Note: ten times the actor/critic `clip_gradients` value (100.0).
    clip_gradients: 1000.0
    decoupled_rssm: false
    learnable_initial_recurrent_state: true
    # `encoder` and `observation_model` carry identical settings.
    encoder:
      cnn_channels_multiplier: 2
      cnn_act: torch.nn.SiLU
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      # Layer-norm entries use a `cls` (dotted class path) / `kw` pair;
      # `kw` presumably holds the constructor keyword arguments — confirm.
      cnn_layer_norm:
        cls: sheeprl.models.models.LayerNormChannelLast
        kw:
          eps: 0.001
      mlp_layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
    recurrent_model:
      recurrent_state_size: 8
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
    # `transition_model` and `representation_model` carry identical settings.
    transition_model:
      hidden_size: 8
      dense_act: torch.nn.SiLU
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
    representation_model:
      hidden_size: 8
      dense_act: torch.nn.SiLU
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
    observation_model:
      cnn_channels_multiplier: 2
      cnn_act: torch.nn.SiLU
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      cnn_layer_norm:
        cls: sheeprl.models.models.LayerNormChannelLast
        kw:
          eps: 0.001
      mlp_layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
    reward_model:
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
      # Same bin count as `algo.critic.bins` (255).
      bins: 255
    # NOTE(review): presumably the head behind the `Loss/continue_loss`
    # metric and `continue_scale_factor` above — confirm the naming.
    discount_model:
      learnable: true
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
  # Actor (policy) settings: optimizer, implementing class, action-std
  # bounds, network shape and return-normalisation `moments`.
  actor:
    # Same optimizer settings as `algo.critic.optimizer`
    # (Adam, lr 8.0e-05, eps 1.0e-05).
    optimizer:
      _target_: torch.optim.Adam
      lr: 8.0e-05
      eps: 1.0e-05
      weight_decay: 0
      betas:
      - 0.9
      - 0.999
    # Dotted path of the actor class.
    cls: sheeprl.algos.dreamer_v3.agent.Actor
    ent_coef: 0.0003
    # NOTE(review): `init_std` (2.0) lies outside [`min_std`, `max_std`] =
    # [0.1, 1.0]; presumably it is a pre-squash value — confirm.
    min_std: 0.1
    max_std: 1.0
    init_std: 2.0
    dense_act: torch.nn.SiLU
    mlp_layers: 1
    layer_norm:
      cls: sheeprl.models.models.LayerNorm
      kw:
        eps: 0.001
    dense_units: 8
    clip_gradients: 100.0
    # Same value as the `algo`-level `unimix` (0.01).
    unimix: 0.01
    action_clip: 1.0
    # Running-statistics settings; the percentile pair is 5 % / 95 %.
    moments:
      decay: 0.99
      max: 1.0
      percentile:
        low: 0.05
        high: 0.95
  # Critic (value function) settings: optimizer, network shape and
  # target-network update parameters.
  critic:
    # Same optimizer settings as `algo.actor.optimizer`.
    optimizer:
      _target_: torch.optim.Adam
      lr: 8.0e-05
      eps: 1.0e-05
      weight_decay: 0
      betas:
      - 0.9
      - 0.999
    dense_act: torch.nn.SiLU
    mlp_layers: 1
    layer_norm:
      cls: sheeprl.models.models.LayerNorm
      kw:
        eps: 0.001
    dense_units: 8
    # Target-network update frequency and `tau`.
    # NOTE(review): `tau` is presumably the soft-update mixing coefficient
    # — confirm against the trainer.
    per_rank_target_network_update_freq: 1
    tau: 0.02
    # Same bin count as `algo.world_model.reward_model.bins` (255).
    bins: 255
    clip_gradients: 100.0
  # Remaining `algo`-level hyperparameters.
  # `gamma` is numerically 1 - 1/333.
  gamma: 0.996996996996997
  lmbda: 0.95
  horizon: 15
  replay_ratio: 0.0625
  # Equal to `total_steps` (1024) at the top of this mapping.
  # NOTE(review): whether any gradient step happens before the run ends
  # depends on how the trainer compares these two — confirm.
  learning_starts: 1024
  per_rank_pretrain_steps: 0
  per_rank_sequence_length: 64
  # The values below also appear literally inside `world_model`, `actor`
  # and `critic`. Presumably those copies were resolved from these entries
  # when the config was composed; in this file they are independent
  # literals, so a change here does not propagate — verify.
  cnn_layer_norm:
    cls: sheeprl.models.models.LayerNormChannelLast
    kw:
      eps: 0.001
  mlp_layer_norm:
    cls: sheeprl.models.models.LayerNorm
    kw:
      eps: 0.001
  dense_units: 8
  mlp_layers: 1
  dense_act: torch.nn.SiLU
  cnn_act: torch.nn.SiLU
  unimix: 0.01
  hafner_initialization: true
  # Mirrors `algo.world_model.discrete_size` (4).
  player:
    discrete_size: 4
# Replay-buffer settings. `size` equals `algo.total_steps` (1024).
# NOTE(review): `memmap: true` presumably stores the buffer in
# memory-mapped files and `checkpoint: true` presumably includes the buffer
# in checkpoints — confirm both against the buffer implementation.
buffer:
  size: 1024
  memmap: true
  validate_args: false
  from_numpy: false
  checkpoint: true
# Checkpointing settings.
# `every` (10000) is larger than `algo.total_steps` (1024).
# NOTE(review): if `every` is counted in the same unit as `total_steps`,
# no periodic checkpoint fires in this run and only `save_last` produces
# one — confirm.
# `keep_last` matches `keep_last` on the `fabric.callbacks` entry (5).
checkpoint:
  every: 10000
  resume_from: null
  save_last: true
  keep_last: 5
# Probability-distribution settings; `validate_args` is off here as it is
# in `buffer`.
distribution:
  validate_args: false
  type: auto
# Environment section: generic settings first, then the DIAMBRA-specific
# `wrapper` mapping.
env:
  id: doapp
  num_envs: 1
  # NOTE(review): -1 presumably disables frame stacking — confirm.
  frame_stack: -1
  sync_env: true
  screen_size: 64
  action_repeat: 1
  grayscale: false
  clip_rewards: false
  capture_video: true
  frame_stack_dilation: 1
  max_episode_steps: null
  reward_as_observation: false
  # `_target_` is the dotted path of the wrapper class. `id`, `screen_size`
  # and `grayscale` repeat the `env`-level values above, and `repeat_action`
  # carries the same value as `env.action_repeat` (1); keep them in sync
  # when editing.
  wrapper:
    _target_: sheeprl.envs.diambra.DiambraWrapper
    id: doapp
    action_space: DISCRETE
    screen_size: 64
    grayscale: false
    repeat_action: 1
    # NOTE(review): `rank` is null here; presumably filled in at runtime
    # per process — confirm.
    rank: null
    log_level: 0
    increase_performance: true
    # Settings grouped for the DIAMBRA engine.
    # NOTE(review): key meanings are defined by DIAMBRA Arena, not by this
    # file — check its documentation before changing them.
    diambra_settings:
      role: P1
      step_ratio: 6
      difficulty: 4
      continue_game: 0.0
      show_final: false
      outfits: 2
      splash_screen: false
    # Settings grouped for DIAMBRA's observation/action wrappers.
    # `add_last_action: true` is the only entry that adds information here;
    # presumably the source of the `action` key in `algo.mlp_keys` —
    # confirm.
    diambra_wrappers:
      stack_actions: 1
      no_op_max: 0
      no_attack_buttons_combinations: false
      add_last_action: true
      scale: false
      exclude_image_scaling: false
      process_discrete_binary: false
      role_relative: true
# Lightning Fabric settings: one device on one node, CPU accelerator,
# `32-true` precision.
fabric:
  _target_: lightning.fabric.Fabric
  devices: 1
  num_nodes: 1
  strategy: auto
  accelerator: cpu
  precision: 32-true
  # One callback is configured; its `keep_last` matches
  # `checkpoint.keep_last` (5).
  callbacks:
  - _target_: sheeprl.utils.callback.CheckpointCallback
    keep_last: 5
# Metrics and logging section.
# `log_every` (5000) is larger than `algo.total_steps` (1024).
# NOTE(review): if both are counted in the same unit, periodic logging
# never fires during this run — confirm.
metric:
  log_every: 5000
  disable_timer: false
  log_level: 1
  sync_on_compute: false
  # Aggregator with fifteen metrics, all `torchmetrics.MeanMetric` with
  # `sync_on_compute: false` (same value as `metric.sync_on_compute`).
  # Groups by name prefix: Rewards/ (1), Game/ (1), Loss/ (7), State/ (3),
  # Grads/ (3).
  aggregator:
    _target_: sheeprl.utils.metric.MetricAggregator
    raise_on_missing: false
    metrics:
      Rewards/rew_avg:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Game/ep_len_avg:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/world_model_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/value_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/policy_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/observation_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/reward_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/state_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/continue_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      State/kl:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      State/post_entropy:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      State/prior_entropy:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Grads/world_model:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Grads/actor:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Grads/critic:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
  # TensorBoard logger. `name` is the same string as the top-level
  # `run_name`, and `root_dir` is `logs/runs/` followed by the top-level
  # `root_dir`.
  logger:
    _target_: lightning.fabric.loggers.TensorBoardLogger
    name: 2024-04-16_17-34-17_dreamer_v3_doapp_42
    root_dir: logs/runs/dreamer_v3/doapp
    version: null
    default_hp_metric: true
    prefix: ''
    sub_dir: null
# Model-manager section. `disabled: true`, so the entries under `models`
# are presumably unused in this run — confirm against the consumer.
# Five models are described (world_model, actor, critic, target_critic,
# moments); each has a `model_name` of the form `dreamer_v3_doapp_<key>`,
# a free-text `description` and an empty `tags` mapping.
model_manager:
  disabled: true
  models:
    world_model:
      model_name: dreamer_v3_doapp_world_model
      description: DreamerV3 World Model used in doapp Environment
      tags: {}
    actor:
      model_name: dreamer_v3_doapp_actor
      description: DreamerV3 Actor used in doapp Environment
      tags: {}
    critic:
      model_name: dreamer_v3_doapp_critic
      description: DreamerV3 Critic used in doapp Environment
      tags: {}
    target_critic:
      model_name: dreamer_v3_doapp_target_critic
      description: DreamerV3 Target Critic used in doapp Environment
      tags: {}
    moments:
      model_name: dreamer_v3_doapp_moments
      description: DreamerV3 Moments used in doapp Environment
      tags: {}