Michele Milesi committed
Commit 6b39341 · 1 Parent(s): 12794d0
feat: added dv3
agent-dreamer_v3.py
ADDED
@@ -0,0 +1,126 @@
import argparse
import json

import gymnasium as gym
import torch
from lightning import Fabric
from omegaconf import OmegaConf
from sheeprl.algos.dreamer_v3.agent import build_agent
from sheeprl.utils.env import make_env
from sheeprl.utils.utils import dotdict

"""This is an example agent based on SheepRL.

Usage:
cd sheeprl
diambra run python agent-dreamer_v3.py --cfg_path "./fake-logs/runs/dreamer_v3/doapp/fake-experiment/version_0/config.yaml" --checkpoint_path "./fake-logs/runs/dreamer_v3/doapp/fake-experiment/version_0/checkpoint/ckpt_1024_0.ckpt"
"""


def main(cfg_path: str, checkpoint_path: str, test=False):
    # Read the cfg file
    cfg = dotdict(OmegaConf.to_container(OmegaConf.load(cfg_path), resolve=True))
    print("Config parameters = ", json.dumps(cfg, sort_keys=True, indent=4))

    # Override configs for evaluation
    # You do not need to capture the video since you are submitting the agent and the video is recorded by DIAMBRA
    cfg.env.capture_video = False

    # Instantiate Fabric
    # You must use the same precision and plugins used for training.
    precision = getattr(cfg.fabric, "precision", None)
    plugins = getattr(cfg.fabric, "plugins", None)
    fabric = Fabric(
        accelerator="auto",
        devices=1,
        num_nodes=1,
        precision=precision,
        plugins=plugins,
        strategy="auto",
    )

    # Create Environment
    env = make_env(cfg, 0, 0)()
    observation_space = env.observation_space
    is_multidiscrete = isinstance(env.action_space, gym.spaces.MultiDiscrete)
    actions_dim = tuple(
        env.action_space.nvec.tolist() if is_multidiscrete else [env.action_space.n]
    )
    cnn_keys = cfg.algo.cnn_keys.encoder
    mlp_keys = cfg.algo.mlp_keys.encoder
    obs_keys = mlp_keys + cnn_keys

    # Load the trained agent
    state = fabric.load(checkpoint_path)
    # You need to retrieve only the player
    # Check for each algorithm which models the `build_agent()` function returns
    # (placed in the `agent.py` file of the algorithm) and which arguments it needs.
    # Check also the keys of the checkpoint: if the `build_agent()` parameter
    # is called `model_state`, then you retrieve the model state with `state["model"]`.
    agent = build_agent(
        fabric=fabric,
        actions_dim=actions_dim,
        is_continuous=False,
        cfg=cfg,
        obs_space=observation_space,
        world_model_state=state["world_model"],
        actor_state=state["actor"],
        critic_state=state["critic"],
        target_critic_state=state["target_critic"],
    )[-1]
    agent.eval()

    # Print policy network architecture
    print("Policy architecture:")
    print(agent)

    o, info = env.reset()

    while True:
        # Convert numpy observations into torch observations and normalize image observations.
        # Every algorithm has its own way to do it: check the `test()` function called
        # in the `evaluate.py` file of the algorithm for the correct way to do it.
        obs = {}
        for k in obs_keys:
            obs[k] = (
                torch.from_numpy(o[k]).to(fabric.device).view(1, 1, *o[k].shape).float()
            )
            if k in cnn_keys:
                obs[k] = obs[k] / 255 - 0.5

        # Select actions: the agent returns one one-hot categorical distribution, or
        # multiple one-hot categorical distributions for multi-discrete action spaces
        actions = agent.get_actions(obs, greedy=False)
        # Convert actions from one-hot categorical to categorical
        actions = torch.cat([act.argmax(dim=-1) for act in actions], dim=-1)

        o, _, terminated, truncated, info = env.step(
            actions.cpu().numpy().reshape(env.action_space.shape)
        )

        if terminated or truncated:
            o, info = env.reset()
            if info["env_done"] or test is True:
                break

    # Close the environment
    env.close()

    # Return success
    return 0


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--cfg_path", type=str, required=True, help="Configuration file"
    )
    parser.add_argument(
        "--checkpoint_path", type=str, default="model", help="Model checkpoint"
    )
    parser.add_argument("--test", action="store_true", help="Test mode")
    opt = parser.parse_args()
    print(opt)

    main(opt.cfg_path, opt.checkpoint_path, opt.test)
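As the comments in the script note, the checkpoint keys must match the parameters expected by `build_agent()`. A minimal sketch for checking them before wiring up the call, assuming `fabric` and `checkpoint_path` are set up as in the script above:

# Minimal sketch: list the top-level keys stored in the checkpoint so they can be
# matched against the `build_agent()` parameters (e.g. world_model, actor, critic, target_critic).
state = fabric.load(checkpoint_path)
print(sorted(state.keys()))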
results/dreamer_v3/ckpt_1024_0.ckpt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0f9a9b2bccb05f94a374a010446b009febd8ce8ae63105aec1455cf99c5b4cdc
size 389012
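This checkpoint is stored as a Git LFS pointer: the committed file only records the object hash and size (here about 389 KB), while the actual weights are fetched with `git lfs pull` when the repository is cloned with Git LFS installed.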
results/dreamer_v3/config.yaml
ADDED
@@ -0,0 +1,354 @@
num_threads: 1
float32_matmul_precision: high
dry_run: false
seed: 42
torch_use_deterministic_algorithms: false
torch_backends_cudnn_benchmark: true
torch_backends_cudnn_deterministic: false
cublas_workspace_config: null
exp_name: dreamer_v3_doapp
run_name: 2024-04-16_17-34-17_dreamer_v3_doapp_42
root_dir: dreamer_v3/doapp
algo:
  name: dreamer_v3
  total_steps: 1024
  per_rank_batch_size: 2
  run_test: false
  cnn_keys:
    encoder:
    - frame
    decoder:
    - frame
  mlp_keys:
    encoder:
    - own_character
    - own_health
    - own_side
    - own_wins
    - opp_character
    - opp_health
    - opp_side
    - opp_wins
    - stage
    - timer
    - action
    decoder:
    - own_character
    - own_health
    - own_side
    - own_wins
    - opp_character
    - opp_health
    - opp_side
    - opp_wins
    - stage
    - timer
    - action
  world_model:
    optimizer:
      _target_: torch.optim.Adam
      lr: 0.0001
      eps: 1.0e-08
      weight_decay: 0
      betas:
      - 0.9
      - 0.999
    discrete_size: 4
    stochastic_size: 4
    kl_dynamic: 0.5
    kl_representation: 0.1
    kl_free_nats: 1.0
    kl_regularizer: 1.0
    continue_scale_factor: 1.0
    clip_gradients: 1000.0
    decoupled_rssm: false
    learnable_initial_recurrent_state: true
    encoder:
      cnn_channels_multiplier: 2
      cnn_act: torch.nn.SiLU
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      cnn_layer_norm:
        cls: sheeprl.models.models.LayerNormChannelLast
        kw:
          eps: 0.001
      mlp_layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
    recurrent_model:
      recurrent_state_size: 8
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
    transition_model:
      hidden_size: 8
      dense_act: torch.nn.SiLU
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
    representation_model:
      hidden_size: 8
      dense_act: torch.nn.SiLU
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
    observation_model:
      cnn_channels_multiplier: 2
      cnn_act: torch.nn.SiLU
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      cnn_layer_norm:
        cls: sheeprl.models.models.LayerNormChannelLast
        kw:
          eps: 0.001
      mlp_layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
    reward_model:
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
      bins: 255
    discount_model:
      learnable: true
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
  actor:
    optimizer:
      _target_: torch.optim.Adam
      lr: 8.0e-05
      eps: 1.0e-05
      weight_decay: 0
      betas:
      - 0.9
      - 0.999
    cls: sheeprl.algos.dreamer_v3.agent.Actor
    ent_coef: 0.0003
    min_std: 0.1
    max_std: 1.0
    init_std: 2.0
    dense_act: torch.nn.SiLU
    mlp_layers: 1
    layer_norm:
      cls: sheeprl.models.models.LayerNorm
      kw:
        eps: 0.001
    dense_units: 8
    clip_gradients: 100.0
    unimix: 0.01
    action_clip: 1.0
    moments:
      decay: 0.99
      max: 1.0
      percentile:
        low: 0.05
        high: 0.95
  critic:
    optimizer:
      _target_: torch.optim.Adam
      lr: 8.0e-05
      eps: 1.0e-05
      weight_decay: 0
      betas:
      - 0.9
      - 0.999
    dense_act: torch.nn.SiLU
    mlp_layers: 1
    layer_norm:
      cls: sheeprl.models.models.LayerNorm
      kw:
        eps: 0.001
    dense_units: 8
    per_rank_target_network_update_freq: 1
    tau: 0.02
    bins: 255
    clip_gradients: 100.0
  gamma: 0.996996996996997
  lmbda: 0.95
  horizon: 15
  replay_ratio: 0.0625
  learning_starts: 1024
  per_rank_pretrain_steps: 0
  per_rank_sequence_length: 64
  cnn_layer_norm:
    cls: sheeprl.models.models.LayerNormChannelLast
    kw:
      eps: 0.001
  mlp_layer_norm:
    cls: sheeprl.models.models.LayerNorm
    kw:
      eps: 0.001
  dense_units: 8
  mlp_layers: 1
  dense_act: torch.nn.SiLU
  cnn_act: torch.nn.SiLU
  unimix: 0.01
  hafner_initialization: true
  player:
    discrete_size: 4
buffer:
  size: 1024
  memmap: true
  validate_args: false
  from_numpy: false
  checkpoint: true
checkpoint:
  every: 10000
  resume_from: null
  save_last: true
  keep_last: 5
distribution:
  validate_args: false
  type: auto
env:
  id: doapp
  num_envs: 1
  frame_stack: -1
  sync_env: true
  screen_size: 64
  action_repeat: 1
  grayscale: false
  clip_rewards: false
  capture_video: true
  frame_stack_dilation: 1
  max_episode_steps: null
  reward_as_observation: false
  wrapper:
    _target_: sheeprl.envs.diambra.DiambraWrapper
    id: doapp
    action_space: DISCRETE
    screen_size: 64
    grayscale: false
    repeat_action: 1
    rank: null
    log_level: 0
    increase_performance: true
    diambra_settings:
      role: P1
      step_ratio: 6
      difficulty: 4
      continue_game: 0.0
      show_final: false
      outfits: 2
      splash_screen: false
    diambra_wrappers:
      stack_actions: 1
      no_op_max: 0
      no_attack_buttons_combinations: false
      add_last_action: true
      scale: false
      exclude_image_scaling: false
      process_discrete_binary: false
      role_relative: true
fabric:
  _target_: lightning.fabric.Fabric
  devices: 1
  num_nodes: 1
  strategy: auto
  accelerator: cpu
  precision: 32-true
  callbacks:
  - _target_: sheeprl.utils.callback.CheckpointCallback
    keep_last: 5
metric:
  log_every: 5000
  disable_timer: false
  log_level: 1
  sync_on_compute: false
  aggregator:
    _target_: sheeprl.utils.metric.MetricAggregator
    raise_on_missing: false
    metrics:
      Rewards/rew_avg:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Game/ep_len_avg:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/world_model_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/value_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/policy_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/observation_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/reward_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/state_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/continue_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      State/kl:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      State/post_entropy:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      State/prior_entropy:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Grads/world_model:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Grads/actor:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Grads/critic:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
  logger:
    _target_: lightning.fabric.loggers.TensorBoardLogger
    name: 2024-04-16_17-34-17_dreamer_v3_doapp_42
    root_dir: logs/runs/dreamer_v3/doapp
    version: null
    default_hp_metric: true
    prefix: ''
    sub_dir: null
model_manager:
  disabled: true
  models:
    world_model:
      model_name: dreamer_v3_doapp_world_model
      description: DreamerV3 World Model used in doapp Environment
      tags: {}
    actor:
      model_name: dreamer_v3_doapp_actor
      description: DreamerV3 Actor used in doapp Environment
      tags: {}
    critic:
      model_name: dreamer_v3_doapp_critic
      description: DreamerV3 Critic used in doapp Environment
      tags: {}
    target_critic:
      model_name: dreamer_v3_doapp_target_critic
      description: DreamerV3 Target Critic used in doapp Environment
      tags: {}
    moments:
      model_name: dreamer_v3_doapp_moments
      description: DreamerV3 Moments used in doapp Environment
      tags: {}
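Given the repository layout above, the evaluation script can be pointed directly at these files; the paths below refer to this repository's `results/dreamer_v3` folder rather than the `fake-logs` placeholders used in the script's docstring:

cd sheeprl
diambra run python agent-dreamer_v3.py --cfg_path "./results/dreamer_v3/config.yaml" --checkpoint_path "./results/dreamer_v3/ckpt_1024_0.ckpt"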
ckpt_1024_0.ckpt → results/ppo/ckpt_1024_0.ckpt
RENAMED
File without changes
config.yaml → results/ppo/config.yaml
RENAMED
File without changes