michele-milesi committed on
Commit
2cbbf46
1 Parent(s): 1dd30c9

Initial Commit

Files changed (3)
  1. agent.py +116 -0
  2. ckpt_1024_0.ckpt +3 -0
  3. config.yaml +164 -0
agent.py ADDED
@@ -0,0 +1,116 @@
+ import argparse
+ import json
+
+ import gymnasium as gym
+ import torch
+ from lightning import Fabric
+ from omegaconf import OmegaConf
+ from sheeprl.algos.ppo.agent import build_agent
+ from sheeprl.utils.env import make_env
+ from sheeprl.utils.utils import dotdict
+
+ """This is an example agent based on SheepRL.
+
+ Usage:
+ diambra run python sheeprl/agent.py --cfg_path "./fake-logs/runs/ppo/doapp/fake-experiment/version_0/config.yaml" --checkpoint_path "./fake-logs/runs/ppo/doapp/fake-experiment/version_0/checkpoint/ckpt_1024_0.ckpt"
+ """
+
+
+ def main(cfg_path: str, checkpoint_path: str, test=False):
+     # Read the cfg file
+     cfg = dotdict(OmegaConf.to_container(OmegaConf.load(cfg_path), resolve=True))
+     print("Config parameters = ", json.dumps(cfg, sort_keys=True, indent=4))
+
+     # Override configs for evaluation
+     if not test:
+         cfg.env.capture_video = False
+         cfg.env.num_envs = 1
+
+     # Instantiate Fabric
+     precision = getattr(cfg.fabric, "precision", None)
+     plugins = getattr(cfg.fabric, "plugins", None)
+     fabric = Fabric(
+         accelerator="cpu",
+         devices=1,
+         num_nodes=1,
+         precision=precision,
+         plugins=plugins,
+         strategy="auto",
+     )
+
+     # Create Environment
+     env = make_env(cfg, 0, 0)()
+     observation_space = env.observation_space
+     is_multidiscrete = isinstance(env.action_space, gym.spaces.MultiDiscrete)
+     actions_dim = tuple(
+         env.action_space.nvec.tolist() if is_multidiscrete else [env.action_space.n]
+     )
+     cnn_keys = cfg.algo.cnn_keys.encoder
+     mlp_keys = cfg.algo.mlp_keys.encoder
+     obs_keys = mlp_keys + cnn_keys
+
+     # Load the trained agent
+     state = fabric.load(checkpoint_path)
+     # You need to retrieve only the player
+     agent = build_agent(
+         fabric=fabric,
+         actions_dim=actions_dim,
+         is_continuous=False,
+         cfg=cfg,
+         obs_space=observation_space,
+         agent_state=state["agent"],
+     )[-1]
+     agent.eval()
+
+     # Print policy network architecture
+     print("Policy architecture:")
+     print(agent)
+
+     o, info = env.reset()
+
+     while True:
+         # Convert numpy observations into torch observations and normalize image observations
+         obs = {}
+         for k in o.keys():
+             if k in obs_keys:
+                 torch_obs = torch.from_numpy(o[k].copy()).to(fabric.device).unsqueeze(0)
+                 if k in cnn_keys:
+                     torch_obs = (
+                         torch_obs.reshape(1, -1, *torch_obs.shape[-2:]) / 255 - 0.5
+                     )
+                 if k in mlp_keys:
+                     torch_obs = torch_obs.float()
+                 obs[k] = torch_obs
+
+         actions = agent.get_actions(obs, greedy=True)
+         actions = torch.cat([act.argmax(dim=-1) for act in actions], dim=-1)
+
+         o, _, terminated, truncated, info = env.step(
+             actions.cpu().numpy().reshape(env.action_space.shape)
+         )
+
+         if terminated or truncated:
+             o, info = env.reset()
+             if info["env_done"] or test is True:
+                 break
+
+     # Close the environment
+     env.close()
+
+     # Return success
+     return 0
+
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser()
+     parser.add_argument(
+         "--cfg_path", type=str, required=True, help="Configuration file"
+     )
+     parser.add_argument(
+         "--checkpoint_path", type=str, default="model", help="Model checkpoint"
+     )
+     parser.add_argument("--test", action="store_true", help="Test mode")
+     opt = parser.parse_args()
+     print(opt)
+
+     main(opt.cfg_path, opt.checkpoint_path, opt.test)
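
For a quick sanity check before launching the full evaluation loop, the checkpoint can be loaded on its own and its contents listed. A minimal sketch, assuming the same checkpoint layout that agent.py relies on (a dict holding the model weights under an "agent" entry):

# Minimal sketch: inspect ckpt_1024_0.ckpt before running the agent.
# Assumes the layout agent.py expects: a dict with the weights under "agent".
from lightning import Fabric

fabric = Fabric(accelerator="cpu", devices=1)
state = fabric.load("ckpt_1024_0.ckpt")

print(list(state.keys()))  # "agent" should appear among the stored entries
for name, tensor in state["agent"].items():
    print(name, tuple(tensor.shape))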
ckpt_1024_0.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7a7b985f51b9f2f40182083b57bc785a9972c72227895a14f1d4c764bfa4b8f0
+ size 2582118
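
Note that ckpt_1024_0.ckpt is committed as a Git LFS pointer: the three lines above are the entire file as stored in Git, and the actual weights (about 2.5 MB, per the size field) are fetched by git lfs pull after cloning. A small hypothetical helper, not part of the repo, to catch a missing pull before Fabric fails on the pointer file:

# Hypothetical helper: detect a Git LFS pointer that was never replaced by
# the real weights. LFS pointers start with "version https://git-lfs...",
# while a real PyTorch checkpoint is a zip archive starting with b"PK".
def is_lfs_pointer(path: str) -> bool:
    with open(path, "rb") as f:
        return f.read(7) == b"version"

if is_lfs_pointer("ckpt_1024_0.ckpt"):
    raise RuntimeError("Run `git lfs pull` to download the real checkpoint")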
config.yaml ADDED
@@ -0,0 +1,164 @@
+ num_threads: 1
+ float32_matmul_precision: high
+ dry_run: false
+ seed: 42
+ torch_use_deterministic_algorithms: false
+ torch_backends_cudnn_benchmark: true
+ torch_backends_cudnn_deterministic: false
+ cublas_workspace_config: null
+ exp_name: ppo_doapp
+ run_name: 2024-04-15_15-25-55_ppo_doapp_42
+ root_dir: ppo/doapp
+ algo:
+   name: ppo
+   total_steps: 1024
+   per_rank_batch_size: 16
+   run_test: true
+   cnn_keys:
+     encoder:
+     - frame
+   mlp_keys:
+     encoder:
+     - own_character
+     - own_health
+     - own_side
+     - own_wins
+     - opp_character
+     - opp_health
+     - opp_side
+     - opp_wins
+     - stage
+     - timer
+     - action
+   optimizer:
+     _target_: torch.optim.Adam
+     lr: 0.005
+     eps: 1.0e-06
+     weight_decay: 0
+     betas:
+     - 0.9
+     - 0.999
+   anneal_lr: false
+   gamma: 0.99
+   gae_lambda: 0.95
+   update_epochs: 1
+   loss_reduction: mean
+   normalize_advantages: true
+   clip_coef: 0.2
+   anneal_clip_coef: false
+   clip_vloss: false
+   ent_coef: 0.0
+   anneal_ent_coef: false
+   vf_coef: 1.0
+   rollout_steps: 32
+   dense_units: 16
+   mlp_layers: 1
+   dense_act: torch.nn.Tanh
+   layer_norm: false
+   max_grad_norm: 1.0
+   encoder:
+     cnn_features_dim: 128
+     mlp_features_dim: 32
+     dense_units: 16
+     mlp_layers: 1
+     dense_act: torch.nn.Tanh
+     layer_norm: false
+   actor:
+     dense_units: 16
+     mlp_layers: 1
+     dense_act: torch.nn.Tanh
+     layer_norm: false
+   critic:
+     dense_units: 16
+     mlp_layers: 1
+     dense_act: torch.nn.Tanh
+     layer_norm: false
+ buffer:
+   size: 32
+   memmap: true
+   validate_args: false
+   from_numpy: false
+   share_data: false
+ checkpoint:
+   every: 100
+   resume_from: null
+   save_last: true
+   keep_last: 5
+ distribution:
+   validate_args: false
+ env:
+   id: doapp
+   num_envs: 1
+   frame_stack: 1
+   sync_env: true
+   screen_size: 64
+   action_repeat: 1
+   grayscale: false
+   clip_rewards: false
+   capture_video: true
+   frame_stack_dilation: 1
+   max_episode_steps: null
+   reward_as_observation: false
+   wrapper:
+     _target_: sheeprl.envs.diambra.DiambraWrapper
+     id: doapp
+     action_space: DISCRETE
+     screen_size: 64
+     grayscale: false
+     repeat_action: 1
+     rank: null
+     log_level: 0
+     increase_performance: true
+     diambra_settings:
+       role: P1
+       step_ratio: 6
+       difficulty: 4
+       continue_game: 0.0
+       show_final: false
+       outfits: 2
+       splash_screen: false
+     diambra_wrappers:
+       stack_actions: 1
+       no_op_max: 0
+       no_attack_buttons_combinations: false
+       add_last_action: true
+       scale: false
+       exclude_image_scaling: false
+       process_discrete_binary: false
+       role_relative: true
+ fabric:
+   _target_: lightning.fabric.Fabric
+   devices: 1
+   num_nodes: 1
+   strategy: auto
+   accelerator: cpu
+   precision: 32-true
+   callbacks:
+   - _target_: sheeprl.utils.callback.CheckpointCallback
+     keep_last: 5
+ metric:
+   log_every: 5000
+   disable_timer: false
+   log_level: 1
+   sync_on_compute: false
+   aggregator:
+     _target_: sheeprl.utils.metric.MetricAggregator
+     raise_on_missing: false
+     metrics:
+       Rewards/rew_avg:
+         _target_: torchmetrics.MeanMetric
+         sync_on_compute: false
+       Game/ep_len_avg:
+         _target_: torchmetrics.MeanMetric
+         sync_on_compute: false
+   logger:
+     _target_: lightning.fabric.loggers.TensorBoardLogger
+     name: 2024-04-15_15-25-55_ppo_doapp_42
+     root_dir: logs/runs/ppo/doapp
+     version: null
+     default_hp_metric: true
+     prefix: ''
+     sub_dir: null
+ model_manager:
+   disabled: true
+   models: {}
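
agent.py consumes this file through OmegaConf and flattens it into a dotdict, so every key above is reachable via attribute access. A minimal sketch of that round trip, assuming config.yaml sits in the working directory:

# Minimal sketch: load config.yaml the same way agent.py does and read a few fields.
from omegaconf import OmegaConf
from sheeprl.utils.utils import dotdict

cfg = dotdict(OmegaConf.to_container(OmegaConf.load("config.yaml"), resolve=True))
print(cfg.algo.name)              # ppo
print(cfg.algo.cnn_keys.encoder)  # ['frame']
print(cfg.env.wrapper.id)         # doapp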