# Author: ledmands
# Commit c37ff18: Modified watch_agent.py to include ability to give an
# argument to adjust repeat action probability.
"""Evaluate a saved DQN policy on ALE/Pacman-v5 and print reward statistics.

ALE v5 environments repeat the previous action with probability 0.25 by
default ("sticky actions"), which injects stochasticity even when
``evaluate_policy`` is called with ``deterministic=True``. Passing
``--repeat_action_probability 0`` disables that and makes the rollout
fully deterministic.
"""
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
import gymnasium as gym
import argparse

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "-r", "--repeat_action_probability",
    help="probability that ALE repeats the previous action (sticky actions); "
         "0 disables stochasticity",
    type=float,
    default=0.25,  # ALE v5 default
)
parser.add_argument(
    "-m", "--model",
    help="path/name of the saved DQN model to load",
    default="ALE-Pacman-v5",
)
parser.add_argument(
    "-n", "--n_eval_episodes",
    help="number of evaluation episodes",
    type=int,
    default=1,
)
args = parser.parse_args()

loaded_model = DQN.load(args.model)

# Monitor wrapper records per-episode rewards/lengths for evaluate_policy.
eval_env = Monitor(
    gym.make(
        "ALE/Pacman-v5",
        render_mode="rgb_array",
        repeat_action_probability=args.repeat_action_probability,
    )
)

# evaluate_policy returns (mean, std) of episode rewards across the episodes.
mean_rwd, std_rwd = evaluate_policy(
    loaded_model.policy, eval_env, n_eval_episodes=args.n_eval_episodes
)
print("mean rwd: ", mean_rwd)
print("std rwd: ", std_rwd)