# Author: ledmands
# Commit c37ff18: Modified watch_agent.py to include ability to give an
# argument to adjust repeat action probability.
"""Evaluate a saved DQN policy on ALE/Pacman-v5 and print reward statistics.

ALE v5 environments repeat the previous action with probability 0.25 by
default ("sticky actions"), which injects stochasticity even when
``evaluate_policy`` is called with ``deterministic=True``. Passing
``--repeat_action_probability 0`` disables that and makes the rollout
fully deterministic.
"""
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
import gymnasium as gym
import argparse

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "-r", "--repeat_action_probability",
    help="probability that ALE repeats the previous action (sticky actions); "
         "0 disables stochasticity",
    type=float,
    default=0.25,  # ALE v5 default
)
parser.add_argument(
    "-m", "--model",
    help="path/name of the saved DQN model to load",
    default="ALE-Pacman-v5",
)
parser.add_argument(
    "-n", "--n_eval_episodes",
    help="number of evaluation episodes",
    type=int,
    default=1,
)
args = parser.parse_args()

loaded_model = DQN.load(args.model)

# Monitor wrapper records per-episode rewards/lengths for evaluate_policy.
eval_env = Monitor(
    gym.make(
        "ALE/Pacman-v5",
        render_mode="rgb_array",
        repeat_action_probability=args.repeat_action_probability,
    )
)

# evaluate_policy returns (mean, std) of episode rewards across the episodes.
mean_rwd, std_rwd = evaluate_policy(
    loaded_model.policy, eval_env, n_eval_episodes=args.n_eval_episodes
)
print("mean rwd: ", mean_rwd)
print("std rwd: ", std_rwd)