"""Evaluate a pretrained PPO agent on LunarLander-v2.

Downloads a saved Stable-Baselines3 PPO checkpoint from the Hugging Face Hub,
runs it for a fixed number of evaluation episodes, and prints the mean reward,
its standard deviation, and the conservative score ``mean - std``.
"""

import gymnasium as gym
from huggingface_sb3 import load_from_hub
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

# NOTE(review): the checkpoint name indicates it was trained on "-v2"; newer
# Gymnasium releases may only register a later LunarLander version — confirm
# against the installed Gymnasium before bumping this id.
env_id = "LunarLander-v2"

# Fetch the zipped checkpoint from the Hub; the call returns a local file path.
model_fp = load_from_hub(
    "jostyposty/drl-course-unit-01-lunar-lander-v2",
    "ppo-LunarLander-v2_010_000_000_hf_defaults.zip",
)

# print_system_info=True makes SB3 report the environment the model was saved
# with next to the current one, which helps diagnose version mismatches.
model = PPO.load(model_fp, print_system_info=True)

# Monitor records per-episode returns/lengths so evaluate_policy reports the
# true episode rewards rather than rewards altered by other wrappers.
eval_env = Monitor(gym.make(env_id))
try:
    mean_reward, std_reward = evaluate_policy(
        model,
        eval_env,
        n_eval_episodes=10,
        deterministic=True,
    )
finally:
    # Release the environment's resources even if evaluation raises.
    eval_env.close()

# "results" is the mean reward minus one standard deviation.
print(f"results: {mean_reward - std_reward:.2f}")
print(f"mean_reward: {mean_reward:.2f} +/- {std_reward:.2f}")