"""Evaluate a pretrained PPO agent from the Hugging Face Hub on LunarLander.

The script downloads a saved Stable-Baselines3 PPO checkpoint, runs it for a
fixed number of evaluation episodes, and prints the reward statistics.
"""

import gymnasium as gym
from huggingface_sb3 import load_from_hub
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

# Environment the checkpoint is evaluated on.
env_id = "LunarLander-v2"

# Fetch the checkpoint archive (repo id, file name) and get its local path.
model_fp = load_from_hub(
    "jostyposty/drl-course-unit-01-lunar-lander-v2",
    "ppo-LunarLander-v2_010_000_000_hf_defaults.zip",
)

# print_system_info=True makes the loader report environment details, which
# helps when diagnosing version mismatches with the saved checkpoint.
model = PPO.load(model_fp, print_system_info=True)

# Wrap the environment in Monitor before handing it to evaluate_policy.
eval_env = Monitor(gym.make(env_id))
try:
    mean_reward, std_reward = evaluate_policy(
        model, eval_env, n_eval_episodes=10, deterministic=True
    )
finally:
    # Always release the environment, even if evaluation raises.
    eval_env.close()

# Lower-bound style score: mean reward minus one standard deviation.
# NOTE(review): presumably the course leaderboard metric -- confirm.
print(f"results: {mean_reward - std_reward:.2f}")

print(f"mean_reward: {mean_reward:.2f} +/- {std_reward:.2f}")
|