File size: 3,883 Bytes
079c32c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
from zoo.minigrid.config.minigrid_muzero_config import main_config, create_config
from lzero.entry import eval_muzero
import numpy as np
if __name__ == "__main__":
    """
    Overview:
        Script entry point. Calls ``eval_muzero`` once per seed with the MiniGrid MuZero configuration,
        collects what each call returns, and prints a per-seed and an overall summary.
    Variables:
        - model_path (:obj:`str` or ``None``): Checkpoint to evaluate, typically something like
          "exp_name/ckpt/ckpt_best.pth.tar" (an absolute path is recommended). It is ``None`` here.
        - seeds (:obj:`List[int]`): Seeds to evaluate; ``eval_muzero`` is called once for each of them.
        - num_episodes_each_seed (:obj:`int`): Number of episodes requested from ``eval_muzero`` per seed.
        - main_config (:obj:`EasyDict`): Imported evaluation config; its ``env`` section is adjusted below.
        - returns_mean_seeds (:obj:`np.ndarray`): First value returned by ``eval_muzero``, one entry per seed.
        - returns_seeds (:obj:`np.ndarray`): Second value returned by ``eval_muzero``, one entry per seed.
    Outputs:
        Prints the evaluated seeds, the per-seed mean returns and returns, and the mean over all seeds.
    .. note::
        The semantics of the values returned by ``eval_muzero`` are defined by that function; refer to its
        own documentation for details.
    """
    # ---- Evaluation setup -------------------------------------------------------------------------
    # Example checkpoint location: './ckpt/ckpt_best.pth.tar'
    # NOTE(review): with None, eval_muzero presumably runs without loading a checkpoint -- confirm.
    model_path = None
    seeds = [0]  # one eval_muzero call is made per entry
    num_episodes_each_seed = 1  # forwarded unchanged to eval_muzero
    # Kept for reference only: this product is not read again anywhere below.
    total_test_episodes = num_episodes_each_seed * len(seeds)

    # ---- Evaluator environment settings -----------------------------------------------------------
    main_config.env.evaluator_env_num = 1  # number of evaluator environments
    main_config.env.n_evaluator_episode = 1  # number of episodes for the evaluator

    # ---- Replay recording -------------------------------------------------------------------------
    # For an mp4 replay instead, enable the following line:
    # main_config.env.replay_path = './video'
    # GIF replay is switched on and written under './video'.
    main_config.env.save_replay_gif = True
    main_config.env.replay_path_gif = './video'

    # ---- Run the evaluation, seed by seed ---------------------------------------------------------
    # Each call yields a (mean return, returns) pair; the pairs are split into two arrays afterwards.
    per_seed_results = [
        eval_muzero(
            [main_config, create_config],
            seed=seed,
            num_episodes_each_seed=num_episodes_each_seed,
            print_seed_details=False,
            model_path=model_path
        )
        for seed in seeds
    ]
    returns_mean_seeds = np.array([seed_mean for seed_mean, _ in per_seed_results])
    returns_seeds = np.array([seed_returns for _, seed_returns in per_seed_results])

    # ---- Report -----------------------------------------------------------------------------------
    separator = "=" * 20
    print(separator)
    print(f"We evaluated a total of {len(seeds)} seeds. For each seed, we evaluated {num_episodes_each_seed} episode(s).")
    print(f"For seeds {seeds}, the mean returns are {returns_mean_seeds}, and the returns are {returns_seeds}.")
    print("Across all seeds, the mean reward is:", returns_mean_seeds.mean())
    print(separator)