"""Stable-Baselines3 callbacks that call ``reset_instance_id`` on a validation env.

Both callbacks hold a (vectorized) validation environment and invoke the
``reset_instance_id`` method on every wrapped environment each time the
callback is triggered.
"""

from typing import Union

import gym
from stable_baselines3.common.callbacks import (
    BaseCallback,
    StopTrainingOnNoModelImprovement,
)
from stable_baselines3.common.vec_env import DummyVecEnv, VecEnv


def _as_vec_env(env: Union[gym.Env, VecEnv]) -> VecEnv:
    """Return ``env`` unchanged if it is a ``VecEnv``, else wrap it in a ``DummyVecEnv``.

    The factory lambda closes over this function's own parameter, which is
    never reassigned, so the wrapped object is always the one the caller
    passed in.
    """
    if isinstance(env, VecEnv):
        return env
    return DummyVecEnv([lambda: env])


class UpdateValEnv(BaseCallback):
    """Call ``reset_instance_id`` on the validation environment when triggered.

    Per the assertion in ``_on_step``, this callback must be attached to a
    parent callback (an ``EvalCallback``).

    :param val_env: Validation environment; a plain ``gym.Env`` is wrapped in
        a ``DummyVecEnv``.
    :param verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, val_env: Union[gym.Env, VecEnv], verbose: int = 0):
        super().__init__(verbose=verbose)
        self.val_env = _as_vec_env(val_env)

    def _on_step(self) -> bool:
        """Invoke ``reset_instance_id`` on every wrapped env; never stops training.

        :return: Always ``True``.
        """
        assert self.parent is not None, "``UpdateValEnv`` callback must be used with an ``EvalCallback``"
        # NOTE(review): ``reset_instance_id`` is defined by the wrapped
        # environment, not in this file — confirm its semantics there.
        self.val_env.env_method('reset_instance_id')
        return True


class UpdateValEnvAndStopTrainingOnNoModelImprovement(StopTrainingOnNoModelImprovement):
    """``StopTrainingOnNoModelImprovement`` that first calls ``reset_instance_id``.

    Each time the callback is triggered it invokes ``reset_instance_id`` on
    the validation environment and then defers the continue/stop decision to
    ``StopTrainingOnNoModelImprovement._on_step``.

    :param val_env: Validation environment; a plain ``gym.Env`` is wrapped in
        a ``DummyVecEnv``.
    :param max_no_improvement_evals: Forwarded unchanged to the parent class.
    :param min_evals: Forwarded unchanged to the parent class.
    :param verbose: Verbosity level forwarded to the parent class.
    """

    def __init__(
        self,
        val_env: Union[gym.Env, VecEnv],
        max_no_improvement_evals: int,
        min_evals: int = 0,
        verbose: int = 0,
    ):
        super().__init__(
            max_no_improvement_evals=max_no_improvement_evals,
            min_evals=min_evals,
            verbose=verbose,
        )
        self.val_env = _as_vec_env(val_env)

    def _on_step(self) -> bool:
        """Invoke ``reset_instance_id`` on every wrapped env, then run the parent check.

        :return: Whatever ``StopTrainingOnNoModelImprovement._on_step`` returns.
        """
        # The env call happens before the parent's check, matching the
        # original statement order.
        self.val_env.env_method('reset_instance_id')
        return super()._on_step()