import os
from typing import Dict, Text

import yaml
from stable_baselines3.common.utils import get_latest_run_id
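# Illustrative sketch of the YAML layout Config expects (an assumption inferred from the
# keys read below; the actual files under ./facility_location/cfg/ may differ):
#
#   env_specs: {region: 'example_region'}        # hypothetical value for illustration
#   agent_specs: {num_layers: 3}                 # hypothetical value for illustration
#   gamma: 0.99
#   obs_dims: !!python/tuple [64, 64]            # python/tuple tag handled by TupleSafeLoader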
class Config:
    """Experiment configuration loaded from a YAML file (or an explicit dict) keyed by cfg_id."""

    def __init__(self, cfg_id: Text, global_seed: int, tmp: bool, root_dir: Text,
                 agent: Text = 'rl-gnn', reset_num_timesteps: bool = True,
                 cfg_dict: Dict = None, model_path: Text = None):
        self.cfg_id = cfg_id
        self.seed = global_seed
        if cfg_dict is not None:
            cfg = cfg_dict
        else:
            file_path = './facility_location/cfg/{}.yaml'.format(self.cfg_id)

            class TupleSafeLoader(yaml.SafeLoader):
                """SafeLoader that additionally accepts the python/tuple tag used in the cfg files."""

                def construct_python_tuple(self, node):
                    return tuple(self.construct_sequence(node))

            TupleSafeLoader.add_constructor(
                u'tag:yaml.org,2002:python/tuple',
                TupleSafeLoader.construct_python_tuple)

            def load_yaml(file_path):
                with open(file_path, 'r') as f:
                    return yaml.load(f, Loader=TupleSafeLoader)

            cfg = load_yaml(file_path)

        # create dirs
        self.root_dir = '/tmp/flp' if tmp else root_dir
        self.agent = agent
        self.multi = cfg.get('multi', False)
        self.tb_log_path = os.path.join(self.root_dir, 'runs')
        self.tb_log_name = f'{cfg_id}-agent-{agent}-seed-{global_seed}'
        latest_run_id = get_latest_run_id(self.tb_log_path, self.tb_log_name)
        if not reset_num_timesteps:
            # Continue training in the same directory
            latest_run_id -= 1
        self.cfg_dir = os.path.join(
            self.root_dir, 'output',
            f'{cfg_id}-agent-{agent}-seed-{global_seed}_{latest_run_id + 1}')
        self.ckpt_save_path = os.path.join(self.cfg_dir, 'ckpt')
        self.best_model_path = os.path.join(self.cfg_dir, 'best-models')
        self.latest_model_path = os.path.join(self.cfg_dir, 'latest-models')
        self.load_model_path = model_path

        # env
        self.env_specs = cfg.get('env_specs', dict())
        self.reward_specs = cfg.get('reward_specs', dict())
        self.obs_specs = cfg.get('obs_specs', dict())
        self.eval_specs = cfg.get('eval_specs', dict())

        # agent config
        self.agent_specs = cfg.get('agent_specs', dict())
        self.mlp_specs = cfg.get('mlp_specs', dict())
        self.gnn_specs = cfg.get('gnn_specs', dict())
        self.ts_specs = cfg.get('ts_specs', dict())
        self.popstar_specs = cfg.get('popstar_specs', dict())
        self.ga_specs = cfg.get('ga_specs', dict())

        # training config
        self.gamma = cfg.get('gamma', 0.99)
        self.tau = cfg.get('tau', 0.95)
        self.state_encoder_specs = cfg.get('state_encoder_specs', dict())
        self.policy_specs = cfg.get('policy_specs', dict())
        self.value_specs = cfg.get('value_specs', dict())
        self.lr = cfg.get('lr', 4e-4)
        self.weightdecay = cfg.get('weightdecay', 0.0)
        self.eps = cfg.get('eps', 1e-5)
        self.value_pred_coef = cfg.get('value_pred_coef', 0.5)
        self.entropy_coef = cfg.get('entropy_coef', 0.01)
        self.clip_epsilon = cfg.get('clip_epsilon', 0.2)
        self.max_num_iterations = cfg.get('max_num_iterations', 1000)
        self.num_episodes_per_iteration = cfg.get('num_episodes_per_iteration', 1000)
        self.max_sequence_length = cfg.get('max_sequence_length', 100)
        self.num_optim_epoch = cfg.get('num_optim_epoch', 4)
        self.mini_batch_size = cfg.get('mini_batch_size', 1024)
        self.save_model_interval = cfg.get('save_model_interval', 10)
    def log(self, logger, tb_logger):
        """Log cfg to logger and tensorboard."""
        logger.info(f'id: {self.cfg_id}')
        logger.info(f'seed: {self.seed}')
        logger.info(f'env_specs: {self.env_specs}')
        logger.info(f'reward_specs: {self.reward_specs}')
        logger.info(f'obs_specs: {self.obs_specs}')
        logger.info(f'agent_specs: {self.agent_specs}')
        logger.info(f'gamma: {self.gamma}')
        logger.info(f'tau: {self.tau}')
        logger.info(f'state_encoder_specs: {self.state_encoder_specs}')
        logger.info(f'policy_specs: {self.policy_specs}')
        logger.info(f'value_specs: {self.value_specs}')
        logger.info(f'lr: {self.lr}')
        logger.info(f'weightdecay: {self.weightdecay}')
        logger.info(f'eps: {self.eps}')
        logger.info(f'value_pred_coef: {self.value_pred_coef}')
        logger.info(f'entropy_coef: {self.entropy_coef}')
        logger.info(f'clip_epsilon: {self.clip_epsilon}')
        logger.info(f'max_num_iterations: {self.max_num_iterations}')
        logger.info(f'num_episodes_per_iteration: {self.num_episodes_per_iteration}')
        logger.info(f'max_sequence_length: {self.max_sequence_length}')
        logger.info(f'num_optim_epoch: {self.num_optim_epoch}')
        logger.info(f'mini_batch_size: {self.mini_batch_size}')
        logger.info(f'save_model_interval: {self.save_model_interval}')
        if tb_logger is not None:
            tb_logger.add_hparams(
                hparam_dict={
                    'id': self.cfg_id,
                    'seed': self.seed,
                    'env_specs': str(self.env_specs),
                    'reward_specs': str(self.reward_specs),
                    'obs_specs': str(self.obs_specs),
                    'agent_specs': str(self.agent_specs),
                    'gamma': self.gamma,
                    'tau': self.tau,
                    'state_encoder_specs': str(self.state_encoder_specs),
                    'policy_specs': str(self.policy_specs),
                    'value_specs': str(self.value_specs),
                    'lr': self.lr,
                    'weightdecay': self.weightdecay,
                    'eps': self.eps,
                    'value_pred_coef': self.value_pred_coef,
                    'entropy_coef': self.entropy_coef,
                    'clip_epsilon': self.clip_epsilon,
                    'max_num_iterations': self.max_num_iterations,
                    'num_episodes_per_iteration': self.num_episodes_per_iteration,
                    'max_sequence_length': self.max_sequence_length,
                    'num_optim_epoch': self.num_optim_epoch,
                    'mini_batch_size': self.mini_batch_size,
                    'save_model_interval': self.save_model_interval},
                metric_dict={'hparam/placeholder': 0.0})
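

if __name__ == '__main__':
    # Minimal usage sketch (not part of the original module): assumes a YAML file named
    # 'example' exists under ./facility_location/cfg/. Passing tb_logger=None skips the
    # TensorBoard hparams call, so a plain logging.Logger is enough here.
    import logging

    logging.basicConfig(level=logging.INFO)
    cfg = Config(cfg_id='example', global_seed=0, tmp=True, root_dir='./results')
    cfg.log(logger=logging.getLogger(__name__), tb_logger=None)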