#
# -*- coding: utf-8 -*-
import os
import gc
import sys
import json
import random
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
AGENTS_DIR = os.path.join(BASE_DIR, 'agents')
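# Make the modules inside ./agents importable directly, in addition to via the agents package.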
sys.path.append(AGENTS_DIR)
from agents.PDPPOAgent_one_critic import PDPPOAgent_one_critic
from agents.PPOAgent_two_critics import PPOAgent_two_critics
from agents.PDPPOAgent import PDPPOAgent
from agents.PPOAgent import PPOAgent
from agents.stableBaselineAgents import StableBaselineAgent
from test_functions import test_agents
import numpy as np
import torch
from envs import SimplePlant
from scenarioManager.stochasticDemandModel import StochasticDemandModel
# '15items_5machines_i100', '25items_10machines'
if __name__ == '__main__':
    # experiments = ['15items_5machines_t100_i100']
    # experiments = ['20items_10machines_t100_i100']
    # experiments = ['25items_10machines_t100_i100']
    experiments = ['25items_15machines_t100_i100']
    # experiments = ['15items_5machines_t100_i100', '20items_10machines_t100_i100']
    for experiment_name in experiments:
        for i in range(20, 21):
            # Setting the seeds
            np.random.seed(i)
            random.seed(i)
            torch.manual_seed(i)
            # If you are using CUDA (PyTorch with GPU support)
            if torch.cuda.is_available():
                torch.cuda.manual_seed(i)
                torch.cuda.manual_seed_all(i)  # if you are using multi-GPU.
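            # Note: seeding alone does not guarantee bit-identical GPU runs; if full
            # reproducibility is required, cuDNN could additionally be pinned (at some
            # speed cost), e.g.:
            # torch.backends.cudnn.deterministic = True
            # torch.backends.cudnn.benchmark = False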
            # Environment setup load:
            file_path = os.path.abspath(f"./cfg_env/setting_{experiment_name}.json")
            with open(file_path, 'r') as fp:
                settings = json.load(fp)
            # Model setups:
            stoch_model = StochasticDemandModel(settings)
            settings['time_horizon'] = 100
            env = SimplePlant(settings, stoch_model)  # re-created below once all settings are finalized
            settings['dict_obs'] = False
            setting_sol_method = {
                'discount_rate': 0.99,
                'experiment_name': experiment_name,
                'parallelization': False,
                'model_name': 'PPO',
                'branching_factors': [4, 2, 2],
                'dict_obs': False  # set to True if dictionary observations are required
            }
            # Parameters for the ADPHS:
            setting_sol_method['regressor_name'] = 'plain_matrix_I2xM1'
            setting_sol_method['discount_rate'] = 0.99
            setting_sol_method['run'] = i
            agents = []
            # Parameters for the RL:
            training_epochs_RL = 10000  # 30000
            env = SimplePlant(settings, stoch_model)
            # Number of test executions (complete environment iterations)
            nreps = 100
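            # Each agent block below follows the same pattern: build the agent, train it,
            # reload the best checkpoint saved under ./logs, and register it for testing.
            # A small hypothetical helper could factor this out (sketch, assuming all agents
            # share the learn()/load_agent() interface used below):
            #
            # def train_and_register(name, agent_cls):
            #     agent = agent_cls(env, setting_sol_method)
            #     agent.learn(n_episodes=training_epochs_RL * settings['time_horizon'])
            #     best_dir = os.path.join(BASE_DIR, 'logs', f'best_{name}_{experiment_name}', 'best_model')
            #     agent.load_agent(best_dir)
            #     agents.append((name, agent))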
            ###########################################################################
            # Post-decision PPO - Dual critic
            ###########################################################################
            base_model_name = 'PDPPO'
            pdppo_agent = PDPPOAgent(
                env,
                setting_sol_method
            )
            pdppo_agent.learn(n_episodes=training_epochs_RL * settings['time_horizon'])  # episode length is settings['time_horizon'] steps
            # Load the best agent before appending it to the test list
            BEST_MODEL_DIR = os.path.join(BASE_DIR, 'logs', f'best_{base_model_name}_{experiment_name}', 'best_model')
            pdppo_agent.load_agent(BEST_MODEL_DIR)
            agents.append(("PDPPO", pdppo_agent))
            ###########################################################################
            # PPO
            ###########################################################################
            base_model_name = 'PPO'
            ppo_agent = PPOAgent(
                env,
                setting_sol_method
            )
            ppo_agent.learn(n_episodes=training_epochs_RL * settings['time_horizon'])  # episode length is settings['time_horizon'] steps
            # Load the best agent before appending it to the test list
            BEST_MODEL_DIR = os.path.join(BASE_DIR, 'logs', f'best_{base_model_name}_{experiment_name}', 'best_model')
            ppo_agent.load_agent(BEST_MODEL_DIR)
            agents.append(("PPO", ppo_agent))
            ###########################################################################
            # Post-decision PPO - Single critic
            ###########################################################################
            base_model_name = 'PDPPO_one_critic'
            pdppo_agent_one_critic = PDPPOAgent_one_critic(
                env,
                setting_sol_method
            )
            pdppo_agent_one_critic.learn(n_episodes=training_epochs_RL * settings['time_horizon'])  # episode length is settings['time_horizon'] steps
            # Load the best agent before appending it to the test list
            BEST_MODEL_DIR = os.path.join(BASE_DIR, 'logs', f'best_{base_model_name}_{experiment_name}', 'best_model')
            pdppo_agent_one_critic.load_agent(BEST_MODEL_DIR)
            agents.append(("PDPPO_one_critic", pdppo_agent_one_critic))
            ###########################################################################
            # PPO - two critics
            ###########################################################################
            # base_model_name = 'PPO_two_critics'
            # ppo_agent_two_critics = PPOAgent_two_critics(
            #     env,
            #     setting_sol_method
            # )
            # ppo_agent_two_critics.learn(n_episodes=training_epochs_RL * settings['time_horizon'])  # episode length is settings['time_horizon'] steps
            # # Load the best agent before appending it to the test list
            # BEST_MODEL_DIR = os.path.join(BASE_DIR, 'logs', f'best_{base_model_name}_{experiment_name}', 'best_model')
            # ppo_agent_two_critics.load_agent(BEST_MODEL_DIR)
            # agents.append(("PPO_two_critics", ppo_agent_two_critics))
            ###########################################################################
            # RL A2C
            ###########################################################################
            # setting_sol_method['multiagent'] = False
            # setting_sol_method['parallelization'] = False
            # base_model_name = 'A2C'
            # env = SimplePlant(settings, stoch_model)
            # setting_sol_method['model_name'] = base_model_name
            # rl_agent = StableBaselineAgent(
            #     env,
            #     setting_sol_method
            # )
            # rl_agent.learn(epochs=training_epochs_RL)
            # # Load the best agent before appending it to the test list
            # BEST_MODEL_DIR = os.path.join(BASE_DIR, 'logs', f'best_{base_model_name}_{experiment_name}', 'best_model')
            # rl_agent.load_agent(BEST_MODEL_DIR)
            # agents.append(("A2C", rl_agent))
            ###########################################################################
            # TESTING
            ###########################################################################
            # settings['dict_obs'] = False
            # setting_sol_method['multiagent'] = False
            # setting_sol_method['dict_obs'] = False
            # env = SimplePlant(settings, stoch_model)
            # setting_sol_method['experiment_name'] = experiment_name
            # dict_res = test_agents(
            #     env,
            #     agents=agents,
            #     n_reps=nreps,
            #     setting_sol_method=setting_sol_method,
            #     use_benchmark_PI=False
            # )
            # for key, _ in agents:
            #     cost = dict_res[key, 'costs']
            #     print(f'\n Cost in {nreps} repetitions for the model {key}: {cost}')
            # try:
            #     cost = dict_res['PI', 'costs']
            #     print(f'\n Cost in {nreps} repetitions for the model PI: {cost}')
            # except KeyError:
            #     pass
            # del multiagent
            del env
            gc.collect()