---
# Experiment configuration with five top-level sections: algo, context, env,
# microgrid and verbose.
#
# NOTE(review): this block layout was reconstructed from a copy in which all
# indentation had been collapsed. Nesting follows the alphabetical key order
# inside each mapping and the dotted paths listed in context.log_dir.from_keys.
# Placements that those two sources cannot settle carry their own
# NOTE(review) comment below -- confirm them against the consuming loader.

algo:
  ddpg:
    params:
      target_update_tau: 0.01
    policy:
      exploration:
        sigma: 0.3
        theta: 0.15
  deterministic_params:
    buffer_batch_size: 32
    min_buffer_size: 10000
    n_train_steps: 500
    qf_lr: 0.0001
    steps_per_epoch: 1
  dqn:
    params:
      clip_gradient: 10
      deterministic_eval: true
      double_q: false
      target_update_freq: 2
    policy:
      exploration:
        decay_ratio: 0.5
        max_epsilon: 1.0
        min_epsilon: 0.05
  general_params:
    discount: 0.99
  package: garage
  policy:
    hidden_sizes:
      - 128
      - 128
    pretrained_policy: null
  ppo:
    params:
      center_adv: false
    # Sits beside params (not inside it): from_keys references
    # algo.ppo.tanhnormal.
    tanhnormal: false
  pretrain:
    additional_config: null
    algo_to_pretrain: null
    params:
      episodes_per_batch: 10
      loss: log_prob
      policy_lr: 0.01
    # NOTE(review): assumed to be a sibling of params; key order would also
    # allow pretrain.params.pretrain_algo -- confirm.
    pretrain_algo: rbc
  replay_buffer:
    buffer_size: 200000
  rnd:
    batch_size: 64
    bound_reward_weight: cosine
    bound_reward_weight_initial_ratio: 0.999999
    bound_reward_weight_transient_epochs: 10
    hidden_sizes:
      - 64
      - 64
    intrinsic_reward_weight: 0.0001
    n_train_steps: 32
    output_dim: 128
    predictor_lr: 0.001
    standardize_extrinsic_reward: true
    standardize_intrinsic_reward: true
  sampler:
    n_workers: 16
    type: ray
  train:
    batch_size: 50000
    n_epochs: 100
    steps_per_epoch: 32
  # NOTE(review): assumed to be algo.type; key order would also allow
  # algo.train.type -- confirm.
  type: ppo

context:
  disable_logging: false
  experiment_name: null
  log_dir:
    # Dotted paths into this same document.
    from_keys:
      - microgrid.config.scenario
      - microgrid.methods.set_forecaster.forecaster
      - microgrid.methods.set_module_attrs.battery_transition_model
      - context.seed
      - env.domain_randomization.noise_std
      - algo.ppo.tanhnormal
      - algo.rnd.intrinsic_reward_weight
    # Absolute, machine-specific path.
    parent: /home/ahalev/data/GridRL/paper_experiments
    use_existing_dir: false
  seed: 42
  snapshot_gap: 10
  verbose: 0
  wandb:
    # Points at a key file rather than embedding the key itself.
    api_key_file: ../../local/wandb_api_key.txt
    group: null
    log_density: 1
    plot_baseline:
      - mpc
      - rbc
    username: ahalev

env:
  cls: DiscreteMicrogridEnv
  domain_randomization:
    noise_std: 0.01
    relative_noise: true
  forced_genset: null
  net_load:
    slack_module: genset
    use: true
  observation_keys:
    - soc
    - net_load
    - import_price_current
    - import_price_forecast_0
    - import_price_forecast_1
    - import_price_forecast_2
    - import_price_forecast_3
    - import_price_forecast_4

microgrid:
  attributes:
    # Application-specific tag; the loader must register a constructor for it.
    reward_shaping_func: !BaselineShaper
      baseline_module: false
      module:
        - genset
        - 0
  config:
    scenario: 1
  methods:
    set_forecaster:
      forecast_horizon: 23
      forecaster: 0.0
      forecaster_increase_uncertainty: true
      forecaster_relative_noise: true
    set_module_attrs:
      battery_transition_model: null
      # NOTE(review): assumed to belong to set_module_attrs; key order would
      # also allow microgrid.normalized_action_bounds -- confirm.
      normalized_action_bounds:
        - 0.0
        - 1.0
  # NOTE(review): assumed to be nested under microgrid; key order would also
  # allow a top-level trajectory section -- confirm.
  trajectory:
    evaluate:
      final_step: -1
      initial_step: 5840
      trajectory_func: null
    train:
      final_step: 5840
      initial_step: 0
      # Application-specific tag; the loader must register a constructor.
      trajectory_func: !FixedLengthStochasticTrajectory
        trajectory_length: 720

# NOTE(review): assumed to be a top-level key (separate from context.verbose);
# key order would also allow it to sit deeper, e.g. under microgrid -- confirm.
verbose: 1