LunarLanderContinuous-v2-QGPO / policy_config.json
zjowowen's picture
Upload policy_config.json with huggingface_hub
ad95118 verified
raw
history blame
5.22 kB
{
"train": {
"project": "LunarLanderContinuous-v2-QGPO-VPSDE",
"device": "cuda",
"wandb": {
"project": "IQL-LunarLanderContinuous-v2-QGPO-VPSDE"
},
"simulator": {
"type": "GymEnvSimulator",
"args": {
"env_id": "LunarLanderContinuous-v2"
}
},
"model": {
"QGPOPolicy": {
"device": "cuda",
"critic": {
"device": "cuda",
"q_alpha": 1.0,
"DoubleQNetwork": {
"backbone": {
"type": "ConcatenateMLP",
"args": {
"hidden_sizes": [
10,
256,
256
],
"output_size": 1,
"activation": "relu"
}
}
}
},
"diffusion_model": {
"device": "cuda",
"x_size": 2,
"alpha": 1.0,
"solver": {
"type": "DPMSolver",
"args": {
"order": 2,
"device": "cuda",
"steps": 17
}
},
"path": {
"type": "linear_vp_sde",
"beta_0": 0.1,
"beta_1": 20.0
},
"reverse_path": {
"type": "linear_vp_sde",
"beta_0": 0.1,
"beta_1": 20.0
},
"model": {
"type": "noise_function",
"args": {
"t_encoder": {
"type": "GaussianFourierProjectionTimeEncoder",
"args": {
"embed_dim": 32,
"scale": 30.0
}
},
"backbone": {
"type": "TemporalSpatialResidualNet",
"args": {
"hidden_sizes": [
512,
256,
128
],
"output_dim": 2,
"t_dim": 32,
"condition_dim": 8,
"condition_hidden_dim": 32,
"t_condition_hidden_dim": 128
}
}
}
},
"energy_guidance": {
"t_encoder": {
"type": "GaussianFourierProjectionTimeEncoder",
"args": {
"embed_dim": 32,
"scale": 30.0
}
},
"backbone": {
"type": "ConcatenateMLP",
"args": {
"hidden_sizes": [
42,
256,
256
],
"output_size": 1,
"activation": "silu"
}
}
}
}
}
},
"parameter": {
"behaviour_policy": {
"batch_size": 1024,
"learning_rate": 0.0001,
"epochs": 500
},
"action_augment_num": 16,
"fake_data_t_span": null,
"energy_guided_policy": {
"batch_size": 256
},
"critic": {
"stop_training_epochs": 500,
"learning_rate": 0.0001,
"discount_factor": 0.99,
"update_momentum": 0.005
},
"energy_guidance": {
"epochs": 1000,
"learning_rate": 0.0001
},
"evaluation": {
"evaluation_interval": 50,
"guidance_scale": [
0.0,
1.0,
2.0
]
},
"checkpoint_path": "./LunarLanderContinuous-v2-QGPO"
}
},
"deploy": {
"device": "cuda",
"env": {
"env_id": "LunarLanderContinuous-v2",
"seed": 0
},
"num_deploy_steps": 1000,
"t_span": null
}
}