patricktiu1205
commited on
Commit
•
62b4582
1
Parent(s):
20ca8d8
Upload PPO LunarLander-v2 trained agent
Browse files- README.md +1 -1
- config.json +1 -1
- ppo-LunarLander-v2.zip +2 -2
- ppo-LunarLander-v2/data +16 -16
- ppo-LunarLander-v2/policy.optimizer.pth +1 -1
- ppo-LunarLander-v2/policy.pth +1 -1
- replay.mp4 +0 -0
- results.json +1 -1
README.md
CHANGED
@@ -16,7 +16,7 @@ model-index:
|
|
16 |
type: LunarLander-v2
|
17 |
metrics:
|
18 |
- type: mean_reward
|
19 |
-
value:
|
20 |
name: mean_reward
|
21 |
verified: false
|
22 |
---
|
|
|
16 |
type: LunarLander-v2
|
17 |
metrics:
|
18 |
- type: mean_reward
|
19 |
+
value: 269.08 +/- 14.57
|
20 |
name: mean_reward
|
21 |
verified: false
|
22 |
---
|
config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"policy_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVOwAAAAAAAACMIXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5wb2xpY2llc5SMEUFjdG9yQ3JpdGljUG9saWN5lJOULg==", "__module__": "stable_baselines3.common.policies", "__doc__": "\n Policy class for actor-critic algorithms (has both policy and value prediction).\n Used by A2C, PPO and the likes.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param ortho_init: Whether to use or not orthogonal initialization\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param full_std: Whether to use (n_features x n_actions) parameters\n for the std instead of only (n_features,) when using gSDE\n :param use_expln: Use ``expln()`` function instead of ``exp()`` to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param squash_output: Whether to squash the output using a tanh function,\n this allows to ensure boundaries when using gSDE.\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param share_features_extractor: If True, the features extractor is shared between the policy and value networks.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ", "__init__": "<function ActorCriticPolicy.__init__ at 0x00000270885F6320>", "_get_constructor_parameters": "<function ActorCriticPolicy._get_constructor_parameters at 0x00000270885F63B0>", "reset_noise": "<function ActorCriticPolicy.reset_noise at 0x00000270885F6440>", "_build_mlp_extractor": "<function ActorCriticPolicy._build_mlp_extractor at 0x00000270885F64D0>", "_build": "<function ActorCriticPolicy._build at 0x00000270885F6560>", "forward": "<function ActorCriticPolicy.forward at 0x00000270885F65F0>", "extract_features": "<function ActorCriticPolicy.extract_features at 0x00000270885F6680>", "_get_action_dist_from_latent": "<function ActorCriticPolicy._get_action_dist_from_latent at 0x00000270885F6710>", "_predict": "<function ActorCriticPolicy._predict at 0x00000270885F67A0>", "evaluate_actions": "<function ActorCriticPolicy.evaluate_actions at 0x00000270885F6830>", "get_distribution": "<function ActorCriticPolicy.get_distribution at 0x00000270885F68C0>", "predict_values": "<function ActorCriticPolicy.predict_values at 0x00000270885F6950>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x00000270885E71C0>"}, "verbose": 1, "policy_kwargs": {}, "num_timesteps": 1000448, "_total_timesteps": 1000000, "_num_timesteps_at_start": 0, "seed": null, "action_noise": null, "start_time": 1713183574174005400, "learning_rate": 0.0003, "tensorboard_log": null, "_last_obs": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVlQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYgAAAAAAAAAJqPbjyVKbc/z+Y6PwF83T66K3C8gDH9vQAAAAAAAAAAlIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBSwiGlIwBQ5R0lFKULg=="}, "_last_episode_starts": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVdAAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYBAAAAAAAAAACUjAVudW1weZSMBWR0eXBllJOUjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwGFlIwBQ5R0lFKULg=="}, "_last_original_obs": null, "_episode_num": 0, "use_sde": false, "sde_sample_freq": -1, "_current_progress_remaining": -0.00044800000000000395, "_stats_window_size": 100, "ep_info_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVIwwAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHQHFKlwkxASqMAWyUTRMBjAF0lEdAoQXUJv5xi3V9lChoBkdAcBRTfBN21WgHTTkBaAhHQKEG6NVBD5V1fZQoaAZHQHAC7EHdGiJoB00oAWgIR0ChB+mQbMoudX2UKGgGR0BCg/29L6DXaAdLy2gIR0ChCZ5DJEH/dX2UKGgGR0BuYoqgAZKnaAdNCQFoCEdAoQqGN96Tn3V9lChoBkdARqqJl8PWhGgHS+VoCEdAoQtMLncL0HV9lChoBkdAblCH0K7ZnWgHTSIBaAhHQKEMSw8nuzB1fZQoaAZHQHEmAx33YcxoB01GAWgIR0ChDnDQ7cO9dX2UKGgGR0Bs1XZh8YygaAdNHwFoCEdAoQ92TC+De3V9lChoBkdAcrfrGR3eN2gHTQgBaAhHQKEQWkX1rZd1fZQoaAZHQEbj99+gDihoB0u5aAhHQKEQ+vlEJBx1fZQoaAZHQHIE2l67dzpoB0vQaAhHQKESu7/4qPR1fZQoaAZHQHD1MBU70WdoB0v1aAhHQKETmD6nBLx1fZQoaAZHQG7hafzz3AVoB00IAWgIR0ChFHzI3irDdX2UKGgGR0Bwqan4wh4daAdNNgFoCEdAoRWLfP5YYHV9lChoBkdAcDnP4VRDTmgHTTkBaAhHQKEXq1b7j1h1fZQoaAZHQDvqu0TlDF9oB0vNaAhHQKEYdFVktmN1fZQoaAZHQHGti6xxDLNoB00HAWgIR0ChGVgHu7YkdX2UKGgGR0BvINGkN4JNaAdNGQFoCEdAoRtUWGh24nV9lChoBkdAcI2y3kPtlmgHTQIBaAhHQKEcOVoHs1N1fZQoaAZHQHKU6s2eg+RoB02yAmgIR0ChHs7G3nZCdX2UKGgGR0BxD1WBBiTdaAdNEwFoCEdAoSDEEA5q/XV9lChoBkdAbwrVAiV0LmgHTVcBaAhHQKEh+PTXrdF1fZQoaAZHQHConB+F10VoB00AAWgIR0ChIt7SApazdX2UKGgGR0ByYjVbzK9xaAdNVAFoCEdAoSUUeEIw/XV9lChoBkdAcPri5NGmUGgHTToBaAhHQKEmNEZzgdh1fZQoaAZHQHCCutwJgLJoB008AWgIR0ChJ0lwDNhWdX2UKGgGR0Bxf1J7LMcIaAdL/2gIR0ChKS4h+vyLdX2UKGgGR0BwiyTC+De1aAdNbAFoCEdAoSpysr/bTXV9lChoBkdAcejNVinYQWgHTTgBaAhHQKEri49X9zh1fZQoaAZHQG+zQX668QJoB00iAWgIR0ChLKOMl1KXdX2UKGgGR0ByEoXzlLezaAdNjQFoCEdAoS8RmGucMHV9lChoBkdAcTFbYK6WgWgHTQcBaAhHQKEv+GJN0vJ1fZQoaAZHwCMRBAv+OwRoB0tNaAhHQKEwPPDYRNB1fZQoaAZHQHC6GcvugHxoB00DAWgIR0ChMRoLG7z1dX2UKGgGR0BxD+qo60Y1aAdNZgFoCEdAoTNZO8Cgb3V9lChoBkdAbvmPMB6rvWgHTTIBaAhHQKE0ar0aqCJ1fZQoaAZHQGykrSVnmJZoB00LAWgIR0ChNU86V+qjdX2UKGgGR0Bxb0C1Z1V6aAdNGQJoCEdAoThECkoF3nV9lChoBkdAcXdTRYzSC2gHTVUBaAhHQKE5boA4n4R1fZQoaAZHwEBXGDtgKF9oB0tvaAhHQKE50AuqWC51fZQoaAZHQHJtPWcz68BoB0vWaAhHQKE7m9nK4hF1fZQoaAZHQHC/yAc1fmdoB01EAWgIR0ChPL6OPvKEdX2UKGgGR0BvZ2ReTmnwaAdNEAFoCEdAoT23nuAqeHV9lChoBkdAY0r9Wp6yB2gHTegDaAhHQKFCND50r9V1fZQoaAZHQG6S1NxlxwRoB00YAWgIR0ChQzo99tuUdX2UKGgGR0BlmwnYxtYTaAdN6ANoCEdAoUfDkZJkG3V9lChoBkdAcRIn3cpLEmgHS/FoCEdAoUmeXHBDX3V9lChoBkdAZMf5nlGPP2gHTegDaAhHQKFOJaouPFN1fZQoaAZHQEhY0iyIHkdoB0uzaAhHQKFOxxwyZa51fZQoaAZHQHDteXu3MINoB00WAWgIR0ChT8UaqCHzdX2UKGgGR0BuWe5SWJJoaAdNYQFoCEdAoVD4mkWRBHV9lChoBkfALeo9TxXnyWgHS2BoCEdAoVFPa+N96XV9lChoBkdAcI3dK/VRUGgHTYUDaAhHQKFVhQgs9Sx1fZQoaAZHQG7Js5n13+xoB01rAWgIR0ChV8jvd/KAdX2UKGgGR0BwM8QPI4lyaAdNdgJoCEdAoVoKkCV8kXV9lChoBkdAcbSzPa+N+GgHTWACaAhHQKFdJzGxUvR1fZQoaAZHQEpNBC2MKkVoB0upaAhHQKFduf4AS391fZQoaAZHQEqzIOH31z1oB0ufaAhHQKFeQTK1XvJ1fZQoaAZHQGmm384xUNtoB03aAWgIR0ChYP56D5CXdX2UKGgGR0BwEQbBGhEjaAdNAAFoCEdAoWH0My8BdXV9lChoBkdAcCbnNgSey2gHS/xoCEdAoWLTSofjj3V9lChoBkdAO7cYIjW07mgHS1VoCEdAoWMciD/VAnV9lChoBkdASuIyj59E1GgHS4ZoCEdAoWOTuBtk4HV9lChoBkdASAHJaJQ+EGgHS8VoCEdAoWVKRuCPIXV9lChoBkdAckY18b70nWgHTSMBaAhHQKFmWR2bG3p1fZQoaAZHQHF3eCK77KtoB00aAWgIR0ChZ1r6+FlDdX2UKGgGR0BCuuieumrKaAdLrmgIR0ChZ/JI1+AmdX2UKGgGR8BEluqm0mdBaAdLVWgIR0ChaD8qnWJ8dX2UKGgGR0BxV9donKGMaAdN8gFoCEdAoWsO8VYZEXV9lChoBkdAcGGQgs9SuWgHTYcBaAhHQKFsY0VrRBx1fZQoaAZHQCNoMQVbiZRoB0u6aAhHQKFtBOKwY+B1fZQoaAZHQHDAsN+b3GpoB0vlaAhHQKFu0An2Iwd1fZQoaAZHQHIKjcRDkU9oB03AAWgIR0ChcGl2vB8AdX2UKGgGR0BwUEHqu8sdaAdNGgFoCEdAoXFzRKHwgHV9lChoBkdAcqCGy5Zr6GgHTR8BaAhHQKFzbz4DcM51fZQoaAZHwBrWOAAhje9oB0tIaAhHQKFzrfUF0Pp1fZQoaAZHQHJbbyH2ys1oB0vkaAhHQKF0gCVbA1x1fZQoaAZHwE0gg9vCMxZoB0taaAhHQKF0zBVMmF91fZQoaAZHQHDknAAQxvhoB016AmgIR0CheAV7IDHPdX2UKGgGR0BujQLb5/LDaAdNKQFoCEdAoXkJ5/smfHV9lChoBkdAbY1FuNxVAGgHS/5oCEdAoXnpMzuWr3V9lChoBkdATJCAhB7eEmgHS4doCEdAoXpdu5z5oHV9lChoBkdAcTrzImw7kmgHS+5oCEdAoXwtNFjNIXV9lChoBkfAMCj8pCrtFGgHS1doCEdAoXx34j8k2XV9lChoBkdAceSO6/ZdwGgHS+VoCEdAoX1IUzsQd3V9lChoBkdAcl8+UyHmBGgHTQcBaAhHQKF+OERJ2+x1fZQoaAZHQG8jUaqCHypoB02MAWgIR0Chf5M189fUdX2UKGgGR0BxNytZFG5MaAdNWgFoCEdAoYHOAf+0gXV9lChoBkdAcq+1eBxxUGgHS9toCEdAoYKQVEd/8XV9lChoBkdAcQnZRKpT/GgHTQsBaAhHQKGDdYao/A11fZQoaAZHQG9j9a+vhZRoB00WAWgIR0ChhXGWUr08dX2UKGgGR0BwHLf8/D+BaAdNPAFoCEdAoYaMKZ2IPHV9lChoBkdAQ9TG5tm+TWgHS6toCEdAoYcgWYWtVHV9lChoBkdAcBCNorWiDmgHTT0BaAhHQKGINTfBN211fZQoaAZHQHAocL0Bfa9oB00ZAWgIR0Chii8DB/I9dX2UKGgGR0BRvCR0U47zaAdN6ANoCEdAoY7FQ0oBrHV9lChoBkdAchYK/mDDj2gHTTkBaAhHQKGP9jZL7Gh1fZQoaAZHQHDCPATIvJ1oB01eAWgIR0ChkWLaM72ddX2UKGgGR0BuKQTIvJzUaAdNDQFoCEdAoZJkS00FbHVlLg=="}, "ep_success_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVIAAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKULg=="}, "_n_updates": 3908, "observation_space": {":type:": "<class 'gymnasium.spaces.box.Box'>", ":serialized:": "gAWVdgIAAAAAAACMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMDWJvdW5kZWRfYmVsb3eUjBJudW1weS5jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWCAAAAAAAAAABAQEBAQEBAZRoCIwCYjGUiYiHlFKUKEsDjAF8lE5OTkr/////Sv////9LAHSUYksIhZSMAUOUdJRSlIwNYm91bmRlZF9hYm92ZZRoESiWCAAAAAAAAAABAQEBAQEBAZRoFUsIhZRoGXSUUpSMBl9zaGFwZZRLCIWUjANsb3eUaBEoliAAAAAAAAAAAAC0wgAAtMIAAKDAAACgwNsPScAAAKDAAAAAgAAAAICUaAtLCIWUaBl0lFKUjARoaWdolGgRKJYgAAAAAAAAAAAAtEIAALRCAACgQAAAoEDbD0lAAACgQAAAgD8AAIA/lGgLSwiFlGgZdJRSlIwIbG93X3JlcHKUjFtbLTkwLiAgICAgICAgLTkwLiAgICAgICAgIC01LiAgICAgICAgIC01LiAgICAgICAgIC0zLjE0MTU5MjcgIC01LgogIC0wLiAgICAgICAgIC0wLiAgICAgICBdlIwJaGlnaF9yZXBylIxTWzkwLiAgICAgICAgOTAuICAgICAgICAgNS4gICAgICAgICA1LiAgICAgICAgIDMuMTQxNTkyNyAgNS4KICAxLiAgICAgICAgIDEuICAgICAgIF2UjApfbnBfcmFuZG9tlE51Yi4=", "dtype": "float32", "bounded_below": "[ True True True True True True True True]", "bounded_above": "[ True True True True True True True True]", "_shape": [8], "low": "[-90. -90. -5. -5. -3.1415927 -5.\n -0. -0. ]", "high": "[90. 90. 5. 5. 3.1415927 5.\n 1. 1. ]", "low_repr": "[-90. -90. -5. -5. -3.1415927 -5.\n -0. -0. ]", "high_repr": "[90. 90. 5. 5. 3.1415927 5.\n 1. 1. ]", "_np_random": null}, "action_space": {":type:": "<class 'gymnasium.spaces.discrete.Discrete'>", ":serialized:": "gAWV2wAAAAAAAACMGWd5bW5hc2l1bS5zcGFjZXMuZGlzY3JldGWUjAhEaXNjcmV0ZZSTlCmBlH2UKIwBbpSMFW51bXB5LmNvcmUubXVsdGlhcnJheZSMBnNjYWxhcpSTlIwFbnVtcHmUjAVkdHlwZZSTlIwCaTiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYkMIBAAAAAAAAACUhpRSlIwFc3RhcnSUaAhoDkMIAAAAAAAAAACUhpRSlIwGX3NoYXBllCmMBWR0eXBllGgOjApfbnBfcmFuZG9tlE51Yi4=", "n": "4", "start": "0", "_shape": [], "dtype": "int64", "_np_random": null}, "n_envs": 1, "n_steps": 1024, "gamma": 0.999, "gae_lambda": 0.98, "ent_coef": 0.01, "vf_coef": 0.5, "max_grad_norm": 0.5, "batch_size": 64, "n_epochs": 4, "clip_range": {":type:": "<class 'function'>", ":serialized:": "gAWVcAIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMW2M6XFVzZXJzXHB0aXUxXC5jb25kYVxlbnZzXERlZXBSTENvdXJzZVxsaWJcc2l0ZS1wYWNrYWdlc1xzdGFibGVfYmFzZWxpbmVzM1xjb21tb25cdXRpbHMucHmUjARmdW5jlEuEQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UaAx1Tk5oAIwQX21ha2VfZW1wdHlfY2VsbJSTlClSlIWUdJRSlGgAjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoHn2UfZQoaBZoDYwMX19xdWFsbmFtZV9flIwZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5SMD19fYW5ub3RhdGlvbnNfX5R9lIwOX19rd2RlZmF1bHRzX1+UTowMX19kZWZhdWx0c19flE6MCl9fbW9kdWxlX1+UaBeMB19fZG9jX1+UTowLX19jbG9zdXJlX1+UaACMCl9tYWtlX2NlbGyUk5RHP8mZmZmZmZqFlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMC4="}, "clip_range_vf": null, "normalize_advantage": true, "target_kl": null, "lr_schedule": {":type:": "<class 'function'>", ":serialized:": "gAWVcAIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMW2M6XFVzZXJzXHB0aXUxXC5jb25kYVxlbnZzXERlZXBSTENvdXJzZVxsaWJcc2l0ZS1wYWNrYWdlc1xzdGFibGVfYmFzZWxpbmVzM1xjb21tb25cdXRpbHMucHmUjARmdW5jlEuEQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UaAx1Tk5oAIwQX21ha2VfZW1wdHlfY2VsbJSTlClSlIWUdJRSlGgAjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoHn2UfZQoaBZoDYwMX19xdWFsbmFtZV9flIwZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5SMD19fYW5ub3RhdGlvbnNfX5R9lIwOX19rd2RlZmF1bHRzX1+UTowMX19kZWZhdWx0c19flE6MCl9fbW9kdWxlX1+UaBeMB19fZG9jX1+UTowLX19jbG9zdXJlX1+UaACMCl9tYWtlX2NlbGyUk5RHPzOpKjBVMmGFlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMC4="}, "system_info": {"OS": "Windows-10-10.0.22621-SP0 10.0.22621", "Python": "3.10.12", "Stable-Baselines3": "2.0.0a5", "PyTorch": "1.13.1+cu116", "GPU Enabled": "True", "Numpy": "1.26.4", "Cloudpickle": "3.0.0", "Gymnasium": "0.28.1", "OpenAI Gym": "0.26.2"}}
|
|
|
1 |
+
{"policy_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVOwAAAAAAAACMIXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5wb2xpY2llc5SMEUFjdG9yQ3JpdGljUG9saWN5lJOULg==", "__module__": "stable_baselines3.common.policies", "__doc__": "\n Policy class for actor-critic algorithms (has both policy and value prediction).\n Used by A2C, PPO and the likes.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param ortho_init: Whether to use or not orthogonal initialization\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param full_std: Whether to use (n_features x n_actions) parameters\n for the std instead of only (n_features,) when using gSDE\n :param use_expln: Use ``expln()`` function instead of ``exp()`` to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param squash_output: Whether to squash the output using a tanh function,\n this allows to ensure boundaries when using gSDE.\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param share_features_extractor: If True, the features extractor is shared between the policy and value networks.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ", "__init__": "<function ActorCriticPolicy.__init__ at 0x0000024920EBE4D0>", "_get_constructor_parameters": "<function ActorCriticPolicy._get_constructor_parameters at 0x0000024920EBE560>", "reset_noise": "<function ActorCriticPolicy.reset_noise at 0x0000024920EBE5F0>", "_build_mlp_extractor": "<function ActorCriticPolicy._build_mlp_extractor at 0x0000024920EBE680>", "_build": "<function ActorCriticPolicy._build at 0x0000024920EBE710>", "forward": "<function ActorCriticPolicy.forward at 0x0000024920EBE7A0>", "extract_features": "<function ActorCriticPolicy.extract_features at 0x0000024920EBE830>", "_get_action_dist_from_latent": "<function ActorCriticPolicy._get_action_dist_from_latent at 0x0000024920EBE8C0>", "_predict": "<function ActorCriticPolicy._predict at 0x0000024920EBE950>", "evaluate_actions": "<function ActorCriticPolicy.evaluate_actions at 0x0000024920EBE9E0>", "get_distribution": "<function ActorCriticPolicy.get_distribution at 0x0000024920EBEA70>", "predict_values": "<function ActorCriticPolicy.predict_values at 0x0000024920EBEB00>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x0000024920EB7680>"}, "verbose": 1, "policy_kwargs": {}, "num_timesteps": 1000448, "_total_timesteps": 1000000, "_num_timesteps_at_start": 0, "seed": null, "action_noise": null, "start_time": 1713188788623747800, "learning_rate": 0.0003, "tensorboard_log": null, "_last_obs": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVlQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYgAAAAAAAAAOZ2HD0paDC6WvtIM76NM7D/Zh853arJswAAgD8AAIA/lIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBSwiGlIwBQ5R0lFKULg=="}, "_last_episode_starts": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVdAAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYBAAAAAAAAAACUjAVudW1weZSMBWR0eXBllJOUjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwGFlIwBQ5R0lFKULg=="}, "_last_original_obs": null, "_episode_num": 0, "use_sde": false, "sde_sample_freq": -1, "_current_progress_remaining": -0.00044800000000000395, "_stats_window_size": 100, "ep_info_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVKQwAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHQElMK9f1HvuMAWyUS86MAXSUR0ClO63BxgiNdX2UKGgGR0BxDoCjk+5faAdL9GgIR0ClPlqneiztdX2UKGgGR0BwXOVSn+AFaAdNKgFoCEdApT/Jvze41HV9lChoBkdAcnzJNCZ4OmgHTT4BaAhHQKVBUM3IdU91fZQoaAZHQEuo+evpyIZoB0v0aAhHQKVD0UiY9gZ1fZQoaAZHQHC8V7Y02tNoB00lAWgIR0ClRS1LBbfQdX2UKGgGR0Bu1PReC04SaAdNLQFoCEdApUaK+8Gs3nV9lChoBkdAcLcD6WPcSGgHTQUBaAhHQKVHtwnYxtZ1fZQoaAZHQG4w0gr6LwZoB00GAWgIR0ClSmBCUorndX2UKGgGR0BxY1gVoHs1aAdNKQFoCEdApUvIs5GSZHV9lChoBkdAci6jzI3irGgHTQcBaAhHQKVM+TviLl51fZQoaAZHQHCDtZJTVDtoB00IAWgIR0ClTjc5CF9KdX2UKGgGR0Bxeh+d9UjtaAdNDwFoCEdApVDVGmUGFHV9lChoBkdAcGMZB9kSVWgHTTQBaAhHQKVSRjGT9sJ1fZQoaAZHQG9ebUPQOWloB0v1aAhHQKVTcvzvqkd1fZQoaAZHQG5tBNucc2loB00CAWgIR0ClVJwwsXizdX2UKGgGR0BxNEaXKKYRaAdNGAFoCEdApVcv6Q/5cnV9lChoBkdATgP3N9ph4WgHS9VoCEdApVgolD4QBnV9lChoBkdASde+GoJiRWgHS8hoCEdApVkYVTJhfHV9lChoBkdAQywQDmr8zmgHS8BoCEdApVnygkC3gHV9lChoBkdARmCP4mCyyGgHS9ZoCEdApVxXaYeDF3V9lChoBkdANCwggX/HYGgHS7ZoCEdApV0mcjJMg3V9lChoBkdAcSvXD3ueBmgHTWsBaAhHQKVeziYLLIR1fZQoaAZHQHE9Z7sv7FdoB00NAWgIR0ClYACGvfTDdX2UKGgGR0Bw98QZn+Q2aAdNIwFoCEdApWK7Hp8neHV9lChoBkdAbdgLofSx7mgHTR4BaAhHQKVkDA6+36R1fZQoaAZHQHGCm1D0DlpoB0v2aAhHQKVlKNdZ7ol1fZQoaAZHQG6htoSL61toB00hAWgIR0ClZm4yGi5/dX2UKGgGR0Bwi+VUuL75aAdNDQFoCEdApWkBGvwEyXV9lChoBkdAcSrETg2qDWgHTT0BaAhHQKVqdXtBv751fZQoaAZHQG8zKYJE6T5oB00aAWgIR0Cla7miQDFIdX2UKGgGR0BvWaeGwiaBaAdNMwFoCEdApW55hDw6Q3V9lChoBkdAcJv+w1R+B2gHTRoBaAhHQKVvzpgTh5x1fZQoaAZHQHJKGxY7q6hoB02tAWgIR0Clcbosqaw2dX2UKGgGR0Bw0e8K5TZQaAdNBAFoCEdApXP3+fh/AnV9lChoBkdAb/mqSX+l02gHTRMBaAhHQKV1IjQAuI11fZQoaAZHQHBF52U0Nz9oB00zAWgIR0Cldl7iQ1aXdX2UKGgGR0Bvv/SncclxaAdNDQFoCEdApXd2uRs/IXV9lChoBkdAOslLrX18LWgHS+ZoCEdApXnA9kjHGXV9lChoBkdAbp8BOHnEEWgHTSYBaAhHQKV6++ueSSx1fZQoaAZHQHE076DXe3xoB01qAWgIR0ClfHCBGx2TdX2UKGgGR0BvtfHeaa1DaAdNAQFoCEdApX1opazNU3V9lChoBkdARBUl3Qla82gHS9toCEdApX+O0zCUHXV9lChoBkdAS146+36RAGgHS61oCEdApYBUY8+zMXV9lChoBkdAcglun/DLsGgHTSgBaAhHQKWBh9tuUEB1fZQoaAZHQHMHzJyQxN9oB00BAWgIR0ClgpjW9US7dX2UKGgGR0BUhbU1AJLNaAdL2GgIR0ClhL22G7BgdX2UKGgGR0Bx6+i48U22aAdNOgFoCEdApYYfpwCKaXV9lChoBkdAber+4smOVGgHTR8BaAhHQKWHdBPbfxd1fZQoaAZHQD/cmBvrGBFoB0vAaAhHQKWIUBd2Pkt1fZQoaAZHQHEEsD4gzP9oB00WAWgIR0CliuCyyD7JdX2UKGgGR0BxXO7SRbKSaAdNDgFoCEdApYwYsZpBX3V9lChoBkdAbIPaFmFrVWgHTRoBaAhHQKWNXx4ptrN1fZQoaAZHQG/k0DdP+GZoB00ZAWgIR0Cljp6S9ugpdX2UKGgGR0BS29HhCMP0aAdLsWgIR0ClkNAcDKYBdX2UKGgGR0BHZjdP+GXYaAdLzWgIR0ClkcOaOPvKdX2UKGgGR0BwqF+1Bt1qaAdNXAFoCEdApZNXO8kD6nV9lChoBkdAYq19v0h/zGgHTegDaAhHQKWZMDGLk0d1fZQoaAZHQHE6DXrdFfBoB00uAWgIR0Clmo2MS9M9dX2UKGgGR0BvLu2G7BfsaAdL+2gIR0ClnQL5IpYtdX2UKGgGR0BxAESZjQRgaAdNkAFoCEdApZ7YRujynXV9lChoBkdARp/1BdD6WWgHS7hoCEdApZ+qRSxZ+3V9lChoBkdAcaUdP+GXX2gHTQEBaAhHQKWiF4FA3UB1fZQoaAZHQG++2Pkq+aloB00RAWgIR0Clo2BRZU1idX2UKGgGR0Bt2cUbkwN9aAdL72gIR0ClpHYQJ5VwdX2UKGgGR0BwO7Mqz7djaAdNBwFoCEdApaWn/cWTHXV9lChoBkdAbr3ps41gpmgHTRUBaAhHQKWoUTMaCMB1fZQoaAZHQHEvGvfTCtRoB00UAWgIR0ClqZUBGQS0dX2UKGgGR0BwT9gy/KyOaAdL+mgIR0ClqrKZc9nsdX2UKGgGR0Bx4gSUTtb+aAdNOQFoCEdApaw5r1uivnV9lChoBkdAcB/DbJwKjWgHTSEBaAhHQKWu6oP07Kd1fZQoaAZHQG0QYSYgJTloB00YAWgIR0ClsDpCSidrdX2UKGgGR0BxpPWuoxYaaAdNIAFoCEdApbGFOwgTy3V9lChoBkdAbkXO2RaHK2gHS/doCEdApbKbfcer/HV9lChoBkdAcA8nYQJ5V2gHTSMBaAhHQKW1S2qkuYh1fZQoaAZHQG/lk690zTFoB01cAWgIR0ClttjgAIY4dX2UKGgGR0Bw8M54nndPaAdNFQFoCEdApbgxjriVB3V9lChoBkdAcDYQtz0Yj2gHTRsBaAhHQKW670J4SpR1fZQoaAZHQHA1Oq3mV7hoB01DAWgIR0ClvGE9+w1SdX2UKGgGR0BNvsWGh24eaAdLyGgIR0ClvVJudf9hdX2UKGgGR0BxPD7tRekYaAdNHwFoCEdApb6Yis4kvHV9lChoBkdAbTlfCyhSL2gHTSIBaAhHQKXBRh3JPqN1fZQoaAZHQHBbIod+5OJoB01TAWgIR0ClwvWMsH0LdX2UKGgGR0Byt3ra/RE4aAdL/WgIR0ClxBvvKEFodX2UKGgGR0BxzKWQfZElaAdNUQFoCEdApccWXRgJC3V9lChoBkdAcLrG8Empl2gHTQgBaAhHQKXISE9Mbm51fZQoaAZHQHFdtPpIMBpoB00zAWgIR0Clyan1vl2edX2UKGgGR0Bx7sTmGM4taAdNCgFoCEdApcriDZlFt3V9lChoBkdAYx7yDqW1MWgHTegDaAhHQKXQxEQXhwV1fZQoaAZHQHA9viPyTZBoB00TAWgIR0Cl01WZ7XxwdX2UKGgGR0BxBcVk+X7caAdNEgFoCEdApdSaxTsIFHV9lChoBkdAcLTXtjTa02gHTQcBaAhHQKXVx/nW8RN1fZQoaAZHQEIi3lS0jTtoB0vEaAhHQKXWqA7Pppx1fZQoaAZHQEUx/WDpTuRoB0vSaAhHQKXY9hfjS5R1fZQoaAZHQHGQqqGUOd5oB00RAWgIR0Cl2jM5GSZCdX2UKGgGR0A4EbjcVQANaAdLwmgIR0Cl2x3vhIe6dX2UKGgGR0BxwE9r433paAdNLgFoCEdApdytfReC1HV9lChoBkdAcAaO3DvVmWgHTR8BaAhHQKXfUPBBRht1fZQoaAZHQHLLv0/W1+loB001AWgIR0Cl4L+KTB69dX2UKGgGR0BxqMfMfRu1aAdNXgFoCEdApeJS4c3l0nVlLg=="}, "ep_success_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVIAAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKULg=="}, "_n_updates": 3908, "observation_space": {":type:": "<class 'gymnasium.spaces.box.Box'>", ":serialized:": "gAWVdgIAAAAAAACMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMDWJvdW5kZWRfYmVsb3eUjBJudW1weS5jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWCAAAAAAAAAABAQEBAQEBAZRoCIwCYjGUiYiHlFKUKEsDjAF8lE5OTkr/////Sv////9LAHSUYksIhZSMAUOUdJRSlIwNYm91bmRlZF9hYm92ZZRoESiWCAAAAAAAAAABAQEBAQEBAZRoFUsIhZRoGXSUUpSMBl9zaGFwZZRLCIWUjANsb3eUaBEoliAAAAAAAAAAAAC0wgAAtMIAAKDAAACgwNsPScAAAKDAAAAAgAAAAICUaAtLCIWUaBl0lFKUjARoaWdolGgRKJYgAAAAAAAAAAAAtEIAALRCAACgQAAAoEDbD0lAAACgQAAAgD8AAIA/lGgLSwiFlGgZdJRSlIwIbG93X3JlcHKUjFtbLTkwLiAgICAgICAgLTkwLiAgICAgICAgIC01LiAgICAgICAgIC01LiAgICAgICAgIC0zLjE0MTU5MjcgIC01LgogIC0wLiAgICAgICAgIC0wLiAgICAgICBdlIwJaGlnaF9yZXBylIxTWzkwLiAgICAgICAgOTAuICAgICAgICAgNS4gICAgICAgICA1LiAgICAgICAgIDMuMTQxNTkyNyAgNS4KICAxLiAgICAgICAgIDEuICAgICAgIF2UjApfbnBfcmFuZG9tlE51Yi4=", "dtype": "float32", "bounded_below": "[ True True True True True True True True]", "bounded_above": "[ True True True True True True True True]", "_shape": [8], "low": "[-90. -90. -5. -5. -3.1415927 -5.\n -0. -0. ]", "high": "[90. 90. 5. 5. 3.1415927 5.\n 1. 1. ]", "low_repr": "[-90. -90. -5. -5. -3.1415927 -5.\n -0. -0. ]", "high_repr": "[90. 90. 5. 5. 3.1415927 5.\n 1. 1. ]", "_np_random": null}, "action_space": {":type:": "<class 'gymnasium.spaces.discrete.Discrete'>", ":serialized:": "gAWV2wAAAAAAAACMGWd5bW5hc2l1bS5zcGFjZXMuZGlzY3JldGWUjAhEaXNjcmV0ZZSTlCmBlH2UKIwBbpSMFW51bXB5LmNvcmUubXVsdGlhcnJheZSMBnNjYWxhcpSTlIwFbnVtcHmUjAVkdHlwZZSTlIwCaTiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYkMIBAAAAAAAAACUhpRSlIwFc3RhcnSUaAhoDkMIAAAAAAAAAACUhpRSlIwGX3NoYXBllCmMBWR0eXBllGgOjApfbnBfcmFuZG9tlE51Yi4=", "n": "4", "start": "0", "_shape": [], "dtype": "int64", "_np_random": null}, "n_envs": 1, "n_steps": 1024, "gamma": 0.999, "gae_lambda": 0.98, "ent_coef": 0.01, "vf_coef": 0.5, "max_grad_norm": 0.5, "batch_size": 64, "n_epochs": 4, "clip_range": {":type:": "<class 'function'>", ":serialized:": "gAWVcAIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMW2M6XFVzZXJzXHB0aXUxXC5jb25kYVxlbnZzXERlZXBSTENvdXJzZVxsaWJcc2l0ZS1wYWNrYWdlc1xzdGFibGVfYmFzZWxpbmVzM1xjb21tb25cdXRpbHMucHmUjARmdW5jlEuEQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UaAx1Tk5oAIwQX21ha2VfZW1wdHlfY2VsbJSTlClSlIWUdJRSlGgAjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoHn2UfZQoaBZoDYwMX19xdWFsbmFtZV9flIwZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5SMD19fYW5ub3RhdGlvbnNfX5R9lIwOX19rd2RlZmF1bHRzX1+UTowMX19kZWZhdWx0c19flE6MCl9fbW9kdWxlX1+UaBeMB19fZG9jX1+UTowLX19jbG9zdXJlX1+UaACMCl9tYWtlX2NlbGyUk5RHP8mZmZmZmZqFlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMC4="}, "clip_range_vf": null, "normalize_advantage": true, "target_kl": null, "lr_schedule": {":type:": "<class 'function'>", ":serialized:": "gAWVcAIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMW2M6XFVzZXJzXHB0aXUxXC5jb25kYVxlbnZzXERlZXBSTENvdXJzZVxsaWJcc2l0ZS1wYWNrYWdlc1xzdGFibGVfYmFzZWxpbmVzM1xjb21tb25cdXRpbHMucHmUjARmdW5jlEuEQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UaAx1Tk5oAIwQX21ha2VfZW1wdHlfY2VsbJSTlClSlIWUdJRSlGgAjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoHn2UfZQoaBZoDYwMX19xdWFsbmFtZV9flIwZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5SMD19fYW5ub3RhdGlvbnNfX5R9lIwOX19rd2RlZmF1bHRzX1+UTowMX19kZWZhdWx0c19flE6MCl9fbW9kdWxlX1+UaBeMB19fZG9jX1+UTowLX19jbG9zdXJlX1+UaACMCl9tYWtlX2NlbGyUk5RHPzOpKjBVMmGFlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMC4="}, "system_info": {"OS": "Windows-10-10.0.22621-SP0 10.0.22621", "Python": "3.10.12", "Stable-Baselines3": "2.0.0a5", "PyTorch": "1.13.1+cu116", "GPU Enabled": "True", "Numpy": "1.26.4", "Cloudpickle": "3.0.0", "Gymnasium": "0.28.1", "OpenAI Gym": "0.26.2"}}
|
ppo-LunarLander-v2.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc5c101536d82d501294f4f0da9fb5cb0cbffaf12be5d2cb751114a0f00c8f29
|
3 |
+
size 145872
|
ppo-LunarLander-v2/data
CHANGED
@@ -4,20 +4,20 @@
|
|
4 |
":serialized:": "gAWVOwAAAAAAAACMIXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5wb2xpY2llc5SMEUFjdG9yQ3JpdGljUG9saWN5lJOULg==",
|
5 |
"__module__": "stable_baselines3.common.policies",
|
6 |
"__doc__": "\n Policy class for actor-critic algorithms (has both policy and value prediction).\n Used by A2C, PPO and the likes.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param ortho_init: Whether to use or not orthogonal initialization\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param full_std: Whether to use (n_features x n_actions) parameters\n for the std instead of only (n_features,) when using gSDE\n :param use_expln: Use ``expln()`` function instead of ``exp()`` to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param squash_output: Whether to squash the output using a tanh function,\n this allows to ensure boundaries when using gSDE.\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param share_features_extractor: If True, the features extractor is shared between the policy and value networks.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
|
7 |
-
"__init__": "<function ActorCriticPolicy.__init__ at
|
8 |
-
"_get_constructor_parameters": "<function ActorCriticPolicy._get_constructor_parameters at
|
9 |
-
"reset_noise": "<function ActorCriticPolicy.reset_noise at
|
10 |
-
"_build_mlp_extractor": "<function ActorCriticPolicy._build_mlp_extractor at
|
11 |
-
"_build": "<function ActorCriticPolicy._build at
|
12 |
-
"forward": "<function ActorCriticPolicy.forward at
|
13 |
-
"extract_features": "<function ActorCriticPolicy.extract_features at
|
14 |
-
"_get_action_dist_from_latent": "<function ActorCriticPolicy._get_action_dist_from_latent at
|
15 |
-
"_predict": "<function ActorCriticPolicy._predict at
|
16 |
-
"evaluate_actions": "<function ActorCriticPolicy.evaluate_actions at
|
17 |
-
"get_distribution": "<function ActorCriticPolicy.get_distribution at
|
18 |
-
"predict_values": "<function ActorCriticPolicy.predict_values at
|
19 |
"__abstractmethods__": "frozenset()",
|
20 |
-
"_abc_impl": "<_abc._abc_data object at
|
21 |
},
|
22 |
"verbose": 1,
|
23 |
"policy_kwargs": {},
|
@@ -26,12 +26,12 @@
|
|
26 |
"_num_timesteps_at_start": 0,
|
27 |
"seed": null,
|
28 |
"action_noise": null,
|
29 |
-
"start_time":
|
30 |
"learning_rate": 0.0003,
|
31 |
"tensorboard_log": null,
|
32 |
"_last_obs": {
|
33 |
":type:": "<class 'numpy.ndarray'>",
|
34 |
-
":serialized:": "
|
35 |
},
|
36 |
"_last_episode_starts": {
|
37 |
":type:": "<class 'numpy.ndarray'>",
|
@@ -45,7 +45,7 @@
|
|
45 |
"_stats_window_size": 100,
|
46 |
"ep_info_buffer": {
|
47 |
":type:": "<class 'collections.deque'>",
|
48 |
-
":serialized:": "
|
49 |
},
|
50 |
"ep_success_buffer": {
|
51 |
":type:": "<class 'collections.deque'>",
|
|
|
4 |
":serialized:": "gAWVOwAAAAAAAACMIXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5wb2xpY2llc5SMEUFjdG9yQ3JpdGljUG9saWN5lJOULg==",
|
5 |
"__module__": "stable_baselines3.common.policies",
|
6 |
"__doc__": "\n Policy class for actor-critic algorithms (has both policy and value prediction).\n Used by A2C, PPO and the likes.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param ortho_init: Whether to use or not orthogonal initialization\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param full_std: Whether to use (n_features x n_actions) parameters\n for the std instead of only (n_features,) when using gSDE\n :param use_expln: Use ``expln()`` function instead of ``exp()`` to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param squash_output: Whether to squash the output using a tanh function,\n this allows to ensure boundaries when using gSDE.\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param share_features_extractor: If True, the features extractor is shared between the policy and value networks.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
|
7 |
+
"__init__": "<function ActorCriticPolicy.__init__ at 0x0000024920EBE4D0>",
|
8 |
+
"_get_constructor_parameters": "<function ActorCriticPolicy._get_constructor_parameters at 0x0000024920EBE560>",
|
9 |
+
"reset_noise": "<function ActorCriticPolicy.reset_noise at 0x0000024920EBE5F0>",
|
10 |
+
"_build_mlp_extractor": "<function ActorCriticPolicy._build_mlp_extractor at 0x0000024920EBE680>",
|
11 |
+
"_build": "<function ActorCriticPolicy._build at 0x0000024920EBE710>",
|
12 |
+
"forward": "<function ActorCriticPolicy.forward at 0x0000024920EBE7A0>",
|
13 |
+
"extract_features": "<function ActorCriticPolicy.extract_features at 0x0000024920EBE830>",
|
14 |
+
"_get_action_dist_from_latent": "<function ActorCriticPolicy._get_action_dist_from_latent at 0x0000024920EBE8C0>",
|
15 |
+
"_predict": "<function ActorCriticPolicy._predict at 0x0000024920EBE950>",
|
16 |
+
"evaluate_actions": "<function ActorCriticPolicy.evaluate_actions at 0x0000024920EBE9E0>",
|
17 |
+
"get_distribution": "<function ActorCriticPolicy.get_distribution at 0x0000024920EBEA70>",
|
18 |
+
"predict_values": "<function ActorCriticPolicy.predict_values at 0x0000024920EBEB00>",
|
19 |
"__abstractmethods__": "frozenset()",
|
20 |
+
"_abc_impl": "<_abc._abc_data object at 0x0000024920EB7680>"
|
21 |
},
|
22 |
"verbose": 1,
|
23 |
"policy_kwargs": {},
|
|
|
26 |
"_num_timesteps_at_start": 0,
|
27 |
"seed": null,
|
28 |
"action_noise": null,
|
29 |
+
"start_time": 1713188788623747800,
|
30 |
"learning_rate": 0.0003,
|
31 |
"tensorboard_log": null,
|
32 |
"_last_obs": {
|
33 |
":type:": "<class 'numpy.ndarray'>",
|
34 |
+
":serialized:": "gAWVlQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYgAAAAAAAAAOZ2HD0paDC6WvtIM76NM7D/Zh853arJswAAgD8AAIA/lIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBSwiGlIwBQ5R0lFKULg=="
|
35 |
},
|
36 |
"_last_episode_starts": {
|
37 |
":type:": "<class 'numpy.ndarray'>",
|
|
|
45 |
"_stats_window_size": 100,
|
46 |
"ep_info_buffer": {
|
47 |
":type:": "<class 'collections.deque'>",
|
48 |
+
":serialized:": "gAWVKQwAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHQElMK9f1HvuMAWyUS86MAXSUR0ClO63BxgiNdX2UKGgGR0BxDoCjk+5faAdL9GgIR0ClPlqneiztdX2UKGgGR0BwXOVSn+AFaAdNKgFoCEdApT/Jvze41HV9lChoBkdAcnzJNCZ4OmgHTT4BaAhHQKVBUM3IdU91fZQoaAZHQEuo+evpyIZoB0v0aAhHQKVD0UiY9gZ1fZQoaAZHQHC8V7Y02tNoB00lAWgIR0ClRS1LBbfQdX2UKGgGR0Bu1PReC04SaAdNLQFoCEdApUaK+8Gs3nV9lChoBkdAcLcD6WPcSGgHTQUBaAhHQKVHtwnYxtZ1fZQoaAZHQG4w0gr6LwZoB00GAWgIR0ClSmBCUorndX2UKGgGR0BxY1gVoHs1aAdNKQFoCEdApUvIs5GSZHV9lChoBkdAci6jzI3irGgHTQcBaAhHQKVM+TviLl51fZQoaAZHQHCDtZJTVDtoB00IAWgIR0ClTjc5CF9KdX2UKGgGR0Bxeh+d9UjtaAdNDwFoCEdApVDVGmUGFHV9lChoBkdAcGMZB9kSVWgHTTQBaAhHQKVSRjGT9sJ1fZQoaAZHQG9ebUPQOWloB0v1aAhHQKVTcvzvqkd1fZQoaAZHQG5tBNucc2loB00CAWgIR0ClVJwwsXizdX2UKGgGR0BxNEaXKKYRaAdNGAFoCEdApVcv6Q/5cnV9lChoBkdATgP3N9ph4WgHS9VoCEdApVgolD4QBnV9lChoBkdASde+GoJiRWgHS8hoCEdApVkYVTJhfHV9lChoBkdAQywQDmr8zmgHS8BoCEdApVnygkC3gHV9lChoBkdARmCP4mCyyGgHS9ZoCEdApVxXaYeDF3V9lChoBkdANCwggX/HYGgHS7ZoCEdApV0mcjJMg3V9lChoBkdAcSvXD3ueBmgHTWsBaAhHQKVeziYLLIR1fZQoaAZHQHE9Z7sv7FdoB00NAWgIR0ClYACGvfTDdX2UKGgGR0Bw98QZn+Q2aAdNIwFoCEdApWK7Hp8neHV9lChoBkdAbdgLofSx7mgHTR4BaAhHQKVkDA6+36R1fZQoaAZHQHGCm1D0DlpoB0v2aAhHQKVlKNdZ7ol1fZQoaAZHQG6htoSL61toB00hAWgIR0ClZm4yGi5/dX2UKGgGR0Bwi+VUuL75aAdNDQFoCEdApWkBGvwEyXV9lChoBkdAcSrETg2qDWgHTT0BaAhHQKVqdXtBv751fZQoaAZHQG8zKYJE6T5oB00aAWgIR0Cla7miQDFIdX2UKGgGR0BvWaeGwiaBaAdNMwFoCEdApW55hDw6Q3V9lChoBkdAcJv+w1R+B2gHTRoBaAhHQKVvzpgTh5x1fZQoaAZHQHJKGxY7q6hoB02tAWgIR0Clcbosqaw2dX2UKGgGR0Bw0e8K5TZQaAdNBAFoCEdApXP3+fh/AnV9lChoBkdAb/mqSX+l02gHTRMBaAhHQKV1IjQAuI11fZQoaAZHQHBF52U0Nz9oB00zAWgIR0Cldl7iQ1aXdX2UKGgGR0Bvv/SncclxaAdNDQFoCEdApXd2uRs/IXV9lChoBkdAOslLrX18LWgHS+ZoCEdApXnA9kjHGXV9lChoBkdAbp8BOHnEEWgHTSYBaAhHQKV6++ueSSx1fZQoaAZHQHE076DXe3xoB01qAWgIR0ClfHCBGx2TdX2UKGgGR0BvtfHeaa1DaAdNAQFoCEdApX1opazNU3V9lChoBkdARBUl3Qla82gHS9toCEdApX+O0zCUHXV9lChoBkdAS146+36RAGgHS61oCEdApYBUY8+zMXV9lChoBkdAcglun/DLsGgHTSgBaAhHQKWBh9tuUEB1fZQoaAZHQHMHzJyQxN9oB00BAWgIR0ClgpjW9US7dX2UKGgGR0BUhbU1AJLNaAdL2GgIR0ClhL22G7BgdX2UKGgGR0Bx6+i48U22aAdNOgFoCEdApYYfpwCKaXV9lChoBkdAber+4smOVGgHTR8BaAhHQKWHdBPbfxd1fZQoaAZHQD/cmBvrGBFoB0vAaAhHQKWIUBd2Pkt1fZQoaAZHQHEEsD4gzP9oB00WAWgIR0CliuCyyD7JdX2UKGgGR0BxXO7SRbKSaAdNDgFoCEdApYwYsZpBX3V9lChoBkdAbIPaFmFrVWgHTRoBaAhHQKWNXx4ptrN1fZQoaAZHQG/k0DdP+GZoB00ZAWgIR0Cljp6S9ugpdX2UKGgGR0BS29HhCMP0aAdLsWgIR0ClkNAcDKYBdX2UKGgGR0BHZjdP+GXYaAdLzWgIR0ClkcOaOPvKdX2UKGgGR0BwqF+1Bt1qaAdNXAFoCEdApZNXO8kD6nV9lChoBkdAYq19v0h/zGgHTegDaAhHQKWZMDGLk0d1fZQoaAZHQHE6DXrdFfBoB00uAWgIR0Clmo2MS9M9dX2UKGgGR0BvLu2G7BfsaAdL+2gIR0ClnQL5IpYtdX2UKGgGR0BxAESZjQRgaAdNkAFoCEdApZ7YRujynXV9lChoBkdARp/1BdD6WWgHS7hoCEdApZ+qRSxZ+3V9lChoBkdAcaUdP+GXX2gHTQEBaAhHQKWiF4FA3UB1fZQoaAZHQG++2Pkq+aloB00RAWgIR0Clo2BRZU1idX2UKGgGR0Bt2cUbkwN9aAdL72gIR0ClpHYQJ5VwdX2UKGgGR0BwO7Mqz7djaAdNBwFoCEdApaWn/cWTHXV9lChoBkdAbr3ps41gpmgHTRUBaAhHQKWoUTMaCMB1fZQoaAZHQHEvGvfTCtRoB00UAWgIR0ClqZUBGQS0dX2UKGgGR0BwT9gy/KyOaAdL+mgIR0ClqrKZc9nsdX2UKGgGR0Bx4gSUTtb+aAdNOQFoCEdApaw5r1uivnV9lChoBkdAcB/DbJwKjWgHTSEBaAhHQKWu6oP07Kd1fZQoaAZHQG0QYSYgJTloB00YAWgIR0ClsDpCSidrdX2UKGgGR0BxpPWuoxYaaAdNIAFoCEdApbGFOwgTy3V9lChoBkdAbkXO2RaHK2gHS/doCEdApbKbfcer/HV9lChoBkdAcA8nYQJ5V2gHTSMBaAhHQKW1S2qkuYh1fZQoaAZHQG/lk690zTFoB01cAWgIR0ClttjgAIY4dX2UKGgGR0Bw8M54nndPaAdNFQFoCEdApbgxjriVB3V9lChoBkdAcDYQtz0Yj2gHTRsBaAhHQKW670J4SpR1fZQoaAZHQHA1Oq3mV7hoB01DAWgIR0ClvGE9+w1SdX2UKGgGR0BNvsWGh24eaAdLyGgIR0ClvVJudf9hdX2UKGgGR0BxPD7tRekYaAdNHwFoCEdApb6Yis4kvHV9lChoBkdAbTlfCyhSL2gHTSIBaAhHQKXBRh3JPqN1fZQoaAZHQHBbIod+5OJoB01TAWgIR0ClwvWMsH0LdX2UKGgGR0Byt3ra/RE4aAdL/WgIR0ClxBvvKEFodX2UKGgGR0BxzKWQfZElaAdNUQFoCEdApccWXRgJC3V9lChoBkdAcLrG8Empl2gHTQgBaAhHQKXISE9Mbm51fZQoaAZHQHFdtPpIMBpoB00zAWgIR0Clyan1vl2edX2UKGgGR0Bx7sTmGM4taAdNCgFoCEdApcriDZlFt3V9lChoBkdAYx7yDqW1MWgHTegDaAhHQKXQxEQXhwV1fZQoaAZHQHA9viPyTZBoB00TAWgIR0Cl01WZ7XxwdX2UKGgGR0BxBcVk+X7caAdNEgFoCEdApdSaxTsIFHV9lChoBkdAcLTXtjTa02gHTQcBaAhHQKXVx/nW8RN1fZQoaAZHQEIi3lS0jTtoB0vEaAhHQKXWqA7Pppx1fZQoaAZHQEUx/WDpTuRoB0vSaAhHQKXY9hfjS5R1fZQoaAZHQHGQqqGUOd5oB00RAWgIR0Cl2jM5GSZCdX2UKGgGR0A4EbjcVQANaAdLwmgIR0Cl2x3vhIe6dX2UKGgGR0BxwE9r433paAdNLgFoCEdApdytfReC1HV9lChoBkdAcAaO3DvVmWgHTR8BaAhHQKXfUPBBRht1fZQoaAZHQHLLv0/W1+loB001AWgIR0Cl4L+KTB69dX2UKGgGR0BxqMfMfRu1aAdNXgFoCEdApeJS4c3l0nVlLg=="
|
49 |
},
|
50 |
"ep_success_buffer": {
|
51 |
":type:": "<class 'collections.deque'>",
|
ppo-LunarLander-v2/policy.optimizer.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 87929
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2e752c536cbfe2189f8953c96989b06aee83e587fcb5f0721d37c01f99b1310
|
3 |
size 87929
|
ppo-LunarLander-v2/policy.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 43329
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53cdc5eb3300f7be79925171c5df440e1a0d0335939cf41dd12838eb15b4fb11
|
3 |
size 43329
|
replay.mp4
CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
|
|
results.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"mean_reward":
|
|
|
1 |
+
{"mean_reward": 269.0811557, "std_reward": 14.566306130591665, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2024-04-15T22:33:22.251487"}
|