nsanghi commited on
Commit
3ef4bcf
1 Parent(s): a97c258

Push to Hub

Browse files
README.md CHANGED
@@ -16,7 +16,7 @@ model-index:
16
  type: CartPole-v1
17
  metrics:
18
  - type: mean_reward
19
- value: 152.20 +/- 2.86
20
  name: mean_reward
21
  verified: false
22
  ---
 
16
  type: CartPole-v1
17
  metrics:
18
  - type: mean_reward
19
+ value: 500.00 +/- 0.00
20
  name: mean_reward
21
  verified: false
22
  ---
args.yml CHANGED
@@ -54,13 +54,13 @@
54
  - - save_replay_buffer
55
  - false
56
  - - seed
57
- - 4205676148
58
  - - storage
59
  - null
60
  - - study_name
61
  - null
62
  - - tensorboard_log
63
- - runs/CartPole-v1__dqn__4205676148__1698256789
64
  - - track
65
  - true
66
  - - trained_agent
 
54
  - - save_replay_buffer
55
  - false
56
  - - seed
57
+ - 2904896230
58
  - - storage
59
  - null
60
  - - study_name
61
  - null
62
  - - tensorboard_log
63
+ - runs/CartPole-v1__dqn__2904896230__1698259642
64
  - - track
65
  - true
66
  - - trained_agent
dqn-CartPole-v1.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de6a6e62ebce57ab258e1e17e873086d91349745f1365c364002c193661356cf
3
- size 1107479
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c52e0bab37152188fea3b271cee0fd2faf7752eb8521a5ed578f989737a9bf4
3
+ size 1107467
dqn-CartPole-v1/data CHANGED
@@ -5,15 +5,15 @@
5
  "__module__": "stable_baselines3.dqn.policies",
6
  "__annotations__": "{'q_net': <class 'stable_baselines3.dqn.policies.QNetwork'>, 'q_net_target': <class 'stable_baselines3.dqn.policies.QNetwork'>}",
7
  "__doc__": "\n Policy class with Q-Value Net and target net for DQN\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
8
- "__init__": "<function DQNPolicy.__init__ at 0x7f7e7d8d8310>",
9
- "_build": "<function DQNPolicy._build at 0x7f7e7d8d83a0>",
10
- "make_q_net": "<function DQNPolicy.make_q_net at 0x7f7e7d8d8430>",
11
- "forward": "<function DQNPolicy.forward at 0x7f7e7d8d84c0>",
12
- "_predict": "<function DQNPolicy._predict at 0x7f7e7d8d8550>",
13
- "_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at 0x7f7e7d8d85e0>",
14
- "set_training_mode": "<function DQNPolicy.set_training_mode at 0x7f7e7d8d8670>",
15
  "__abstractmethods__": "frozenset()",
16
- "_abc_impl": "<_abc._abc_data object at 0x7f7e7d8cadc0>"
17
  },
18
  "verbose": 1,
19
  "policy_kwargs": {
@@ -27,12 +27,12 @@
27
  "_num_timesteps_at_start": 0,
28
  "seed": 0,
29
  "action_noise": null,
30
- "start_time": 1698256792389686494,
31
  "learning_rate": {
32
  ":type:": "<class 'function'>",
33
  ":serialized:": "gAWVAwMAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMaC9ob21lL25zYW5naGkvc2FuZGJveC9hcHJlc3MvZHJsLTJlZC92ZW52L2xpYi9weXRob24zLjEwL3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwEZnVuY5RLg0MCBAGUjAN2YWyUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flIxoL2hvbWUvbnNhbmdoaS9zYW5kYm94L2FwcmVzcy9kcmwtMmVkL3ZlbnYvbGliL3B5dGhvbjMuMTAvc2l0ZS1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUdU5OaACMEF9tYWtlX2VtcHR5X2NlbGyUk5QpUpSFlHSUUpSMHGNsb3VkcGlja2xlLmNsb3VkcGlja2xlX2Zhc3SUjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoH32UfZQoaBZoDYwMX19xdWFsbmFtZV9flIwZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5SMD19fYW5ub3RhdGlvbnNfX5R9lIwOX19rd2RlZmF1bHRzX1+UTowMX19kZWZhdWx0c19flE6MCl9fbW9kdWxlX1+UaBeMB19fZG9jX1+UTowLX19jbG9zdXJlX1+UaACMCl9tYWtlX2NlbGyUk5RHP2LXcxj8UEiFlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMC4="
34
  },
35
- "tensorboard_log": "runs/CartPole-v1__dqn__4205676148__1698256789/CartPole-v1",
36
  "_last_obs": null,
37
  "_last_episode_starts": {
38
  ":type:": "<class 'numpy.ndarray'>",
@@ -40,16 +40,16 @@
40
  },
41
  "_last_original_obs": {
42
  ":type:": "<class 'numpy.ndarray'>",
43
- ":serialized:": "gAWVhQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYQAAAAAAAAAMTQED0Ugzw/oNLJPdyrmL6UjAVudW1weZSMBWR0eXBllJOUjAJmNJSJiIeUUpQoSwOMATyUTk5OSv////9K/////0sAdJRiSwFLBIaUjAFDlHSUUpQu"
44
  },
45
- "_episode_num": 379,
46
  "use_sde": false,
47
  "sde_sample_freq": -1,
48
  "_current_progress_remaining": 0.40002000000000004,
49
  "_stats_window_size": 100,
50
  "ep_info_buffer": {
51
  ":type:": "<class 'collections.deque'>",
52
- ":serialized:": "gAWV7QsAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHQF1AAAAAAACMAWyUS3WMAXSUR0BFtmVJL/S6dX2UKGgGR0BloAAAAAAAaAdLrWgIR0BFvYKIBRyfdX2UKGgGR0BiYAAAAAAAaAdLk2gIR0BGBshouf29dX2UKGgGR0A4AAAAAAAAaAdLGGgIR0BGCCb+cYqHdX2UKGgGR0A4AAAAAAAAaAdLGGgIR0BGCVTaTOgQdX2UKGgGR0BCgAAAAAAAaAdLJWgIR0BGCwMx46fbdX2UKGgGR0A5AAAAAAAAaAdLGWgIR0BGDDo6jnFHdX2UKGgGR0A2AAAAAAAAaAdLFmgIR0BGDnnU2DQJdX2UKGgGR0AuAAAAAAAAaAdLD2gIR0BGD0RWcSXddX2UKGgGR0A8AAAAAAAAaAdLHGgIR0BGEMWGh24edX2UKGgGR0BgIAAAAAAAaAdLgWgIR0BGjioKlYU4dX2UKGgGR0Bg4AAAAAAAaAdLh2gIR0BHBpHy3CsPdX2UKGgGR0BgwAAAAAAAaAdLhmgIR0BHDlfJFLFodX2UKGgGR0BhgAAAAAAAaAdLjGgIR0BHk/LcKw6idX2UKGgGR0BcAAAAAAAAaAdLcGgIR0BHnBDw6QvIdX2UKGgGR0BDgAAAAAAAaAdLJ2gIR0BHn9oN/e+FdX2UKGgGR0BcgAAAAAAAaAdLcmgIR0BH0sz/IbOvdX2UKGgGR0BfgAAAAAAAaAdLfmgIR0BH3DS5RTCMdX2UKGgGR0BewAAAAAAAaAdLe2gIR0BIIAdwNsnBdX2UKGgGR0BXgAAAAAAAaAdLXmgIR0BIJNYr8R+SdX2UKGgGR0BXgAAAAAAAaAdLXmgIR0BIKnZCfHxSdX2UKGgGR0BYQAAAAAAAaAdLYWgIR0BIq2EK3NLUdX2UKGgGR0BhYAAAAAAAaAdLi2gIR0BIruIhyKekdX2UKGgGR0BgAAAAAAAAaAdLgGgIR0BI52cz67/XdX2UKGgGR0AqAAAAAAAAaAdLDWgIR0BI6OG9HtngdX2UKGgGR0AsAAAAAAAAaAdLDmgIR0BI6mplz2eydX2UKGgGR0BXwAAAAAAAaAdLX2gIR0BI7yMUAT7EdX2UKGgGR0BFgAAAAAAAaAdLK2gIR0BI8WgFotcwdX2UKGgGR0Bi4AAAAAAAaAdLl2gIR0BJTI1DSgGsdX2UKGgGR0BwoAAAAAAAaAdNCgFoCEdASdVBt1p0wXV9lChoBkdAZKAAAAAAAGgHS6VoCEdASjevB7/n4nV9lChoBkdAccAAAAAAAGgHTRwBaAhHQEqBWvr4WUN1fZQoaAZHQHMgAAAAAABoB00yAWgIR0BK2hzeXRgJdX2UKGgGR0BfAAAAAAAAaAdLfGgIR0BK4GxD9fkWdX2UKGgGR0BzMAAAAAAAaAdNMwFoCEdAS39vZRKpUHV9lChoBkdAXQAAAAAAAGgHS3RoCEdAS4ZX2dupCXV9lChoBkdAXkAAAAAAAGgHS3loCEdAS8bEaVD8cnV9lChoBkdAXgAAAAAAAGgHS3hoCEdAS8wh6jWTYHV9lChoBkdAXwAAAAAAAGgHS3xoCEdATAhMewLVnXV9lChoBkdAaAAAAAAAAGgHS8BoCEdATBDneSB9TnV9lChoBkdAZYAAAAAAAGgHS6xoCEdATFMU47zTW3V9lChoBkdAYOAAAAAAAGgHS4doCEdATFrMNc4YJnV9lChoBkdAWUAAAAAAAGgHS2VoCEdATSewTufEoHV9lChoBkdAWQAAAAAAAGgHS2RoCEdATS9H+ZPVNHV9lChoBkdAWsAAAAAAAGgHS2toCEdATVs9Oh0yQHV9lChoBkdAZeAAAAAAAGgHS69oCEdATWNpXZGrj3V9lChoBkdAYeAAAAAAAGgHS49oCEdATZuF36hxpHV9lChoBkdAX0AAAAAAAGgHS31oCEdATaIgaFVT73V9lChoBkdAXUAAAAAAAGgHS3VoCEdATejDVH4GlnV9lChoBkdAYOAAAAAAAGgHS4doCEdATe+4XoC+13V9lChoBkdAToAAAAAAAGgHSz1oCEdATipbOeJ53XV9lChoBkdAYOAAAAAAAGgHS4doCEdATjGzyBkI5nV9lChoBkdAYQAAAAAAAGgHS4hoCEdATrSPn0TURXV9lChoBkdAZ8AAAAAAAGgHS75oCEdATrmETQE6k3V9lChoBkdAcHAAAAAAAGgHTQcBaAhHQE8CqSX+l0p1fZQoaAZHQHcAAAAAAABoB01wAWgIR0BP99PLxI8RdX2UKGgGR0BXQAAAAAAAaAdLXWgIR0BP/WSlnAZbdX2UKGgGR0AwAAAAAAAAaAdLEGgIR0BP/m5UcXFcdX2UKGgGR0BZQAAAAAAAaAdLZWgIR0BQN6/qPfbcdX2UKGgGR0BeAAAAAAAAaAdLeGgIR0BQOfLX+VC5dX2UKGgGR0BfgAAAAAAAaAdLfmgIR0BQWOQEIPbxdX2UKGgGR0BpwAAAAAAAaAdLzmgIR0BQXmdNFjNIdX2UKGgGR0BiYAAAAAAAaAdLk2gIR0BQgkL2HtWudX2UKGgGR0Bh4AAAAAAAaAdLj2gIR0BQo9Ujs2NvdX2UKGgGR0B0EAAAAAAAaAdNQQFoCEdAUOMpXp4bCXV9lChoBkdAf0AAAAAAAGgHTfQBaAhHQFFMSElE7XB1fZQoaAZHQF6AAAAAAABoB0t6aAhHQFFPQswtapx1fZQoaAZHQF+AAAAAAABoB0t+aAhHQFGbJ0W/JvJ1fZQoaAZHQGBgAAAAAABoB0uDaAhHQFGfQAuIyj51fZQoaAZHQGHAAAAAAABoB0uOaAhHQFG7KYzBRAN1fZQoaAZHQH9AAAAAAABoB030AWgIR0BSB0S/TLGJdX2UKGgGR0BqgAAAAAAAaAdL1GgIR0BSPQiA2AG0dX2UKGgGR0BYQAAAAAAAaAdLYWgIR0BSP5v1lGwzdX2UKGgGR0BXQAAAAAAAaAdLXWgIR0BSQxBRhttRdX2UKGgGR0AqAAAAAAAAaAdLDWgIR0BSQ1wYLsrvdX2UKGgGR0BcQAAAAAAAaAdLcWgIR0BSaezdDYywdX2UKGgGR0BdQAAAAAAAaAdLdWgIR0BSbAqd6LOzdX2UKGgGR0BlgAAAAAAAaAdLrGgIR0BSi0mplz2fdX2UKGgGR0B/QAAAAAAAaAdN9AFoCEdAUwjhcZ9/jXV9lChoBkdAYWAAAAAAAGgHS4toCEdAUw41cdHUdHV9lChoBkdAYqAAAAAAAGgHS5VoCEdAU1/JOnEVFnV9lChoBkdAYCAAAAAAAGgHS4FoCEdAU6bNQj2SMnV9lChoBkdAc7AAAAAAAGgHTTsBaAhHQFPwGlQ/HHZ1fZQoaAZHQGHgAAAAAABoB0uPaAhHQFP2g/Tspod1fZQoaAZHQGFAAAAAAABoB0uKaAhHQFQvGSIP9UF1fZQoaAZHQH9AAAAAAABoB030AWgIR0BUpxgy/KyOdX2UKGgGR0BmIAAAAAAAaAdLsWgIR0BUw8oDxLCfdX2UKGgGR0BvoAAAAAAAaAdL/WgIR0BU5B6OYIBzdX2UKGgGR0BnwAAAAAAAaAdLvmgIR0BU6bj5sTFmdX2UKGgGR0Bz8AAAAAAAaAdNPwFoCEdAVVGOR1X/53V9lChoBkdAVUAAAAAAAGgHS1VoCEdAVVQoKD0163V9lChoBkdAZ4AAAAAAAGgHS7xoCEdAVZfXoTwlSnV9lChoBkdAZuAAAAAAAGgHS7doCEdAVZ3iZOSGJ3V9lChoBkdAYsAAAAAAAGgHS5ZoCEdAVe6HoHLRr3V9lChoBkdAYIAAAAAAAGgHS4RoCEdAVfHxaxHG0nV9lChoBkdASYAAAAAAAGgHSzNoCEdAVi2O7xusLnV9lChoBkdAbiAAAAAAAGgHS/FoCEdAVlAn+hoM8nV9lChoBkdAYEAAAAAAAGgHS4JoCEdAVlTCbc45tHV9lChoBkdAJgAAAAAAAGgHSwtoCEdAVlUOCoS+QHV9lChoBkdAXgAAAAAAAGgHS3hoCEdAVnUeQuEmIHVlLg=="
53
  },
54
  "ep_success_buffer": {
55
  ":type:": "<class 'collections.deque'>",
@@ -93,13 +93,13 @@
93
  ":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==",
94
  "__module__": "stable_baselines3.common.buffers",
95
  "__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
96
- "__init__": "<function ReplayBuffer.__init__ at 0x7f7e7d8bcaf0>",
97
- "add": "<function ReplayBuffer.add at 0x7f7e7d8bcb80>",
98
- "sample": "<function ReplayBuffer.sample at 0x7f7e7d8bcc10>",
99
- "_get_samples": "<function ReplayBuffer._get_samples at 0x7f7e7d8bcca0>",
100
- "_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at 0x7f7e7d8bcd30>)>",
101
  "__abstractmethods__": "frozenset()",
102
- "_abc_impl": "<_abc._abc_data object at 0x7f7e7d838440>"
103
  },
104
  "replay_buffer_kwargs": {},
105
  "train_freq": {
 
5
  "__module__": "stable_baselines3.dqn.policies",
6
  "__annotations__": "{'q_net': <class 'stable_baselines3.dqn.policies.QNetwork'>, 'q_net_target': <class 'stable_baselines3.dqn.policies.QNetwork'>}",
7
  "__doc__": "\n Policy class with Q-Value Net and target net for DQN\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
8
+ "__init__": "<function DQNPolicy.__init__ at 0x7f2fa26a0310>",
9
+ "_build": "<function DQNPolicy._build at 0x7f2fa26a03a0>",
10
+ "make_q_net": "<function DQNPolicy.make_q_net at 0x7f2fa26a0430>",
11
+ "forward": "<function DQNPolicy.forward at 0x7f2fa26a04c0>",
12
+ "_predict": "<function DQNPolicy._predict at 0x7f2fa26a0550>",
13
+ "_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at 0x7f2fa26a05e0>",
14
+ "set_training_mode": "<function DQNPolicy.set_training_mode at 0x7f2fa26a0670>",
15
  "__abstractmethods__": "frozenset()",
16
+ "_abc_impl": "<_abc._abc_data object at 0x7f2fa268f500>"
17
  },
18
  "verbose": 1,
19
  "policy_kwargs": {
 
27
  "_num_timesteps_at_start": 0,
28
  "seed": 0,
29
  "action_noise": null,
30
+ "start_time": 1698259645745364335,
31
  "learning_rate": {
32
  ":type:": "<class 'function'>",
33
  ":serialized:": "gAWVAwMAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMaC9ob21lL25zYW5naGkvc2FuZGJveC9hcHJlc3MvZHJsLTJlZC92ZW52L2xpYi9weXRob24zLjEwL3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwEZnVuY5RLg0MCBAGUjAN2YWyUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flIxoL2hvbWUvbnNhbmdoaS9zYW5kYm94L2FwcmVzcy9kcmwtMmVkL3ZlbnYvbGliL3B5dGhvbjMuMTAvc2l0ZS1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUdU5OaACMEF9tYWtlX2VtcHR5X2NlbGyUk5QpUpSFlHSUUpSMHGNsb3VkcGlja2xlLmNsb3VkcGlja2xlX2Zhc3SUjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoH32UfZQoaBZoDYwMX19xdWFsbmFtZV9flIwZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5SMD19fYW5ub3RhdGlvbnNfX5R9lIwOX19rd2RlZmF1bHRzX1+UTowMX19kZWZhdWx0c19flE6MCl9fbW9kdWxlX1+UaBeMB19fZG9jX1+UTowLX19jbG9zdXJlX1+UaACMCl9tYWtlX2NlbGyUk5RHP2LXcxj8UEiFlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMC4="
34
  },
35
+ "tensorboard_log": "runs/CartPole-v1__dqn__2904896230__1698259642/CartPole-v1",
36
  "_last_obs": null,
37
  "_last_episode_starts": {
38
  ":type:": "<class 'numpy.ndarray'>",
 
40
  },
41
  "_last_original_obs": {
42
  ":type:": "<class 'numpy.ndarray'>",
43
+ ":serialized:": "gAWVhQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYQAAAAAAAAADnT0D/cl12+LmdSOkzGQ76UjAVudW1weZSMBWR0eXBllJOUjAJmNJSJiIeUUpQoSwOMATyUTk5OSv////9K/////0sAdJRiSwFLBIaUjAFDlHSUUpQu"
44
  },
45
+ "_episode_num": 365,
46
  "use_sde": false,
47
  "sde_sample_freq": -1,
48
  "_current_progress_remaining": 0.40002000000000004,
49
  "_stats_window_size": 100,
50
  "ep_info_buffer": {
51
  ":type:": "<class 'collections.deque'>",
52
+ ":serialized:": "gAWV5gsAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHQFTAAAAAAACMAWyUS1OMAXSUR0BJdt2cJ+lTdX2UKGgGR0BVgAAAAAAAaAdLVmgIR0BJe0Re1KGtdX2UKGgGR0AqAAAAAAAAaAdLDWgIR0BJfBAv+OwQdX2UKGgGR0BXQAAAAAAAaAdLXWgIR0BJg4e1a4c4dX2UKGgGR0AsAAAAAAAAaAdLDmgIR0BJhEcKgIyCdX2UKGgGR0AoAAAAAAAAaAdLDGgIR0BJhNRvWH1wdX2UKGgGR0AkAAAAAAAAaAdLCmgIR0BKBGXokiUxdX2UKGgGR0BcQAAAAAAAaAdLcWgIR0BKCr1uivgWdX2UKGgGR0BaAAAAAAAAaAdLaGgIR0BKEqCHymQ9dX2UKGgGR0BawAAAAAAAaAdLa2gIR0BKg2n889wFdX2UKGgGR0BZAAAAAAAAaAdLZGgIR0BKiSD7IkqudX2UKGgGR0BaAAAAAAAAaAdLaGgIR0BLDASnLq2SdX2UKGgGR0BhgAAAAAAAaAdLjGgIR0BLGSDZlFtsdX2UKGgGR0BhgAAAAAAAaAdLjGgIR0BLksCtA9mpdX2UKGgGR0A9AAAAAAAAaAdLHWgIR0BLlMgMc6vJdX2UKGgGR0AwAAAAAAAAaAdLEGgIR0BLlbMPjGT+dX2UKGgGR0A+AAAAAAAAaAdLHmgIR0BLl7ZOBUaRdX2UKGgGR0AoAAAAAAAAaAdLDGgIR0BLmIrFwT/RdX2UKGgGR0A4AAAAAAAAaAdLGGgIR0BLmi++M6zWdX2UKGgGR0AoAAAAAAAAaAdLDGgIR0BLnRn3+MqCdX2UKGgGR0A3AAAAAAAAaAdLF2gIR0BLnluWKMvRdX2UKGgGR0A4AAAAAAAAaAdLGGgIR0BLn9aEBbOedX2UKGgGR0BYgAAAAAAAaAdLYmgIR0BMKhVU+9rXdX2UKGgGR0AyAAAAAAAAaAdLEmgIR0BMKvNNahYedX2UKGgGR0AyAAAAAAAAaAdLEmgIR0BMK9KVY6n0dX2UKGgGR0AkAAAAAAAAaAdLCmgIR0BMLE2Hck+pdX2UKGgGR0A1AAAAAAAAaAdLFWgIR0BMLU/W1+iKdX2UKGgGR0BYwAAAAAAAaAdLY2gIR0BMNrL6k691dX2UKGgGR0AsAAAAAAAAaAdLDmgIR0BMN3TEzfrKdX2UKGgGR0AyAAAAAAAAaAdLEmgIR0BMxPl2eQMhdX2UKGgGR0Bw4AAAAAAAaAdNDgFoCEdATTu1fE4vOHV9lChoBkdAYMAAAAAAAGgHS4ZoCEdATUjGm1pj+nV9lChoBkdAYqAAAAAAAGgHS5VoCEdATftEb5uZTnV9lChoBkdAWIAAAAAAAGgHS2JoCEdATgQy0rsjV3V9lChoBkdAWAAAAAAAAGgHS2BoCEdATgy9CeEqUnV9lChoBkdAWIAAAAAAAGgHS2JoCEdATpWTaCcwxnV9lChoBkdAKgAAAAAAAGgHSw1oCEdATpXqLS/j83V9lChoBkdAWIAAAAAAAGgHS2JoCEdATpkbzbvgFXV9lChoBkdAWQAAAAAAAGgHS2RoCEdATsvMY/FBIHV9lChoBkdAYaAAAAAAAGgHS41oCEdATtNghKUVz3V9lChoBkdAYIAAAAAAAGgHS4RoCEdATw4xN7BwdnV9lChoBkdAWsAAAAAAAGgHS2toCEdATxMExIre7HV9lChoBkdAXEAAAAAAAGgHS3FoCEdAT06OLiuMdnV9lChoBkdAaWAAAAAAAGgHS8toCEdAT1gqgAZKnXV9lChoBkdAZOAAAAAAAGgHS6doCEdAT5SwdKdxyXV9lChoBkdAYSAAAAAAAGgHS4loCEdAT9W5OJtSAHV9lChoBkdAXwAAAAAAAGgHS3xoCEdAT9wnQY1pCnV9lChoBkdAXsAAAAAAAGgHS3toCEdAT+Gl41P3z3V9lChoBkdAWQAAAAAAAGgHS2RoCEdAUBEzJp35e3V9lChoBkdAWYAAAAAAAGgHS2ZoCEdAUBQX531SO3V9lChoBkdAWMAAAAAAAGgHS2NoCEdAUC5zQu27WnV9lChoBkdAX8AAAAAAAGgHS39oCEdAUDHnHNorWnV9lChoBkdAX0AAAAAAAGgHS31oCEdAUGUEzO5avHV9lChoBkdAKgAAAAAAAGgHSw1oCEdAUGVOsT37DXV9lChoBkdAMwAAAAAAAGgHSxNoCEdAUGXDLr5ZbXV9lChoBkdAWEAAAAAAAGgHS2FoCEdAUGh1V5rxiHV9lChoBkdAWAAAAAAAAGgHS2BoCEdAUJvGuLaVU3V9lChoBkdALgAAAAAAAGgHSw9oCEdAUJycZtNzsHV9lChoBkdAW0AAAAAAAGgHS21oCEdAUJ/127nPmnV9lChoBkdAXEAAAAAAAGgHS3FoCEdAUKRkAggX/HV9lChoBkdAXgAAAAAAAGgHS3hoCEdAUO3cbiqABnV9lChoBkdAXcAAAAAAAGgHS3doCEdAUPHiHZbpvHV9lChoBkdAKAAAAAAAAGgHSwxoCEdAUPIwj+rEL3V9lChoBkdAZsAAAAAAAGgHS7ZoCEdAUSv4nF5v+HV9lChoBkdAY0AAAAAAAGgHS5poCEdAUXETnJT2nXV9lChoBkdAakAAAAAAAGgHS9JoCEdAUZvo/zJ6p3V9lChoBkdAYWAAAAAAAGgHS4toCEdAUZ+OlwcYInV9lChoBkdAYWAAAAAAAGgHS4toCEdAUb6pWFN+LHV9lChoBkdAXIAAAAAAAGgHS3JoCEdAUcIGpuMuOHV9lChoBkdAWwAAAAAAAGgHS2xoCEdAUfl81Gb1AnV9lChoBkdAMAAAAAAAAGgHSxBoCEdAUfn0lJHy3HV9lChoBkdAVsAAAAAAAGgHS1toCEdAUf2MS9M9KXV9lChoBkdAV0AAAAAAAGgHS11oCEdAUgYlfJFLFnV9lChoBkdAV4AAAAAAAGgHS15oCEdAUjjES/TLGXV9lChoBkdAa8AAAAAAAGgHS95oCEdAUnocNpdrwnV9lChoBkdAY4AAAAAAAGgHS5xoCEdAUoEXwb2lEnV9lChoBkdAYcAAAAAAAGgHS45oCEdAUr9kEs8PnXV9lChoBkdAWoAAAAAAAGgHS2poCEdAUsNaFEiMYXV9lChoBkdAW8AAAAAAAGgHS29oCEdAUxtaJQ+EAnV9lChoBkdANAAAAAAAAGgHSxRoCEdAUxvaJyhi9nV9lChoBkdAf0AAAAAAAGgHTfQBaAhHQFOvrbxmTTx1fZQoaAZHQGXgAAAAAABoB0uvaAhHQFO0ahHskY51fZQoaAZHQGAAAAAAAABoB0uAaAhHQFPzGgBcRlJ1fZQoaAZHQFuAAAAAAABoB0tuaAhHQFP4bblA/s51fZQoaAZHQFwAAAAAAABoB0twaAhHQFQkMMqjJuF1fZQoaAZHQGyAAAAAAABoB0vkaAhHQFRDYFJQLux1fZQoaAZHQGEgAAAAAABoB0uJaAhHQFRGoJRfnfV1fZQoaAZHQF7AAAAAAABoB0t7aAhHQFRi6oESuhd1fZQoaAZHQHlwAAAAAABoB02XAWgIR0BUjeKbayrxdX2UKGgGR0B8wAAAAAAAaAdNzAFoCEdAVRWPOpsGgXV9lChoBkdAYGAAAAAAAGgHS4NoCEdAVRdfOUt7KXV9lChoBkdAZqAAAAAAAGgHS7VoCEdAVTJfhMrVfHV9lChoBkdAb0AAAAAAAGgHS/poCEdAVVCR1X/5tXV9lChoBkdAXwAAAAAAAGgHS3xoCEdAVZf2PDHfdnV9lChoBkdAXcAAAAAAAGgHS3doCEdAVZ3Z6D5CW3V9lChoBkdAXkAAAAAAAGgHS3loCEdAVdq/rSmZVnV9lChoBkdAdWAAAAAAAGgHTVYBaAhHQFX2EHMUypJ1fZQoaAZHQGBgAAAAAABoB0uDaAhHQFX5DG96C191fZQoaAZHQGKgAAAAAABoB0uVaAhHQFYaWWyC4Bp1fZQoaAZHQH9AAAAAAABoB030AWgIR0BWaQrhBJI2dWUu"
53
  },
54
  "ep_success_buffer": {
55
  ":type:": "<class 'collections.deque'>",
 
93
  ":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==",
94
  "__module__": "stable_baselines3.common.buffers",
95
  "__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
96
+ "__init__": "<function ReplayBuffer.__init__ at 0x7f2fa2684af0>",
97
+ "add": "<function ReplayBuffer.add at 0x7f2fa2684b80>",
98
+ "sample": "<function ReplayBuffer.sample at 0x7f2fa2684c10>",
99
+ "_get_samples": "<function ReplayBuffer._get_samples at 0x7f2fa2684ca0>",
100
+ "_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at 0x7f2fa2684d30>)>",
101
  "__abstractmethods__": "frozenset()",
102
+ "_abc_impl": "<_abc._abc_data object at 0x7f2fa27fcfc0>"
103
  },
104
  "replay_buffer_kwargs": {},
105
  "train_freq": {
dqn-CartPole-v1/policy.optimizer.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9d4396f0ff5bb988d5441e07812ed5e22748221d77e0758e3dcf4cf9ea38276
3
  size 545519
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:317080825fbbcf0d2841c4f30f7f6c897f29f7235b9e374fd49a91fd3f0cf96c
3
  size 545519
dqn-CartPole-v1/policy.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc0592d099dd214d518d11bf823825f887a715cdcb2ac4e2d2320749a0660961
3
  size 544641
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cd85b9ea4cebd7d1da691b03ee66e89192f7f8144c9fb760e295c59834c4ca6
3
  size 544641
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:321923938fd02e6ac892e5b5764290a7da3aadb46767d53c937ec60f513a3559
3
- size 90679
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a363a978e83fae68c287aff916ddd13bcdbc5a70e4d67b571fdce60a07c4ac23
3
+ size 74263
results.json CHANGED
@@ -1 +1 @@
1
- {"mean_reward": 152.2, "std_reward": 2.85657137141714, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2023-10-25T23:47:14.927431"}
 
1
+ {"mean_reward": 500.0, "std_reward": 0.0, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2023-10-26T00:21:43.677391"}
train_eval_metrics.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95f5619a5177ffcc0f9b123aeced9c27be9e0719a9f3de4d163424bb8780cb6a
3
- size 11172
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c99107bc8583368e2cd3d0b161cac331f31c89d995db8401823a1dd7eb8ad199
3
+ size 10938