Push to Hub
Browse files- README.md +1 -1
- args.yml +2 -2
- dqn-CartPole-v1.zip +2 -2
- dqn-CartPole-v1/data +19 -19
- dqn-CartPole-v1/policy.optimizer.pth +1 -1
- dqn-CartPole-v1/policy.pth +1 -1
- replay.mp4 +2 -2
- results.json +1 -1
- train_eval_metrics.zip +2 -2
README.md
CHANGED
@@ -16,7 +16,7 @@ model-index:
|
|
16 |
type: CartPole-v1
|
17 |
metrics:
|
18 |
- type: mean_reward
|
19 |
-
value:
|
20 |
name: mean_reward
|
21 |
verified: false
|
22 |
---
|
|
|
16 |
type: CartPole-v1
|
17 |
metrics:
|
18 |
- type: mean_reward
|
19 |
+
value: 500.00 +/- 0.00
|
20 |
name: mean_reward
|
21 |
verified: false
|
22 |
---
|
args.yml
CHANGED
@@ -54,13 +54,13 @@
|
|
54 |
- - save_replay_buffer
|
55 |
- false
|
56 |
- - seed
|
57 |
-
-
|
58 |
- - storage
|
59 |
- null
|
60 |
- - study_name
|
61 |
- null
|
62 |
- - tensorboard_log
|
63 |
-
- runs/CartPole-
|
64 |
- - track
|
65 |
- true
|
66 |
- - trained_agent
|
|
|
54 |
- - save_replay_buffer
|
55 |
- false
|
56 |
- - seed
|
57 |
+
- 2904896230
|
58 |
- - storage
|
59 |
- null
|
60 |
- - study_name
|
61 |
- null
|
62 |
- - tensorboard_log
|
63 |
+
- runs/CartPole-v1__dqn__2904896230__1698259642
|
64 |
- - track
|
65 |
- true
|
66 |
- - trained_agent
|
dqn-CartPole-v1.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c52e0bab37152188fea3b271cee0fd2faf7752eb8521a5ed578f989737a9bf4
|
3 |
+
size 1107467
|
dqn-CartPole-v1/data
CHANGED
@@ -5,15 +5,15 @@
|
|
5 |
"__module__": "stable_baselines3.dqn.policies",
|
6 |
"__annotations__": "{'q_net': <class 'stable_baselines3.dqn.policies.QNetwork'>, 'q_net_target': <class 'stable_baselines3.dqn.policies.QNetwork'>}",
|
7 |
"__doc__": "\n Policy class with Q-Value Net and target net for DQN\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
|
8 |
-
"__init__": "<function DQNPolicy.__init__ at
|
9 |
-
"_build": "<function DQNPolicy._build at
|
10 |
-
"make_q_net": "<function DQNPolicy.make_q_net at
|
11 |
-
"forward": "<function DQNPolicy.forward at
|
12 |
-
"_predict": "<function DQNPolicy._predict at
|
13 |
-
"_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at
|
14 |
-
"set_training_mode": "<function DQNPolicy.set_training_mode at
|
15 |
"__abstractmethods__": "frozenset()",
|
16 |
-
"_abc_impl": "<_abc._abc_data object at
|
17 |
},
|
18 |
"verbose": 1,
|
19 |
"policy_kwargs": {
|
@@ -27,12 +27,12 @@
|
|
27 |
"_num_timesteps_at_start": 0,
|
28 |
"seed": 0,
|
29 |
"action_noise": null,
|
30 |
-
"start_time":
|
31 |
"learning_rate": {
|
32 |
":type:": "<class 'function'>",
|
33 |
":serialized:": "gAWVAwMAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMaC9ob21lL25zYW5naGkvc2FuZGJveC9hcHJlc3MvZHJsLTJlZC92ZW52L2xpYi9weXRob24zLjEwL3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwEZnVuY5RLg0MCBAGUjAN2YWyUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flIxoL2hvbWUvbnNhbmdoaS9zYW5kYm94L2FwcmVzcy9kcmwtMmVkL3ZlbnYvbGliL3B5dGhvbjMuMTAvc2l0ZS1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUdU5OaACMEF9tYWtlX2VtcHR5X2NlbGyUk5QpUpSFlHSUUpSMHGNsb3VkcGlja2xlLmNsb3VkcGlja2xlX2Zhc3SUjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoH32UfZQoaBZoDYwMX19xdWFsbmFtZV9flIwZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5SMD19fYW5ub3RhdGlvbnNfX5R9lIwOX19rd2RlZmF1bHRzX1+UTowMX19kZWZhdWx0c19flE6MCl9fbW9kdWxlX1+UaBeMB19fZG9jX1+UTowLX19jbG9zdXJlX1+UaACMCl9tYWtlX2NlbGyUk5RHP2LXcxj8UEiFlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMC4="
|
34 |
},
|
35 |
-
"tensorboard_log": "runs/CartPole-
|
36 |
"_last_obs": null,
|
37 |
"_last_episode_starts": {
|
38 |
":type:": "<class 'numpy.ndarray'>",
|
@@ -40,16 +40,16 @@
|
|
40 |
},
|
41 |
"_last_original_obs": {
|
42 |
":type:": "<class 'numpy.ndarray'>",
|
43 |
-
":serialized:": "
|
44 |
},
|
45 |
-
"_episode_num":
|
46 |
"use_sde": false,
|
47 |
"sde_sample_freq": -1,
|
48 |
"_current_progress_remaining": 0.40002000000000004,
|
49 |
"_stats_window_size": 100,
|
50 |
"ep_info_buffer": {
|
51 |
":type:": "<class 'collections.deque'>",
|
52 |
-
":serialized:": "
|
53 |
},
|
54 |
"ep_success_buffer": {
|
55 |
":type:": "<class 'collections.deque'>",
|
@@ -93,13 +93,13 @@
|
|
93 |
":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==",
|
94 |
"__module__": "stable_baselines3.common.buffers",
|
95 |
"__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
|
96 |
-
"__init__": "<function ReplayBuffer.__init__ at
|
97 |
-
"add": "<function ReplayBuffer.add at
|
98 |
-
"sample": "<function ReplayBuffer.sample at
|
99 |
-
"_get_samples": "<function ReplayBuffer._get_samples at
|
100 |
-
"_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at
|
101 |
"__abstractmethods__": "frozenset()",
|
102 |
-
"_abc_impl": "<_abc._abc_data object at
|
103 |
},
|
104 |
"replay_buffer_kwargs": {},
|
105 |
"train_freq": {
|
|
|
5 |
"__module__": "stable_baselines3.dqn.policies",
|
6 |
"__annotations__": "{'q_net': <class 'stable_baselines3.dqn.policies.QNetwork'>, 'q_net_target': <class 'stable_baselines3.dqn.policies.QNetwork'>}",
|
7 |
"__doc__": "\n Policy class with Q-Value Net and target net for DQN\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
|
8 |
+
"__init__": "<function DQNPolicy.__init__ at 0x7f2fa26a0310>",
|
9 |
+
"_build": "<function DQNPolicy._build at 0x7f2fa26a03a0>",
|
10 |
+
"make_q_net": "<function DQNPolicy.make_q_net at 0x7f2fa26a0430>",
|
11 |
+
"forward": "<function DQNPolicy.forward at 0x7f2fa26a04c0>",
|
12 |
+
"_predict": "<function DQNPolicy._predict at 0x7f2fa26a0550>",
|
13 |
+
"_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at 0x7f2fa26a05e0>",
|
14 |
+
"set_training_mode": "<function DQNPolicy.set_training_mode at 0x7f2fa26a0670>",
|
15 |
"__abstractmethods__": "frozenset()",
|
16 |
+
"_abc_impl": "<_abc._abc_data object at 0x7f2fa268f500>"
|
17 |
},
|
18 |
"verbose": 1,
|
19 |
"policy_kwargs": {
|
|
|
27 |
"_num_timesteps_at_start": 0,
|
28 |
"seed": 0,
|
29 |
"action_noise": null,
|
30 |
+
"start_time": 1698259645745364335,
|
31 |
"learning_rate": {
|
32 |
":type:": "<class 'function'>",
|
33 |
":serialized:": "gAWVAwMAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMaC9ob21lL25zYW5naGkvc2FuZGJveC9hcHJlc3MvZHJsLTJlZC92ZW52L2xpYi9weXRob24zLjEwL3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwEZnVuY5RLg0MCBAGUjAN2YWyUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flIxoL2hvbWUvbnNhbmdoaS9zYW5kYm94L2FwcmVzcy9kcmwtMmVkL3ZlbnYvbGliL3B5dGhvbjMuMTAvc2l0ZS1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUdU5OaACMEF9tYWtlX2VtcHR5X2NlbGyUk5QpUpSFlHSUUpSMHGNsb3VkcGlja2xlLmNsb3VkcGlja2xlX2Zhc3SUjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoH32UfZQoaBZoDYwMX19xdWFsbmFtZV9flIwZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5SMD19fYW5ub3RhdGlvbnNfX5R9lIwOX19rd2RlZmF1bHRzX1+UTowMX19kZWZhdWx0c19flE6MCl9fbW9kdWxlX1+UaBeMB19fZG9jX1+UTowLX19jbG9zdXJlX1+UaACMCl9tYWtlX2NlbGyUk5RHP2LXcxj8UEiFlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMC4="
|
34 |
},
|
35 |
+
"tensorboard_log": "runs/CartPole-v1__dqn__2904896230__1698259642/CartPole-v1",
|
36 |
"_last_obs": null,
|
37 |
"_last_episode_starts": {
|
38 |
":type:": "<class 'numpy.ndarray'>",
|
|
|
40 |
},
|
41 |
"_last_original_obs": {
|
42 |
":type:": "<class 'numpy.ndarray'>",
|
43 |
+
":serialized:": "gAWVhQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYQAAAAAAAAADnT0D/cl12+LmdSOkzGQ76UjAVudW1weZSMBWR0eXBllJOUjAJmNJSJiIeUUpQoSwOMATyUTk5OSv////9K/////0sAdJRiSwFLBIaUjAFDlHSUUpQu"
|
44 |
},
|
45 |
+
"_episode_num": 365,
|
46 |
"use_sde": false,
|
47 |
"sde_sample_freq": -1,
|
48 |
"_current_progress_remaining": 0.40002000000000004,
|
49 |
"_stats_window_size": 100,
|
50 |
"ep_info_buffer": {
|
51 |
":type:": "<class 'collections.deque'>",
|
52 |
+
":serialized:": "gAWV5gsAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHQFTAAAAAAACMAWyUS1OMAXSUR0BJdt2cJ+lTdX2UKGgGR0BVgAAAAAAAaAdLVmgIR0BJe0Re1KGtdX2UKGgGR0AqAAAAAAAAaAdLDWgIR0BJfBAv+OwQdX2UKGgGR0BXQAAAAAAAaAdLXWgIR0BJg4e1a4c4dX2UKGgGR0AsAAAAAAAAaAdLDmgIR0BJhEcKgIyCdX2UKGgGR0AoAAAAAAAAaAdLDGgIR0BJhNRvWH1wdX2UKGgGR0AkAAAAAAAAaAdLCmgIR0BKBGXokiUxdX2UKGgGR0BcQAAAAAAAaAdLcWgIR0BKCr1uivgWdX2UKGgGR0BaAAAAAAAAaAdLaGgIR0BKEqCHymQ9dX2UKGgGR0BawAAAAAAAaAdLa2gIR0BKg2n889wFdX2UKGgGR0BZAAAAAAAAaAdLZGgIR0BKiSD7IkqudX2UKGgGR0BaAAAAAAAAaAdLaGgIR0BLDASnLq2SdX2UKGgGR0BhgAAAAAAAaAdLjGgIR0BLGSDZlFtsdX2UKGgGR0BhgAAAAAAAaAdLjGgIR0BLksCtA9mpdX2UKGgGR0A9AAAAAAAAaAdLHWgIR0BLlMgMc6vJdX2UKGgGR0AwAAAAAAAAaAdLEGgIR0BLlbMPjGT+dX2UKGgGR0A+AAAAAAAAaAdLHmgIR0BLl7ZOBUaRdX2UKGgGR0AoAAAAAAAAaAdLDGgIR0BLmIrFwT/RdX2UKGgGR0A4AAAAAAAAaAdLGGgIR0BLmi++M6zWdX2UKGgGR0AoAAAAAAAAaAdLDGgIR0BLnRn3+MqCdX2UKGgGR0A3AAAAAAAAaAdLF2gIR0BLnluWKMvRdX2UKGgGR0A4AAAAAAAAaAdLGGgIR0BLn9aEBbOedX2UKGgGR0BYgAAAAAAAaAdLYmgIR0BMKhVU+9rXdX2UKGgGR0AyAAAAAAAAaAdLEmgIR0BMKvNNahYedX2UKGgGR0AyAAAAAAAAaAdLEmgIR0BMK9KVY6n0dX2UKGgGR0AkAAAAAAAAaAdLCmgIR0BMLE2Hck+pdX2UKGgGR0A1AAAAAAAAaAdLFWgIR0BMLU/W1+iKdX2UKGgGR0BYwAAAAAAAaAdLY2gIR0BMNrL6k691dX2UKGgGR0AsAAAAAAAAaAdLDmgIR0BMN3TEzfrKdX2UKGgGR0AyAAAAAAAAaAdLEmgIR0BMxPl2eQMhdX2UKGgGR0Bw4AAAAAAAaAdNDgFoCEdATTu1fE4vOHV9lChoBkdAYMAAAAAAAGgHS4ZoCEdATUjGm1pj+nV9lChoBkdAYqAAAAAAAGgHS5VoCEdATftEb5uZTnV9lChoBkdAWIAAAAAAAGgHS2JoCEdATgQy0rsjV3V9lChoBkdAWAAAAAAAAGgHS2BoCEdATgy9CeEqUnV9lChoBkdAWIAAAAAAAGgHS2JoCEdATpWTaCcwxnV9lChoBkdAKgAAAAAAAGgHSw1oCEdATpXqLS/j83V9lChoBkdAWIAAAAAAAGgHS2JoCEdATpkbzbvgFXV9lChoBkdAWQAAAAAAAGgHS2RoCEdATsvMY/FBIHV9lChoBkdAYaAAAAAAAGgHS41oCEdATtNghKUVz3V9lChoBkdAYIAAAAAAAGgHS4RoCEdATw4xN7BwdnV9lChoBkdAWsAAAAAAAGgHS2toCEdATxMExIre7HV9lChoBkdAXEAAAAAAAGgHS3FoCEdAT06OLiuMdnV9lChoBkdAaWAAAAAAAGgHS8toCEdAT1gqgAZKnXV9lChoBkdAZOAAAAAAAGgHS6doCEdAT5SwdKdxyXV9lChoBkdAYSAAAAAAAGgHS4loCEdAT9W5OJtSAHV9lChoBkdAXwAAAAAAAGgHS3xoCEdAT9wnQY1pCnV9lChoBkdAXsAAAAAAAGgHS3toCEdAT+Gl41P3z3V9lChoBkdAWQAAAAAAAGgHS2RoCEdAUBEzJp35e3V9lChoBkdAWYAAAAAAAGgHS2ZoCEdAUBQX531SO3V9lChoBkdAWMAAAAAAAGgHS2NoCEdAUC5zQu27WnV9lChoBkdAX8AAAAAAAGgHS39oCEdAUDHnHNorWnV9lChoBkdAX0AAAAAAAGgHS31oCEdAUGUEzO5avHV9lChoBkdAKgAAAAAAAGgHSw1oCEdAUGVOsT37DXV9lChoBkdAMwAAAAAAAGgHSxNoCEdAUGXDLr5ZbXV9lChoBkdAWEAAAAAAAGgHS2FoCEdAUGh1V5rxiHV9lChoBkdAWAAAAAAAAGgHS2BoCEdAUJvGuLaVU3V9lChoBkdALgAAAAAAAGgHSw9oCEdAUJycZtNzsHV9lChoBkdAW0AAAAAAAGgHS21oCEdAUJ/127nPmnV9lChoBkdAXEAAAAAAAGgHS3FoCEdAUKRkAggX/HV9lChoBkdAXgAAAAAAAGgHS3hoCEdAUO3cbiqABnV9lChoBkdAXcAAAAAAAGgHS3doCEdAUPHiHZbpvHV9lChoBkdAKAAAAAAAAGgHSwxoCEdAUPIwj+rEL3V9lChoBkdAZsAAAAAAAGgHS7ZoCEdAUSv4nF5v+HV9lChoBkdAY0AAAAAAAGgHS5poCEdAUXETnJT2nXV9lChoBkdAakAAAAAAAGgHS9JoCEdAUZvo/zJ6p3V9lChoBkdAYWAAAAAAAGgHS4toCEdAUZ+OlwcYInV9lChoBkdAYWAAAAAAAGgHS4toCEdAUb6pWFN+LHV9lChoBkdAXIAAAAAAAGgHS3JoCEdAUcIGpuMuOHV9lChoBkdAWwAAAAAAAGgHS2xoCEdAUfl81Gb1AnV9lChoBkdAMAAAAAAAAGgHSxBoCEdAUfn0lJHy3HV9lChoBkdAVsAAAAAAAGgHS1toCEdAUf2MS9M9KXV9lChoBkdAV0AAAAAAAGgHS11oCEdAUgYlfJFLFnV9lChoBkdAV4AAAAAAAGgHS15oCEdAUjjES/TLGXV9lChoBkdAa8AAAAAAAGgHS95oCEdAUnocNpdrwnV9lChoBkdAY4AAAAAAAGgHS5xoCEdAUoEXwb2lEnV9lChoBkdAYcAAAAAAAGgHS45oCEdAUr9kEs8PnXV9lChoBkdAWoAAAAAAAGgHS2poCEdAUsNaFEiMYXV9lChoBkdAW8AAAAAAAGgHS29oCEdAUxtaJQ+EAnV9lChoBkdANAAAAAAAAGgHSxRoCEdAUxvaJyhi9nV9lChoBkdAf0AAAAAAAGgHTfQBaAhHQFOvrbxmTTx1fZQoaAZHQGXgAAAAAABoB0uvaAhHQFO0ahHskY51fZQoaAZHQGAAAAAAAABoB0uAaAhHQFPzGgBcRlJ1fZQoaAZHQFuAAAAAAABoB0tuaAhHQFP4bblA/s51fZQoaAZHQFwAAAAAAABoB0twaAhHQFQkMMqjJuF1fZQoaAZHQGyAAAAAAABoB0vkaAhHQFRDYFJQLux1fZQoaAZHQGEgAAAAAABoB0uJaAhHQFRGoJRfnfV1fZQoaAZHQF7AAAAAAABoB0t7aAhHQFRi6oESuhd1fZQoaAZHQHlwAAAAAABoB02XAWgIR0BUjeKbayrxdX2UKGgGR0B8wAAAAAAAaAdNzAFoCEdAVRWPOpsGgXV9lChoBkdAYGAAAAAAAGgHS4NoCEdAVRdfOUt7KXV9lChoBkdAZqAAAAAAAGgHS7VoCEdAVTJfhMrVfHV9lChoBkdAb0AAAAAAAGgHS/poCEdAVVCR1X/5tXV9lChoBkdAXwAAAAAAAGgHS3xoCEdAVZf2PDHfdnV9lChoBkdAXcAAAAAAAGgHS3doCEdAVZ3Z6D5CW3V9lChoBkdAXkAAAAAAAGgHS3loCEdAVdq/rSmZVnV9lChoBkdAdWAAAAAAAGgHTVYBaAhHQFX2EHMUypJ1fZQoaAZHQGBgAAAAAABoB0uDaAhHQFX5DG96C191fZQoaAZHQGKgAAAAAABoB0uVaAhHQFYaWWyC4Bp1fZQoaAZHQH9AAAAAAABoB030AWgIR0BWaQrhBJI2dWUu"
|
53 |
},
|
54 |
"ep_success_buffer": {
|
55 |
":type:": "<class 'collections.deque'>",
|
|
|
93 |
":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==",
|
94 |
"__module__": "stable_baselines3.common.buffers",
|
95 |
"__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
|
96 |
+
"__init__": "<function ReplayBuffer.__init__ at 0x7f2fa2684af0>",
|
97 |
+
"add": "<function ReplayBuffer.add at 0x7f2fa2684b80>",
|
98 |
+
"sample": "<function ReplayBuffer.sample at 0x7f2fa2684c10>",
|
99 |
+
"_get_samples": "<function ReplayBuffer._get_samples at 0x7f2fa2684ca0>",
|
100 |
+
"_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at 0x7f2fa2684d30>)>",
|
101 |
"__abstractmethods__": "frozenset()",
|
102 |
+
"_abc_impl": "<_abc._abc_data object at 0x7f2fa27fcfc0>"
|
103 |
},
|
104 |
"replay_buffer_kwargs": {},
|
105 |
"train_freq": {
|
dqn-CartPole-v1/policy.optimizer.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 545519
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:317080825fbbcf0d2841c4f30f7f6c897f29f7235b9e374fd49a91fd3f0cf96c
|
3 |
size 545519
|
dqn-CartPole-v1/policy.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 544641
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cd85b9ea4cebd7d1da691b03ee66e89192f7f8144c9fb760e295c59834c4ca6
|
3 |
size 544641
|
replay.mp4
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a363a978e83fae68c287aff916ddd13bcdbc5a70e4d67b571fdce60a07c4ac23
|
3 |
+
size 74263
|
results.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"mean_reward":
|
|
|
1 |
+
{"mean_reward": 500.0, "std_reward": 0.0, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2023-10-26T00:21:43.677391"}
|
train_eval_metrics.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c99107bc8583368e2cd3d0b161cac331f31c89d995db8401823a1dd7eb8ad199
|
3 |
+
size 10938
|