Initial commit
Browse files- README.md +12 -4
- args.yml +11 -5
- config.yml +5 -1
- replay.mp4 +2 -2
- results.json +1 -1
- sac-seals-Swimmer-v0.zip +2 -2
- sac-seals-Swimmer-v0/_stable_baselines3_version +1 -1
- sac-seals-Swimmer-v0/actor.optimizer.pth +1 -1
- sac-seals-Swimmer-v0/critic.optimizer.pth +1 -1
- sac-seals-Swimmer-v0/data +27 -25
- sac-seals-Swimmer-v0/ent_coef_optimizer.pth +2 -2
- sac-seals-Swimmer-v0/policy.pth +1 -1
- sac-seals-Swimmer-v0/pytorch_variables.pth +1 -1
- sac-seals-Swimmer-v0/system_info.txt +2 -2
- train_eval_metrics.zip +2 -2
README.md
CHANGED
@@ -10,7 +10,7 @@ model-index:
|
|
10 |
results:
|
11 |
- metrics:
|
12 |
- type: mean_reward
|
13 |
-
value:
|
14 |
name: mean_reward
|
15 |
task:
|
16 |
type: reinforcement-learning
|
@@ -37,15 +37,21 @@ SB3 Contrib: https://github.com/Stable-Baselines-Team/stable-baselines3-contrib
|
|
37 |
|
38 |
```
|
39 |
# Download model and save it into the logs/ folder
|
40 |
-
python -m
|
41 |
python enjoy.py --algo sac --env seals/Swimmer-v0 -f logs/
|
42 |
```
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
## Training (with the RL Zoo)
|
45 |
```
|
46 |
python train.py --algo sac --env seals/Swimmer-v0 -f logs/
|
47 |
# Upload the model and generate video (when possible)
|
48 |
-
python -m
|
49 |
```
|
50 |
|
51 |
## Hyperparameters
|
@@ -58,7 +64,9 @@ OrderedDict([('batch_size', 128),
|
|
58 |
('n_timesteps', 1000000.0),
|
59 |
('policy', 'MlpPolicy'),
|
60 |
('policy_kwargs',
|
61 |
-
'
|
|
|
|
|
62 |
('tau', 0.01),
|
63 |
('train_freq', 256),
|
64 |
('normalize', False)])
|
|
|
10 |
results:
|
11 |
- metrics:
|
12 |
- type: mean_reward
|
13 |
+
value: 28.16 +/- 0.72
|
14 |
name: mean_reward
|
15 |
task:
|
16 |
type: reinforcement-learning
|
|
|
37 |
|
38 |
```
|
39 |
# Download model and save it into the logs/ folder
|
40 |
+
python -m rl_zoo3.load_from_hub --algo sac --env seals/Swimmer-v0 -orga HumanCompatibleAI -f logs/
|
41 |
python enjoy.py --algo sac --env seals/Swimmer-v0 -f logs/
|
42 |
```
|
43 |
|
44 |
+
If you installed the RL Zoo3 via pip (`pip install rl_zoo3`), from anywhere you can do:
|
45 |
+
```
|
46 |
+
python -m rl_zoo3.load_from_hub --algo sac --env seals/Swimmer-v0 -orga HumanCompatibleAI -f logs/
|
47 |
+
rl_zoo3 enjoy --algo sac --env seals/Swimmer-v0 -f logs/
|
48 |
+
```
|
49 |
+
|
50 |
## Training (with the RL Zoo)
|
51 |
```
|
52 |
python train.py --algo sac --env seals/Swimmer-v0 -f logs/
|
53 |
# Upload the model and generate video (when possible)
|
54 |
+
python -m rl_zoo3.push_to_hub --algo sac --env seals/Swimmer-v0 -f logs/ -orga HumanCompatibleAI
|
55 |
```
|
56 |
|
57 |
## Hyperparameters
|
|
|
64 |
('n_timesteps', 1000000.0),
|
65 |
('policy', 'MlpPolicy'),
|
66 |
('policy_kwargs',
|
67 |
+
{'log_std_init': -2.689958330139309,
|
68 |
+
'net_arch': [400, 300],
|
69 |
+
'use_sde': False}),
|
70 |
('tau', 0.01),
|
71 |
('train_freq', 256),
|
72 |
('normalize', False)])
|
args.yml
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
!!python/object/apply:collections.OrderedDict
|
2 |
- - - algo
|
3 |
- sac
|
|
|
|
|
4 |
- - device
|
5 |
- cpu
|
6 |
- - env
|
@@ -16,7 +18,7 @@
|
|
16 |
- - hyperparams
|
17 |
- null
|
18 |
- - log_folder
|
19 |
-
-
|
20 |
- - log_interval
|
21 |
- -1
|
22 |
- - max_total_trials
|
@@ -41,6 +43,8 @@
|
|
41 |
- null
|
42 |
- - optimize_hyperparameters
|
43 |
- false
|
|
|
|
|
44 |
- - pruner
|
45 |
- median
|
46 |
- - sampler
|
@@ -50,13 +54,13 @@
|
|
50 |
- - save_replay_buffer
|
51 |
- false
|
52 |
- - seed
|
53 |
-
-
|
54 |
- - storage
|
55 |
- null
|
56 |
- - study_name
|
57 |
- null
|
58 |
- - tensorboard_log
|
59 |
-
- runs/seals/Swimmer-
|
60 |
- - track
|
61 |
- true
|
62 |
- - trained_agent
|
@@ -70,6 +74,8 @@
|
|
70 |
- - verbose
|
71 |
- 1
|
72 |
- - wandb_entity
|
73 |
-
-
|
74 |
- - wandb_project_name
|
75 |
-
- seals-experts-
|
|
|
|
|
|
1 |
!!python/object/apply:collections.OrderedDict
|
2 |
- - - algo
|
3 |
- sac
|
4 |
+
- - conf_file
|
5 |
+
- hyperparams/python/sac.py
|
6 |
- - device
|
7 |
- cpu
|
8 |
- - env
|
|
|
18 |
- - hyperparams
|
19 |
- null
|
20 |
- - log_folder
|
21 |
+
- logs
|
22 |
- - log_interval
|
23 |
- -1
|
24 |
- - max_total_trials
|
|
|
43 |
- null
|
44 |
- - optimize_hyperparameters
|
45 |
- false
|
46 |
+
- - progress
|
47 |
+
- false
|
48 |
- - pruner
|
49 |
- median
|
50 |
- - sampler
|
|
|
54 |
- - save_replay_buffer
|
55 |
- false
|
56 |
- - seed
|
57 |
+
- 7
|
58 |
- - storage
|
59 |
- null
|
60 |
- - study_name
|
61 |
- null
|
62 |
- - tensorboard_log
|
63 |
+
- runs/seals/Swimmer-v0__sac__7__1670518603
|
64 |
- - track
|
65 |
- true
|
66 |
- - trained_agent
|
|
|
74 |
- - verbose
|
75 |
- 1
|
76 |
- - wandb_entity
|
77 |
+
- ernestum
|
78 |
- - wandb_project_name
|
79 |
+
- seals-experts-normalized
|
80 |
+
- - yaml_file
|
81 |
+
- null
|
config.yml
CHANGED
@@ -14,7 +14,11 @@
|
|
14 |
- - policy
|
15 |
- MlpPolicy
|
16 |
- - policy_kwargs
|
17 |
-
-
|
|
|
|
|
|
|
|
|
18 |
- - tau
|
19 |
- 0.01
|
20 |
- - train_freq
|
|
|
14 |
- - policy
|
15 |
- MlpPolicy
|
16 |
- - policy_kwargs
|
17 |
+
- log_std_init: -2.689958330139309
|
18 |
+
net_arch:
|
19 |
+
- 400
|
20 |
+
- 300
|
21 |
+
use_sde: false
|
22 |
- - tau
|
23 |
- 0.01
|
24 |
- - train_freq
|
replay.mp4
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:897c78428cf51df7347c098979b8a47dc654d602e3e4993e06354579c9fc4c30
|
3 |
+
size 448267
|
results.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"mean_reward":
|
|
|
1 |
+
{"mean_reward": 28.160976400000003, "std_reward": 0.7219696332704303, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2022-12-31T18:33:54.752461"}
|
sac-seals-Swimmer-v0.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f62ba56e2bc5ed9a84b4b0726ae3b762dd0f073421dae062d62b623030db0670
|
3 |
+
size 5585156
|
sac-seals-Swimmer-v0/_stable_baselines3_version
CHANGED
@@ -1 +1 @@
|
|
1 |
-
1.6.
|
|
|
1 |
+
1.6.2
|
sac-seals-Swimmer-v0/actor.optimizer.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1013341
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ef5b663072e4b3b873de8900b81b1cc4e2e2cfe09c8e37ff711cd38f4033fcc
|
3 |
size 1013341
|
sac-seals-Swimmer-v0/critic.optimizer.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2021689
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2cb119f0daf4f5bdd01b2a85bb7823e1703e1385b9045001dcf47e0df0cd2baf
|
3 |
size 2021689
|
sac-seals-Swimmer-v0/data
CHANGED
@@ -4,17 +4,17 @@
|
|
4 |
":serialized:": "gAWVMAAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLnNhYy5wb2xpY2llc5SMCVNBQ1BvbGljeZSTlC4=",
|
5 |
"__module__": "stable_baselines3.sac.policies",
|
6 |
"__doc__": "\n Policy class (with both actor and critic) for SAC.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param sde_net_arch: Network architecture for extracting features\n when using gSDE. If None, the latent features from the policy will be used.\n Pass an empty list to use the states as features.\n :param use_expln: Use ``expln()`` function instead of ``exp()`` when using gSDE to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param clip_mean: Clip the mean output when using gSDE to avoid numerical instability.\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n :param n_critics: Number of critic networks to create.\n :param share_features_extractor: Whether to share or not the features extractor\n between the actor and the critic (this saves computation time)\n ",
|
7 |
-
"__init__": "<function SACPolicy.__init__ at
|
8 |
-
"_build": "<function SACPolicy._build at
|
9 |
-
"_get_constructor_parameters": "<function SACPolicy._get_constructor_parameters at
|
10 |
-
"reset_noise": "<function SACPolicy.reset_noise at
|
11 |
-
"make_actor": "<function SACPolicy.make_actor at
|
12 |
-
"make_critic": "<function SACPolicy.make_critic at
|
13 |
-
"forward": "<function SACPolicy.forward at
|
14 |
-
"_predict": "<function SACPolicy._predict at
|
15 |
-
"set_training_mode": "<function SACPolicy.set_training_mode at
|
16 |
"__abstractmethods__": "frozenset()",
|
17 |
-
"_abc_impl": "<_abc_data object at
|
18 |
},
|
19 |
"verbose": 1,
|
20 |
"policy_kwargs": {
|
@@ -40,7 +40,7 @@
|
|
40 |
},
|
41 |
"action_space": {
|
42 |
":type:": "<class 'gym.spaces.box.Box'>",
|
43 |
-
":serialized:": "gAWV6wsAAAAAAACMDmd5bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lGgFk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMBl9zaGFwZZRLAoWUjANsb3eUjBJudW1weS5jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWCAAAAAAAAAAAAIC/AACAv5RoCksChZSMAUOUdJRSlIwEaGlnaJRoEiiWCAAAAAAAAAAAAIA/AACAP5RoCksChZRoFXSUUpSMDWJvdW5kZWRfYmVsb3eUaBIolgIAAAAAAAAAAQGUaAeMAmIxlImIh5RSlChLA4wBfJROTk5K/////0r/////
|
44 |
"dtype": "float32",
|
45 |
"_shape": [
|
46 |
2
|
@@ -55,17 +55,17 @@
|
|
55 |
"num_timesteps": 1000192,
|
56 |
"_total_timesteps": 1000000,
|
57 |
"_num_timesteps_at_start": 0,
|
58 |
-
"seed":
|
59 |
"action_noise": null,
|
60 |
-
"start_time":
|
61 |
"learning_rate": {
|
62 |
":type:": "<class 'function'>",
|
63 |
-
":serialized:": "
|
64 |
},
|
65 |
-
"tensorboard_log": "runs/seals/Swimmer-
|
66 |
"lr_schedule": {
|
67 |
":type:": "<class 'function'>",
|
68 |
-
":serialized:": "
|
69 |
},
|
70 |
"_last_obs": null,
|
71 |
"_last_episode_starts": {
|
@@ -74,7 +74,7 @@
|
|
74 |
},
|
75 |
"_last_original_obs": {
|
76 |
":type:": "<class 'numpy.ndarray'>",
|
77 |
-
":serialized:": "
|
78 |
},
|
79 |
"_episode_num": 1000,
|
80 |
"use_sde": false,
|
@@ -82,7 +82,7 @@
|
|
82 |
"_current_progress_remaining": -0.00019199999999996997,
|
83 |
"ep_info_buffer": {
|
84 |
":type:": "<class 'collections.deque'>",
|
85 |
-
":serialized:": "gAWVgRAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpSMFW51bXB5LmNvcmUubXVsdGlhcnJheZSMBnNjYWxhcpSTlIwFbnVtcHmUjAVkdHlwZZSTlIwCZjiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////
|
86 |
},
|
87 |
"ep_success_buffer": {
|
88 |
":type:": "<class 'collections.deque'>",
|
@@ -100,13 +100,13 @@
|
|
100 |
":type:": "<class 'abc.ABCMeta'>",
|
101 |
":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==",
|
102 |
"__module__": "stable_baselines3.common.buffers",
|
103 |
-
"__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device
|
104 |
-
"__init__": "<function ReplayBuffer.__init__ at
|
105 |
-
"add": "<function ReplayBuffer.add at
|
106 |
-
"sample": "<function ReplayBuffer.sample at
|
107 |
-
"_get_samples": "<function ReplayBuffer._get_samples at
|
108 |
"__abstractmethods__": "frozenset()",
|
109 |
-
"_abc_impl": "<_abc_data object at
|
110 |
},
|
111 |
"replay_buffer_kwargs": {},
|
112 |
"train_freq": {
|
@@ -116,5 +116,7 @@
|
|
116 |
"use_sde_at_warmup": false,
|
117 |
"target_entropy": -2.0,
|
118 |
"ent_coef": "auto",
|
119 |
-
"target_update_interval": 1
|
|
|
|
|
120 |
}
|
|
|
4 |
":serialized:": "gAWVMAAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLnNhYy5wb2xpY2llc5SMCVNBQ1BvbGljeZSTlC4=",
|
5 |
"__module__": "stable_baselines3.sac.policies",
|
6 |
"__doc__": "\n Policy class (with both actor and critic) for SAC.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param sde_net_arch: Network architecture for extracting features\n when using gSDE. If None, the latent features from the policy will be used.\n Pass an empty list to use the states as features.\n :param use_expln: Use ``expln()`` function instead of ``exp()`` when using gSDE to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param clip_mean: Clip the mean output when using gSDE to avoid numerical instability.\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n :param n_critics: Number of critic networks to create.\n :param share_features_extractor: Whether to share or not the features extractor\n between the actor and the critic (this saves computation time)\n ",
|
7 |
+
"__init__": "<function SACPolicy.__init__ at 0x7f73fdab1ee0>",
|
8 |
+
"_build": "<function SACPolicy._build at 0x7f73fdab1f70>",
|
9 |
+
"_get_constructor_parameters": "<function SACPolicy._get_constructor_parameters at 0x7f73fda3a040>",
|
10 |
+
"reset_noise": "<function SACPolicy.reset_noise at 0x7f73fda3a0d0>",
|
11 |
+
"make_actor": "<function SACPolicy.make_actor at 0x7f73fda3a160>",
|
12 |
+
"make_critic": "<function SACPolicy.make_critic at 0x7f73fda3a1f0>",
|
13 |
+
"forward": "<function SACPolicy.forward at 0x7f73fda3a280>",
|
14 |
+
"_predict": "<function SACPolicy._predict at 0x7f73fda3a310>",
|
15 |
+
"set_training_mode": "<function SACPolicy.set_training_mode at 0x7f73fda3a3a0>",
|
16 |
"__abstractmethods__": "frozenset()",
|
17 |
+
"_abc_impl": "<_abc_data object at 0x7f73fdab0cc0>"
|
18 |
},
|
19 |
"verbose": 1,
|
20 |
"policy_kwargs": {
|
|
|
40 |
},
|
41 |
"action_space": {
|
42 |
":type:": "<class 'gym.spaces.box.Box'>",
|
43 |
+
":serialized:": "gAWV6wsAAAAAAACMDmd5bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lGgFk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMBl9zaGFwZZRLAoWUjANsb3eUjBJudW1weS5jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWCAAAAAAAAAAAAIC/AACAv5RoCksChZSMAUOUdJRSlIwEaGlnaJRoEiiWCAAAAAAAAAAAAIA/AACAP5RoCksChZRoFXSUUpSMDWJvdW5kZWRfYmVsb3eUaBIolgIAAAAAAAAAAQGUaAeMAmIxlImIh5RSlChLA4wBfJROTk5K/////0r/////SwB0lGJLAoWUaBV0lFKUjA1ib3VuZGVkX2Fib3ZllGgSKJYCAAAAAAAAAAEBlGghSwKFlGgVdJRSlIwKX25wX3JhbmRvbZSMFG51bXB5LnJhbmRvbS5fcGlja2xllIwSX19yYW5kb21zdGF0ZV9jdG9ylJOUjAdNVDE5OTM3lIWUUpR9lCiMDWJpdF9nZW5lcmF0b3KUaDCMBXN0YXRllH2UKIwDa2V5lGgSKJbACQAAAAAAAAAAAIDUt/+HPZICYJHdFVVVQW03xB9i6zaKeU/qR8oEBxO6m+fZGoL5zaD582xaBJqIOV0R0dRE+UYr4oAox36BZtgEvMuABVfIDSxkU/zAm/URIuDsNT1R1pmQ0mbTBLTnkSU/dwnOKav8c1yezE645DG4Zi6OKu7AIC2gqaFt2x6857Q95ooaFRyrs9exUzSRY/jTchJ6WBM6RUiPEkpXVIeUNmuF0rgNgfWhfnDfwTXMQbO2e7mM/4kbhi5VyByWo6ZxytcXAh3vCoxz2mO6CbiHUEFkLdwU3mC+E9zguBInDgxgxSasvL8rheOQhDiUSd+hvjkh/rtvbBkdjWmkjm9YZMeRcRzPWpUhhyVo1ppdpVzhGT+NSA72tpfW7Ywr4rc+5q9CcOcjm2AyIxUROxgq0QtMI9kF5mZjrlQf8PgyNQzVRviV7F+GZkwUNseBoy+mkcXkHFUSBf+YnFukIJn3ObFNhWto2cHnqFABSlz5+733nvZh0cI2cjVs+xK8QikBOsrX9qHhepejxghbKXU3TujHbDlWBNetNO7YAKLKnh4rIOn1EgoapdwP0InHjPDCK1kvHvKE2FDnqALD1F3Muo41fzu+vF4bvJbpsnTI3lZd4FmRbXZzwNy8X2/4Xwzg4dLLqylwwn+VRKRQWXIhD7wL6hsWyoj8dJX+zt74B5h+l7azfR8pDLdkR0xaiBSUenBHma83aaFS3zwPnytmlMj46gy0XLt4nrwWDPHHXKOVKsVXcm7021O490u/G98DRvXhwi37QRQM4ZmFVALWnSZPKcE7oPuWxye0h7P0xBu4E3miSz7sSn8M+KqFjU1ZJNTfHkmtwKbuLkaKsyGmBY0D2OheEQCPpp87bZansU2kEThxzl8JTnzkjNGI5UvIlOczX/AfKhVZjmIF2DqRJyUxvF9prZblZ48UN4u7DCizMGqW6vGjIjnE8B4HP/7D58N5cr5o6+W4FoVT3Cl8r+OsJT5cWuBbAp89ShQcj2PtevZdUqgdNiA4KSx0QH0b8P2mmuPWMrUy3ePWdBhLrPgMMdDD8a922KR+gYuu85enMo64O9/QDithPVepRflPglNfkXnlp0XS+0ILpvIWt2IXW6xQtVS7Cg9v7u4FKYNETesC3rM+1s5WkLjmBSvWcn2QiFBfk4aw1CPpjYuAs5PRXz6T61ccciZVlrB9Cs3WCKL+tevryRi4ueGHoJBsmxhcMTCvo1ueigyCnGCP0txIe3zNUiu3qW/StHRRk01d8iYvnJWypMb+crb2/Q/UF02QxiLTCNcUVKfh7DGCzVUazlPLgZQg5CkSZNmpUk8NniSKRKu3rQrkCo0pSZHBRTghdyAnskQ6gfSSV1InxaTJWyPVgNxdLcGvfG/Pt83I05T6D3dvpPfnHfe8ihOiPqPiRxlvpWboqOyhltg3pcTXH26hDVRtWRgen4yl8VbzXt52d9Pm42exrgxVKrfpysm8n/k+jk/g0xYAUR6bqgUkFalx8AVTHS3dFyJGgHtGeGmHO4+7HSXqPYF7Sz+dFzGj08AZEaAfe9bLe2LgVnWbrQ5WcyLVMv9fibIyheyGznG9sX0Mj3WHis13WZU81EFhzSpTe6EVrRobSXshPWwuV1CSWqaivMuvAhqdFh5S2BZRYR1mGXGVDFpsSfyPs0ZnMHJQbXdKWmGHPmf+khClRouBjQZgEYSaf4sEhXMu7EOSu3ZJb1Lg++nzfwCthHsJtxmDDUB7nDmMZ0u1VdYK7aju6xVZD3pMBDmW5hZdXAezT/NmWYmqsG3BZZ8UFVFucvCuG9C3U/TXwiPPHbo/aNNkDOwf+mF20yWfLumr5wETZmCYJbBN7PPLGVQf+UOxxZuLqjP3vV+TQz/gUI1cPOK+EGs4ljVipyUGUc6gd5w9egePo2MB1cIrN7kySFPSYgk++lOgtCz7uz1ariIR7xZvIyTwHtdVfonPkRsMlFAtHWeO1cdC6kIeBbwXbEuQ/rRNx/4Z6rlpFKkJTVuxZYBu06lXlXvbzVlUEaURiCkMi8A4rs6Dge1Z2hfxxYaLxhT24aNpWEpL4hMy5IejgCk6Z5SkF3ptQ7kZXxjra6WS34n4TBMfDbIgWXRjGgr1Ch+ujyMazr4zDZu9/KXwXMFYSyOL+aFlYH1+GbX2A6XwrQarnbNsyZh0rsmtuxz4eH7jmGXzDYxng57HjRVfg2TB9g5j7BxqAgW9q4en+2G5fKGLWT6eVa37VBqNSUECY1gh5dQpyFxX9yJc33ZazmDFlm8aWTb0K13Fn1mRvCIXZ+db3cexASnBct/9YarQ7vT6zhTWAjUrlXUCGiQdlPrTYLp8ijs+frEfryi1twq3wwZQSRX760uEOlhs/1GoKZArGDcrdJgzFfo49bsGELqGxfKqgXnKexxKyjIW9MejZQL1dIcMUz1YpnBRQFoMJq6ZZiaHd/o/98fWD9emudvV1181o78Deq4Hw8XXNf34Tuke16YOH8GtKHDq1AKAlKMiJ3DYOuQ/R+4mKvPqmn2b5Nq7t1ESQ1T8QOjzmrxrnMdvJj0RmLtO8wTY6Z39qsexViykKa76Q/XfDIyjnjqbhho48peQVM3noHfJXmtm6DsOOC8PflcygfAuw2wbK7TEOQPkUzEdbtljFV8OiaRyku766aesG6ItPvZ87Wco68Mgeg0xlSfDBSLa53ctCDy1nJH229EwvSZyX1TqLocVMwxzak5ZM8TaiGHhzB03QXHEqtxS9w/xZit1zBNe2+n/FDnsx8MjbLe516qyWfwgBVjrj2aBRUtPV6L4P1ittDB/UsVpSq2IuJqSzUymnWcmaSdh1ImJM9KsVRGvIE6ud80943p5Q/YXou8P6F+7wUwEcreuHlJN8KSuw8eLbAbyykTBYyy/y4Tj4PWj8dGGS1G+a1xwzjKfZAA3YiRnP36J/h+CrV5SJIkSX4HNgYrsEYmSQHMmosL9AMmAdJVwEkhklJJgg/dI2Cgv4mIWweIzMvHDP+BmPKliAdtfhYOyYV6Z7WU5ltxIp2Kw6toj3jLBi9zlU30zY+lk4n39JGwSvQ9oGw9pn7fBF2gd+CiIZCLrpk3biEVBkZnoY5iBlJz8/K5ly5jqGZrrlAEIH0f9aQbim1qr8ZAGxZ50ZfldgoAXkcXfBGG7HxON92KWLDiUdjBBJWAS7pY3YMUognHB4Q+MpzoYUQmn5Yt2UKKfogFzcgtjOokJhQos1azacEKqsgc1x0UeOF8SqSXt/6u2OnyhhSWqLEsZnDD/8o1YQYwOPR8ZW0Ran30UJA/OjRS83UBnpIvQ6iG2b/lgXSTzIXf8WpRoB4wCdTSUiYiHlFKUKEsDaAtOTk5K/////0r/////SwB0lGJNcAKFlGgVdJRSlIwDcG9zlE1wAnWMCWhhc19nYXVzc5RLAIwFZ2F1c3OURwAAAAAAAAAAdWJ1Yi4=",
|
44 |
"dtype": "float32",
|
45 |
"_shape": [
|
46 |
2
|
|
|
55 |
"num_timesteps": 1000192,
|
56 |
"_total_timesteps": 1000000,
|
57 |
"_num_timesteps_at_start": 0,
|
58 |
+
"seed": 5,
|
59 |
"action_noise": null,
|
60 |
+
"start_time": 1670518607720798620,
|
61 |
"learning_rate": {
|
62 |
":type:": "<class 'function'>",
|
63 |
+
":serialized:": "gAWVhQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMUy9ob21lL21heGltaWxpYW4vdmVudi9saWIvcHl0aG9uMy44L3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwEZnVuY5RLgEMCAAGUjAN2YWyUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flGgMdU5OaACMEF9tYWtlX2VtcHR5X2NlbGyUk5QpUpSFlHSUUpSMHGNsb3VkcGlja2xlLmNsb3VkcGlja2xlX2Zhc3SUjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoHn2UfZQoaBZoDYwMX19xdWFsbmFtZV9flIwZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5SMD19fYW5ub3RhdGlvbnNfX5R9lIwOX19rd2RlZmF1bHRzX1+UTowMX19kZWZhdWx0c19flE6MCl9fbW9kdWxlX1+UaBeMB19fZG9jX1+UTowLX19jbG9zdXJlX1+UaACMCl9tYWtlX2NlbGyUk5RHPzoz1Xjblu+FlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMC4="
|
64 |
},
|
65 |
+
"tensorboard_log": "runs/seals/Swimmer-v0__sac__7__1670518603/seals-Swimmer-v0",
|
66 |
"lr_schedule": {
|
67 |
":type:": "<class 'function'>",
|
68 |
+
":serialized:": "gAWVhQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMUy9ob21lL21heGltaWxpYW4vdmVudi9saWIvcHl0aG9uMy44L3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwEZnVuY5RLgEMCAAGUjAN2YWyUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flGgMdU5OaACMEF9tYWtlX2VtcHR5X2NlbGyUk5QpUpSFlHSUUpSMHGNsb3VkcGlja2xlLmNsb3VkcGlja2xlX2Zhc3SUjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoHn2UfZQoaBZoDYwMX19xdWFsbmFtZV9flIwZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5SMD19fYW5ub3RhdGlvbnNfX5R9lIwOX19rd2RlZmF1bHRzX1+UTowMX19kZWZhdWx0c19flE6MCl9fbW9kdWxlX1+UaBeMB19fZG9jX1+UTowLX19jbG9zdXJlX1+UaACMCl9tYWtlX2NlbGyUk5RHPzoz1Xjblu+FlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMC4="
|
69 |
},
|
70 |
"_last_obs": null,
|
71 |
"_last_episode_starts": {
|
|
|
74 |
},
|
75 |
"_last_original_obs": {
|
76 |
":type:": "<class 'numpy.ndarray'>",
|
77 |
+
":serialized:": "gAWVxQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJZQAAAAAAAAAJBc42JH7PI/FaPJIyh55L8l4H/8thwDQHGC60/f8fu/mSQHGOXy+L8WZoRFxp+Wv9YI44rs05q/AFUugCZtmL8+Dm4JgleeP0wJaRRRQMG/lIwFbnVtcHmUjAVkdHlwZZSTlIwCZjiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBSwqGlIwBQ5R0lFKULg=="
|
78 |
},
|
79 |
"_episode_num": 1000,
|
80 |
"use_sde": false,
|
|
|
82 |
"_current_progress_remaining": -0.00019199999999996997,
|
83 |
"ep_info_buffer": {
|
84 |
":type:": "<class 'collections.deque'>",
|
85 |
+
":serialized:": "gAWVgRAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpSMFW51bXB5LmNvcmUubXVsdGlhcnJheZSMBnNjYWxhcpSTlIwFbnVtcHmUjAVkdHlwZZSTlIwCZjiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYkMIvayJBb42N0CUhpRSlIwBbJRN6AOMAXSUR0CXjk+dsi0OdX2UKGgGaAloD0MIRxyygXQpPECUhpRSlGgVTegDaBZHQJeUOOKfnOl1fZQoaAZoCWgPQwgoYDsYsYs5QJSGlFKUaBVN6ANoFkdAl5qkxmCiAXV9lChoBmgJaA9DCEW94NOc5DhAlIaUUpRoFU3oA2gWR0CXoOV8Ti84dX2UKGgGaAloD0MIlwLS/geAO0CUhpRSlGgVTegDaBZHQJenMkC3gDR1fZQoaAZoCWgPQwizQLtDiuk4QJSGlFKUaBVN6ANoFkdAl619Fvybx3V9lChoBmgJaA9DCPRNmgZFdzZAlIaUUpRoFU3oA2gWR0CXs0UiILw4dX2UKGgGaAloD0MIxRwEHa3yOUCUhpRSlGgVTegDaBZHQJe4Ef4h2W91fZQoaAZoCWgPQwi8V61M+L05QJSGlFKUaBVN6ANoFkdAl76Hh4t6HHV9lChoBmgJaA9DCMLCSZo/NjlAlIaUUpRoFU3oA2gWR0CXxD48lolEdX2UKGgGaAloD0MIUn+9woI3NkCUhpRSlGgVTegDaBZHQJfJMvkBCD51fZQoaAZoCWgPQwgof/eOGp83QJSGlFKUaBVN6ANoFkdAl84RUFSsKnV9lChoBmgJaA9DCJOpglFJiThAlIaUUpRoFU3oA2gWR0CX04DCxeLOdX2UKGgGaAloD0MIvR3htOBVOUCUhpRSlGgVTegDaBZHQJfZ9svZh8Z1fZQoaAZoCWgPQwjC+6pcqLQ6QJSGlFKUaBVN6ANoFkdAl+BtnPE873V9lChoBmgJaA9DCJ33/3HClDlAlIaUUpRoFU3oA2gWR0CX5u8uSOindX2UKGgGaAloD0MII0kQroDmOUCUhpRSlGgVTegDaBZHQJftbechC+l1fZQoaAZoCWgPQwgnpDUGnYA+QJSGlFKUaBVN6ANoFkdAl/Os580DU3V9lChoBmgJaA9DCNtugm+a2jdAlIaUUpRoFU3oA2gWR0CX+g1CgK4QdX2UKGgGaAloD0MIml33ViR6PECUhpRSlGgVTegDaBZHQJf/+gsbvPV1fZQoaAZoCWgPQwiWBKipZdc7QJSGlFKUaBVN6ANoFkdAmAVncpLEk3V9lChoBmgJaA9DCLK8qx4wxztAlIaUUpRoFU3oA2gWR0CYCjpt78ekdX2UKGgGaAloD0MIdXedDfm/OkCUhpRSlGgVTegDaBZHQJgP2TlkpZx1fZQoaAZoCWgPQwh2qKYk61A6QJSGlFKUaBVN6ANoFkdAmBUwS8J2MnV9lChoBmgJaA9DCB4V/3dEKTpAlIaUUpRoFU3oA2gWR0CYG4Kkl/pddX2UKGgGaAloD0MIXoQpyqV9OUCUhpRSlGgVTegDaBZHQJg8CAvtdAx1fZQoaAZoCWgPQwgo8E4+PQ45QJSGlFKUaBVN6ANoFkdAmEIEwN9YwXV9lChoBmgJaA9DCJ60cFmF4TlAlIaUUpRoFU3oA2gWR0CYR+UhV2iddX2UKGgGaAloD0MIBADHnj2bOkCUhpRSlGgVTegDaBZHQJhN2w0O3Dx1fZQoaAZoCWgPQwhv2SH+YW83QJSGlFKUaBVN6ANoFkdAmFPUFW4mTnV9lChoBmgJaA9DCIi85erHHjlAlIaUUpRoFU3oA2gWR0CYWdNO/L1VdX2UKGgGaAloD0MIK1CLwcOYPkCUhpRSlGgVTegDaBZHQJhfHPWxyGV1fZQoaAZoCWgPQwibHD7pRNI5QJSGlFKUaBVN6ANoFkdAmGTr4i5d4XV9lChoBmgJaA9DCJZBtcGJoDhAlIaUUpRoFU3oA2gWR0CYauxUvPC3dX2UKGgGaAloD0MITKYKRiWZOkCUhpRSlGgVTegDaBZHQJhxGPLgXM11fZQoaAZoCWgPQwghHR7C+Fk5QJSGlFKUaBVN6ANoFkdAmHdYAwPAf3V9lChoBmgJaA9DCFpJK76hODlAlIaUUpRoFU3oA2gWR0CYfO4eLehxdX2UKGgGaAloD0MISPyKNVzAOUCUhpRSlGgVTegDaBZHQJiB2p97Wup1fZQoaAZoCWgPQwhV3o5wWsg6QJSGlFKUaBVN6ANoFkdAmIbtfw7T2HV9lChoBmgJaA9DCDEHQUerTj1AlIaUUpRoFU3oA2gWR0CYjQ9H+ZPVdX2UKGgGaAloD0MIRS3NrRCiOkCUhpRSlGgVTegDaBZHQJiTkUlAu7J1fZQoaAZoCWgPQwh39pUH6c05QJSGlFKUaBVN6ANoFkdAmJoO+mFajnV9lChoBmgJaA9DCMOedvhr8jxAlIaUUpRoFU3oA2gWR0CYoHAbADaHdX2UKGgGaAloD0MIavtXVppwPUCUhpRSlGgVTegDaBZHQJil36BRQ791fZQoaAZoCWgPQwgqHaz/c5Q7QJSGlFKUaBVN6ANoFkdAmKthLsa86HV9lChoBmgJaA9DCPwXCAJkhD1AlIaUUpRoFU3oA2gWR0CYsAs4T9KmdX2UKGgGaAloD0MIsVOsGoSpPECUhpRSlGgVTegDaBZHQJi1F66asp51fZQoaAZoCWgPQwgMzXUaaf01QJSGlFKUaBVN6ANoFkdAmLrymIj4YnV9lChoBmgJaA9DCFIP0egO1jhAlIaUUpRoFU3oA2gWR0CYv3gB91EFdX2UKGgGaAloD0MIpbxWQnfJOUCUhpRSlGgVTegDaBZHQJjEvbN8ma91fZQoaAZoCWgPQwj0NGCQ9ME6QJSGlFKUaBVN6ANoFkdAmOORKpT/AHV9lChoBmgJaA9DCO+MtiqJdDZAlIaUUpRoFU3oA2gWR0CY6YVaOgg6dX2UKGgGaAloD0MIMQdBR6suOECUhpRSlGgVTegDaBZHQJjv8W0qpcZ1fZQoaAZoCWgPQwiRgTy7fJs2QJSGlFKUaBVN6ANoFkdAmPZbEDQqqnV9lChoBmgJaA9DCMZOeAlOfThAlIaUUpRoFU3oA2gWR0CY/MrOZ9eAdX2UKGgGaAloD0MIyeNp+YGvO0CUhpRSlGgVTegDaBZHQJkCV81Gb1B1fZQoaAZoCWgPQwhRpWYPtP47QJSGlFKUaBVN6ANoFkdAmQhZyZKFqXV9lChoBmgJaA9DCMkeoWZInTZAlIaUUpRoFU3oA2gWR0CZDpw1zhgmdX2UKGgGaAloD0MIY7Mj1XcuOUCUhpRSlGgVTegDaBZHQJkU3QfIS151fZQoaAZoCWgPQwiVfsLZreU5QJSGlFKUaBVN6ANoFkdAmRr7Z8KG+XV9lChoBmgJaA9DCIAtr1xvjztAlIaUUpRoFU3oA2gWR0CZIRKNhmXgdX2UKGgGaAloD0MIgXfy6bEZP0CUhpRSlGgVTegDaBZHQJknfzlLeyl1fZQoaAZoCWgPQwgfSUkPQzM5QJSGlFKUaBVN6ANoFkdAmS18/6frbHV9lChoBmgJaA9DCB09fm/TDzxAlIaUUpRoFU3oA2gWR0CZM9lmOEM9dX2UKGgGaAloD0MIwJSBA1qGOkCUhpRSlGgVTegDaBZHQJk43n4fwJB1fZQoaAZoCWgPQwijW6/pQUU5QJSGlFKUaBVN6ANoFkdAmT76aLGaQXV9lChoBmgJaA9DCPqXpDLFuDtAlIaUUpRoFU3oA2gWR0CZRPtT1kDqdX2UKGgGaAloD0MIPzp15bOwOkCUhpRSlGgVTegDaBZHQJlK/gR9PUN1fZQoaAZoCWgPQwgx0/avrHg1QJSGlFKUaBVN6ANoFkdAmU+dKh+OO3V9lChoBmgJaA9DCJRsdTkl1DlAlIaUUpRoFU3oA2gWR0CZU/IBRyfddX2UKGgGaAloD0MINV8lH7tfOECUhpRSlGgVTegDaBZHQJlZzOu7pV11fZQoaAZoCWgPQwhPIy2Vt/s4QJSGlFKUaBVN6ANoFkdAmV91KkEcKnV9lChoBmgJaA9DCLzOhvwzWzlAlIaUUpRoFU3oA2gWR0CZZAKpDNQkdX2UKGgGaAloD0MIG0mCcAXUOECUhpRSlGgVTegDaBZHQJlpOQRwqAl1fZQoaAZoCWgPQwj4F0FjJh04QJSGlFKUaBVN6ANoFkdAmW/2KZUkwHV9lChoBmgJaA9DCB3oobYNvzRAlIaUUpRoFU3oA2gWR0CZiUeI2wV1dX2UKGgGaAloD0MIObh0zHl+OkCUhpRSlGgVTegDaBZHQJmN+d3B55Z1fZQoaAZoCWgPQwifzarP1YY4QJSGlFKUaBVN6ANoFkdAmZKxR64Ue3V9lChoBmgJaA9DCKclVkYjpzdAlIaUUpRoFU3oA2gWR0CZlxImgJ1JdX2UKGgGaAloD0MIS1tc4zOVN0CUhpRSlGgVTegDaBZHQJmbnxz7uUl1fZQoaAZoCWgPQwh3TUhrDJY6QJSGlFKUaBVN6ANoFkdAmaCTcZccEXV9lChoBmgJaA9DCCS2uwfoJjlAlIaUUpRoFU3oA2gWR0CZpgi8WbgCdX2UKGgGaAloD0MIIUCGjh2IN0CUhpRSlGgVTegDaBZHQJmrKnfl6qt1fZQoaAZoCWgPQwj5ugz/6RIzQJSGlFKUaBVN6ANoFkdAmbGfLDAJs3V9lChoBmgJaA9DCJshVRSv9jpAlIaUUpRoFU3oA2gWR0CZuA6VMVUNdX2UKGgGaAloD0MID+1jBb+5OECUhpRSlGgVTegDaBZHQJm+hbRnezl1fZQoaAZoCWgPQwgzMV2I1Q85QJSGlFKUaBVN6ANoFkdAmcThoysS03V9lChoBmgJaA9DCBL4w89/Nz1AlIaUUpRoFU3oA2gWR0CZyzSnLq2SdX2UKGgGaAloD0MIhLndy336O0CUhpRSlGgVTegDaBZHQJnRhM6BAfN1fZQoaAZoCWgPQwhDPBIvT4c6QJSGlFKUaBVN6ANoFkdAmdfPqC6H03V9lChoBmgJaA9DCPKWqx+beDpAlIaUUpRoFU3oA2gWR0CZ3h3Zwn6VdX2UKGgGaAloD0MIDYtR19pLOECUhpRSlGgVTegDaBZHQJnkSe4Cp3p1fZQoaAZoCWgPQwjK3Hwjunc8QJSGlFKUaBVN6ANoFkdAmep/2f02+HV9lChoBmgJaA9DCJSl1vuNdjVAlIaUUpRoFU3oA2gWR0CZ8L3cpLEldX2UKGgGaAloD0MI2LeTiPA7O0CUhpRSlGgVTegDaBZHQJn2+/k/8l51fZQoaAZoCWgPQwg7yOvBpLA5QJSGlFKUaBVN6ANoFkdAmf0yamXPaHV9lChoBmgJaA9DCNxkVBnGRThAlIaUUpRoFU3oA2gWR0CaAmaP0Zm7dX2UKGgGaAloD0MIlxsMdVi9NkCUhpRSlGgVTegDaBZHQJoHT13+uNh1fZQoaAZoCWgPQwj27SQi/Ks5QJSGlFKUaBVN6ANoFkdAmgvebI91U3V9lChoBmgJaA9DCB8PfXcrvzpAlIaUUpRoFU3oA2gWR0CaEGQ1rIo3dWUu"
|
86 |
},
|
87 |
"ep_success_buffer": {
|
88 |
":type:": "<class 'collections.deque'>",
|
|
|
100 |
":type:": "<class 'abc.ABCMeta'>",
|
101 |
":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==",
|
102 |
"__module__": "stable_baselines3.common.buffers",
|
103 |
+
"__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
|
104 |
+
"__init__": "<function ReplayBuffer.__init__ at 0x7f73fda860d0>",
|
105 |
+
"add": "<function ReplayBuffer.add at 0x7f73fda86160>",
|
106 |
+
"sample": "<function ReplayBuffer.sample at 0x7f73fda861f0>",
|
107 |
+
"_get_samples": "<function ReplayBuffer._get_samples at 0x7f73fda86280>",
|
108 |
"__abstractmethods__": "frozenset()",
|
109 |
+
"_abc_impl": "<_abc_data object at 0x7f73fdb08300>"
|
110 |
},
|
111 |
"replay_buffer_kwargs": {},
|
112 |
"train_freq": {
|
|
|
116 |
"use_sde_at_warmup": false,
|
117 |
"target_entropy": -2.0,
|
118 |
"ent_coef": "auto",
|
119 |
+
"target_update_interval": 1,
|
120 |
+
"batch_norm_stats": [],
|
121 |
+
"batch_norm_stats_target": []
|
122 |
}
|
sac-seals-Swimmer-v0/ent_coef_optimizer.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92ee1fa53c32c2297cc9a5240938fecdf6b4871995f8db2e954d6027bbce4aef
|
3 |
+
size 1507
|
sac-seals-Swimmer-v0/policy.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2526661
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dbd4df5a2a75abdb4613e38ece864bbe13c55836825d21fcf50b10a43ffdf929
|
3 |
size 2526661
|
sac-seals-Swimmer-v0/pytorch_variables.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 747
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d04d0d4e231386fa5da26d87cd17322def1683a43031d869c4ca4441710934c
|
3 |
size 747
|
sac-seals-Swimmer-v0/system_info.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
-
OS: Linux-5.4.0-
|
2 |
Python: 3.8.10
|
3 |
-
Stable-Baselines3: 1.6.
|
4 |
PyTorch: 1.11.0+cu102
|
5 |
GPU Enabled: False
|
6 |
Numpy: 1.22.3
|
|
|
1 |
+
OS: Linux-5.4.0-125-generic-x86_64-with-glibc2.29 #141-Ubuntu SMP Wed Aug 10 13:42:03 UTC 2022
|
2 |
Python: 3.8.10
|
3 |
+
Stable-Baselines3: 1.6.2
|
4 |
PyTorch: 1.11.0+cu102
|
5 |
GPU Enabled: False
|
6 |
Numpy: 1.22.3
|
train_eval_metrics.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37c5ceb0198fba0964a2f8cf2184ccaf70195eab06d42bb8f20325d8e17c9230
|
3 |
+
size 31667
|