raghavauppuluri committed on
Commit
f4b8042
1 Parent(s): e39b528

init commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. mbpo_half_cheetah_v4/diff/.hydra/config.yaml +93 -0
  2. mbpo_half_cheetah_v4/diff/.hydra/hydra.yaml +127 -0
  3. mbpo_half_cheetah_v4/diff/.hydra/overrides.yaml +2 -0
  4. mbpo_half_cheetah_v4/diff/env_stats.pickle +3 -0
  5. mbpo_half_cheetah_v4/diff/eval.csv +0 -0
  6. mbpo_half_cheetah_v4/diff/main.log +0 -0
  7. mbpo_half_cheetah_v4/diff/model.pth +3 -0
  8. mbpo_half_cheetah_v4/diff/model_train.csv +0 -0
  9. mbpo_half_cheetah_v4/diff/replay_buffer.npz +3 -0
  10. mbpo_half_cheetah_v4/diff/results.csv +401 -0
  11. mbpo_half_cheetah_v4/diff/sac.pth +3 -0
  12. mbpo_half_cheetah_v4/diff/train.csv +0 -0
  13. mbpo_half_cheetah_v4/main/.hydra/config.yaml +93 -0
  14. mbpo_half_cheetah_v4/main/.hydra/hydra.yaml +128 -0
  15. mbpo_half_cheetah_v4/main/.hydra/overrides.yaml +3 -0
  16. mbpo_half_cheetah_v4/main/env_stats.pickle +3 -0
  17. mbpo_half_cheetah_v4/main/eval.csv +0 -0
  18. mbpo_half_cheetah_v4/main/main.log +0 -0
  19. mbpo_half_cheetah_v4/main/model.pth +3 -0
  20. mbpo_half_cheetah_v4/main/model_train.csv +0 -0
  21. mbpo_half_cheetah_v4/main/replay_buffer.npz +3 -0
  22. mbpo_half_cheetah_v4/main/results.csv +401 -0
  23. mbpo_half_cheetah_v4/main/sac.pth +3 -0
  24. mbpo_half_cheetah_v4/main/train.csv +0 -0
  25. mbpo_inv_pendulum_v4/diff/.hydra/config.yaml +93 -0
  26. mbpo_inv_pendulum_v4/diff/.hydra/hydra.yaml +127 -0
  27. mbpo_inv_pendulum_v4/diff/.hydra/overrides.yaml +2 -0
  28. mbpo_inv_pendulum_v4/diff/env_stats.pickle +3 -0
  29. mbpo_inv_pendulum_v4/diff/eval.csv +0 -0
  30. mbpo_inv_pendulum_v4/diff/main.log +0 -0
  31. mbpo_inv_pendulum_v4/diff/model.pth +3 -0
  32. mbpo_inv_pendulum_v4/diff/model_train.csv +0 -0
  33. mbpo_inv_pendulum_v4/diff/replay_buffer.npz +3 -0
  34. mbpo_inv_pendulum_v4/diff/results.csv +81 -0
  35. mbpo_inv_pendulum_v4/diff/sac.pth +3 -0
  36. mbpo_inv_pendulum_v4/diff/train.csv +198 -0
  37. mbpo_inv_pendulum_v4/main/.hydra/config.yaml +93 -0
  38. mbpo_inv_pendulum_v4/main/.hydra/hydra.yaml +128 -0
  39. mbpo_inv_pendulum_v4/main/.hydra/overrides.yaml +3 -0
  40. mbpo_inv_pendulum_v4/main/env_stats.pickle +3 -0
  41. mbpo_inv_pendulum_v4/main/eval.csv +0 -0
  42. mbpo_inv_pendulum_v4/main/main.log +0 -0
  43. mbpo_inv_pendulum_v4/main/model.pth +3 -0
  44. mbpo_inv_pendulum_v4/main/model_train.csv +0 -0
  45. mbpo_inv_pendulum_v4/main/replay_buffer.npz +3 -0
  46. mbpo_inv_pendulum_v4/main/results.csv +81 -0
  47. mbpo_inv_pendulum_v4/main/sac.pth +3 -0
  48. mbpo_inv_pendulum_v4/main/train.csv +198 -0
  49. pets_pusher/diff/.hydra/config.yaml +68 -0
  50. pets_pusher/diff/.hydra/hydra.yaml +128 -0
mbpo_half_cheetah_v4/diff/.hydra/config.yaml ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 0
2
+ device: cuda:0
3
+ log_frequency_agent: 1000
4
+ save_video: false
5
+ debug_mode: false
6
+ experiment: default
7
+ root_dir: ./exp
8
+ algorithm:
9
+ name: mbpo
10
+ normalize: true
11
+ normalize_double_precision: true
12
+ target_is_delta: true
13
+ learned_rewards: true
14
+ freq_train_model: ${overrides.freq_train_model}
15
+ real_data_ratio: 0.0
16
+ sac_samples_action: true
17
+ initial_exploration_steps: 5000
18
+ random_initial_explore: false
19
+ num_eval_episodes: 1
20
+ agent:
21
+ _target_: mbrl.third_party.pytorch_sac_pranz24.sac.SAC
22
+ num_inputs: ???
23
+ action_space:
24
+ _target_: gym.env.Box
25
+ low: ???
26
+ high: ???
27
+ shape: ???
28
+ args:
29
+ gamma: ${overrides.sac_gamma}
30
+ tau: ${overrides.sac_tau}
31
+ alpha: ${overrides.sac_alpha}
32
+ policy: ${overrides.sac_policy}
33
+ target_update_interval: ${overrides.sac_target_update_interval}
34
+ automatic_entropy_tuning: ${overrides.sac_automatic_entropy_tuning}
35
+ target_entropy: ${overrides.sac_target_entropy}
36
+ hidden_size: ${overrides.sac_hidden_size}
37
+ device: ${device}
38
+ lr: ${overrides.sac_lr}
39
+ dynamics_model:
40
+ _target_: mbrl.models.GaussianMLP
41
+ device: ${device}
42
+ num_layers: 4
43
+ in_size: ???
44
+ out_size: ???
45
+ ensemble_size: 7
46
+ hid_size: 200
47
+ deterministic: false
48
+ propagation_method: random_model
49
+ learn_logvar_bounds: false
50
+ activation_fn_cfg:
51
+ _target_: torch.nn.SiLU
52
+ overrides:
53
+ env: gym___HalfCheetah-v4
54
+ term_fn: no_termination
55
+ num_steps: 400000
56
+ epoch_length: 1000
57
+ num_elites: 5
58
+ patience: 5
59
+ model_lr: 0.001
60
+ model_wd: 1.0e-05
61
+ model_batch_size: 256
62
+ validation_ratio: 0.2
63
+ freq_train_model: 250
64
+ effective_model_rollouts_per_step: 400
65
+ rollout_schedule:
66
+ - 20
67
+ - 150
68
+ - 1
69
+ - 1
70
+ num_sac_updates_per_step: 10
71
+ sac_updates_every_steps: 1
72
+ num_epochs_to_retain_sac_buffer: 1
73
+ sac_gamma: 0.99
74
+ sac_tau: 0.005
75
+ sac_alpha: 0.2
76
+ sac_policy: Gaussian
77
+ sac_target_update_interval: 1
78
+ sac_automatic_entropy_tuning: true
79
+ sac_target_entropy: -1
80
+ sac_hidden_size: 512
81
+ sac_lr: 0.0003
82
+ sac_batch_size: 256
83
+ action_optimizer:
84
+ _target_: mbrl.planning.CEMOptimizer
85
+ num_iterations: ${overrides.cem_num_iters}
86
+ elite_ratio: ${overrides.cem_elite_ratio}
87
+ population_size: ${overrides.cem_population_size}
88
+ alpha: ${overrides.cem_alpha}
89
+ lower_bound: ???
90
+ upper_bound: ???
91
+ return_mean_elites: true
92
+ device: ${device}
93
+ clipped_normal: ${overrides.cem_clipped_normal}
mbpo_half_cheetah_v4/diff/.hydra/hydra.yaml ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${root_dir}/${algorithm.name}/${experiment}/${overrides.env}/${now:%Y.%m.%d}/${now:%H%M%S}
4
+ sweep:
5
+ dir: ${root_dir}/${algorithm.name}/${experiment}/${overrides.env}/${now:%Y.%m.%d}/${now:%H%M%S}
6
+ subdir: ${hydra.job.num}
7
+ hydra_logging:
8
+ version: 1
9
+ formatters:
10
+ simple:
11
+ format: '[%(asctime)s][HYDRA] %(message)s'
12
+ handlers:
13
+ console:
14
+ class: logging.StreamHandler
15
+ formatter: simple
16
+ stream: ext://sys.stdout
17
+ root:
18
+ level: INFO
19
+ handlers:
20
+ - console
21
+ loggers:
22
+ logging_example:
23
+ level: DEBUG
24
+ disable_existing_loggers: false
25
+ job_logging:
26
+ version: 1
27
+ formatters:
28
+ simple:
29
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
30
+ handlers:
31
+ console:
32
+ class: logging.StreamHandler
33
+ formatter: simple
34
+ stream: ext://sys.stdout
35
+ file:
36
+ class: logging.FileHandler
37
+ formatter: simple
38
+ filename: ${hydra.job.name}.log
39
+ root:
40
+ level: INFO
41
+ handlers:
42
+ - console
43
+ - file
44
+ disable_existing_loggers: false
45
+ sweeper:
46
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
47
+ max_batch_size: null
48
+ launcher:
49
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
50
+ help:
51
+ app_name: ${hydra.job.name}
52
+ header: '${hydra.help.app_name} is powered by Hydra.
53
+
54
+ '
55
+ footer: 'Powered by Hydra (https://hydra.cc)
56
+
57
+ Use --hydra-help to view Hydra specific help
58
+
59
+ '
60
+ template: '${hydra.help.header}
61
+
62
+ == Configuration groups ==
63
+
64
+ Compose your configuration from those groups (group=option)
65
+
66
+
67
+ $APP_CONFIG_GROUPS
68
+
69
+
70
+ == Config ==
71
+
72
+ Override anything in the config (foo.bar=value)
73
+
74
+
75
+ $CONFIG
76
+
77
+
78
+ ${hydra.help.footer}
79
+
80
+ '
81
+ hydra_help:
82
+ hydra_help: ???
83
+ template: 'Hydra (${hydra.runtime.version})
84
+
85
+ See https://hydra.cc for more info.
86
+
87
+
88
+ == Flags ==
89
+
90
+ $FLAGS_HELP
91
+
92
+
93
+ == Configuration groups ==
94
+
95
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
96
+ to command line)
97
+
98
+
99
+ $HYDRA_CONFIG_GROUPS
100
+
101
+
102
+ Use ''--cfg hydra'' to Show the Hydra config.
103
+
104
+ '
105
+ output_subdir: .hydra
106
+ overrides:
107
+ hydra: []
108
+ task:
109
+ - algorithm=mbpo
110
+ - overrides=mbpo_halfcheetah
111
+ job:
112
+ name: main
113
+ override_dirname: algorithm=mbpo,overrides=mbpo_halfcheetah
114
+ id: ???
115
+ num: ???
116
+ config_name: main
117
+ env_set: {}
118
+ env_copy: []
119
+ config:
120
+ override_dirname:
121
+ kv_sep: '='
122
+ item_sep: ','
123
+ exclude_keys: []
124
+ runtime:
125
+ version: 1.0.3
126
+ cwd: /home/raghava/projects/mbrl-lib
127
+ verbose: false
mbpo_half_cheetah_v4/diff/.hydra/overrides.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ - algorithm=mbpo
2
+ - overrides=mbpo_halfcheetah
mbpo_half_cheetah_v4/diff/env_stats.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:393096d51d9d81c76f75f7ecaa8d8c980cec2ad587386fb9a9f6fb6444cb5d7a
3
+ size 566
mbpo_half_cheetah_v4/diff/eval.csv ADDED
File without changes
mbpo_half_cheetah_v4/diff/main.log ADDED
File without changes
mbpo_half_cheetah_v4/diff/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52caaf4f3d050bab035f7be401b379f0dbd4251788656d2aab98c8431954b93d
3
+ size 3718181
mbpo_half_cheetah_v4/diff/model_train.csv ADDED
The diff for this file is too large to render. See raw diff
 
mbpo_half_cheetah_v4/diff/replay_buffer.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7b8663edd7190d5eab01bf4600ed904c2ce8084a10f50ff5670c5d0d4a8eff6
3
+ size 132001766
mbpo_half_cheetah_v4/diff/results.csv ADDED
@@ -0,0 +1,401 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ env_step,episode_reward,epoch,rollout_length,step
2
+ 999.0,-294.7836078708305,0.0,1.0,1
3
+ 1999.0,196.1418426044376,1.0,1.0,2
4
+ 2999.0,-278.4890888897964,2.0,1.0,3
5
+ 3999.0,-158.75335195255366,3.0,1.0,4
6
+ 4999.0,258.6594261853722,4.0,1.0,5
7
+ 5999.0,720.156389611084,5.0,1.0,6
8
+ 6999.0,1076.3906862608796,6.0,1.0,7
9
+ 7999.0,1678.9706425481238,7.0,1.0,8
10
+ 8999.0,1974.7106551555864,8.0,1.0,9
11
+ 9999.0,2414.6940830428266,9.0,1.0,10
12
+ 10999.0,2604.865262198175,10.0,1.0,11
13
+ 11999.0,3347.303775391816,11.0,1.0,12
14
+ 12999.0,3558.54738703696,12.0,1.0,13
15
+ 13999.0,3941.6085198282,13.0,1.0,14
16
+ 14999.0,3959.275788949466,14.0,1.0,15
17
+ 15999.0,4215.243116391198,15.0,1.0,16
18
+ 16999.0,4776.6297450474995,16.0,1.0,17
19
+ 17999.0,4609.127704789694,17.0,1.0,18
20
+ 18999.0,4900.340315353441,18.0,1.0,19
21
+ 19999.0,4684.670743447068,19.0,1.0,20
22
+ 20999.0,4940.0799419830955,20.0,1.0,21
23
+ 21999.0,5420.052143621577,21.0,1.0,22
24
+ 22999.0,5409.976577664324,22.0,1.0,23
25
+ 23999.0,6062.370504672796,23.0,1.0,24
26
+ 24999.0,5919.90541108176,24.0,1.0,25
27
+ 25999.0,6702.695296422866,25.0,1.0,26
28
+ 26999.0,6301.405183484551,26.0,1.0,27
29
+ 27999.0,3197.5470276634874,27.0,1.0,28
30
+ 28999.0,5297.705512083361,28.0,1.0,29
31
+ 29999.0,5741.028345497354,29.0,1.0,30
32
+ 30999.0,7178.758132762722,30.0,1.0,31
33
+ 31999.0,7425.934815518168,31.0,1.0,32
34
+ 32999.0,7814.862875775929,32.0,1.0,33
35
+ 33999.0,5773.013605914988,33.0,1.0,34
36
+ 34999.0,7835.987632739687,34.0,1.0,35
37
+ 35999.0,7903.326710913368,35.0,1.0,36
38
+ 36999.0,8216.634341186278,36.0,1.0,37
39
+ 37999.0,8360.624661170059,37.0,1.0,38
40
+ 38999.0,8153.819149073541,38.0,1.0,39
41
+ 39999.0,8378.745693520874,39.0,1.0,40
42
+ 40999.0,8172.071859753907,40.0,1.0,41
43
+ 41999.0,8239.808645257288,41.0,1.0,42
44
+ 42999.0,7849.832761161068,42.0,1.0,43
45
+ 43999.0,7857.36684734548,43.0,1.0,44
46
+ 44999.0,8414.914870693285,44.0,1.0,45
47
+ 45999.0,8673.083278917358,45.0,1.0,46
48
+ 46999.0,7699.6903569153255,46.0,1.0,47
49
+ 47999.0,8639.667000314594,47.0,1.0,48
50
+ 48999.0,8547.74258002908,48.0,1.0,49
51
+ 49999.0,8555.200902940964,49.0,1.0,50
52
+ 50999.0,8913.20674866399,50.0,1.0,51
53
+ 51999.0,8379.796693087892,51.0,1.0,52
54
+ 52999.0,3387.7085029653554,52.0,1.0,53
55
+ 53999.0,8826.299694947718,53.0,1.0,54
56
+ 54999.0,8488.148012027918,54.0,1.0,55
57
+ 55999.0,8332.591687490607,55.0,1.0,56
58
+ 56999.0,1587.1204568104324,56.0,1.0,57
59
+ 57999.0,7055.592882903911,57.0,1.0,58
60
+ 58999.0,9107.432776260928,58.0,1.0,59
61
+ 59999.0,9047.379595779152,59.0,1.0,60
62
+ 60999.0,8504.254453486781,60.0,1.0,61
63
+ 61999.0,6772.222202752463,61.0,1.0,62
64
+ 62999.0,8289.95397239336,62.0,1.0,63
65
+ 63999.0,9099.957140674012,63.0,1.0,64
66
+ 64999.0,9281.260198706272,64.0,1.0,65
67
+ 65999.0,9203.450048956473,65.0,1.0,66
68
+ 66999.0,9224.526395319512,66.0,1.0,67
69
+ 67999.0,4308.232026190792,67.0,1.0,68
70
+ 68999.0,8970.106572616245,68.0,1.0,69
71
+ 69999.0,9089.342452749703,69.0,1.0,70
72
+ 70999.0,8987.759937574807,70.0,1.0,71
73
+ 71999.0,9115.061706920516,71.0,1.0,72
74
+ 72999.0,8854.255485642969,72.0,1.0,73
75
+ 73999.0,8139.480062454781,73.0,1.0,74
76
+ 74999.0,9099.196749279035,74.0,1.0,75
77
+ 75999.0,9236.47499476103,75.0,1.0,76
78
+ 76999.0,9013.81899479127,76.0,1.0,77
79
+ 77999.0,9494.551153273946,77.0,1.0,78
80
+ 78999.0,8822.661170585126,78.0,1.0,79
81
+ 79999.0,8102.313916882804,79.0,1.0,80
82
+ 80999.0,1256.8712115325827,80.0,1.0,81
83
+ 81999.0,8583.38996365152,81.0,1.0,82
84
+ 82999.0,9121.465003603857,82.0,1.0,83
85
+ 83999.0,9336.756917253779,83.0,1.0,84
86
+ 84999.0,9224.149587640184,84.0,1.0,85
87
+ 85999.0,9072.30961752765,85.0,1.0,86
88
+ 86999.0,8287.410291452588,86.0,1.0,87
89
+ 87999.0,9096.627955151995,87.0,1.0,88
90
+ 88999.0,2128.519209737562,88.0,1.0,89
91
+ 89999.0,9236.534968899734,89.0,1.0,90
92
+ 90999.0,9638.129012116648,90.0,1.0,91
93
+ 91999.0,9624.47011207292,91.0,1.0,92
94
+ 92999.0,9440.442954954917,92.0,1.0,93
95
+ 93999.0,9574.868118874016,93.0,1.0,94
96
+ 94999.0,9856.114810240975,94.0,1.0,95
97
+ 95999.0,9985.031510291155,95.0,1.0,96
98
+ 96999.0,9706.164496723464,96.0,1.0,97
99
+ 97999.0,9335.069238459102,97.0,1.0,98
100
+ 98999.0,9253.985373597654,98.0,1.0,99
101
+ 99999.0,8701.530476781452,99.0,1.0,100
102
+ 100999.0,10042.915743880547,100.0,1.0,101
103
+ 101999.0,10185.069926510745,101.0,1.0,102
104
+ 102999.0,9531.961976386074,102.0,1.0,103
105
+ 103999.0,9754.57320036387,103.0,1.0,104
106
+ 104999.0,9644.157237393198,104.0,1.0,105
107
+ 105999.0,10038.381409892,105.0,1.0,106
108
+ 106999.0,9679.874013530687,106.0,1.0,107
109
+ 107999.0,9953.389277945293,107.0,1.0,108
110
+ 108999.0,8703.214781685001,108.0,1.0,109
111
+ 109999.0,9294.682870016886,109.0,1.0,110
112
+ 110999.0,9482.299524126956,110.0,1.0,111
113
+ 111999.0,9849.066135354211,111.0,1.0,112
114
+ 112999.0,9319.709145278244,112.0,1.0,113
115
+ 113999.0,9363.530601883416,113.0,1.0,114
116
+ 114999.0,8122.473777446586,114.0,1.0,115
117
+ 115999.0,10350.571676134094,115.0,1.0,116
118
+ 116999.0,9940.735724257744,116.0,1.0,117
119
+ 117999.0,9150.884694739907,117.0,1.0,118
120
+ 118999.0,9196.892923994592,118.0,1.0,119
121
+ 119999.0,9124.969826653602,119.0,1.0,120
122
+ 120999.0,8820.013447096959,120.0,1.0,121
123
+ 121999.0,8913.329250430945,121.0,1.0,122
124
+ 122999.0,9416.847916135033,122.0,1.0,123
125
+ 123999.0,10104.21754274828,123.0,1.0,124
126
+ 124999.0,9994.135045446883,124.0,1.0,125
127
+ 125999.0,10221.31660187982,125.0,1.0,126
128
+ 126999.0,10504.80727339807,126.0,1.0,127
129
+ 127999.0,10192.590788684174,127.0,1.0,128
130
+ 128999.0,8693.504803877764,128.0,1.0,129
131
+ 129999.0,9793.683572508562,129.0,1.0,130
132
+ 130999.0,10389.445221433189,130.0,1.0,131
133
+ 131999.0,10472.924831975124,131.0,1.0,132
134
+ 132999.0,10092.121489514768,132.0,1.0,133
135
+ 133999.0,10401.191662253834,133.0,1.0,134
136
+ 134999.0,8939.728265721407,134.0,1.0,135
137
+ 135999.0,9350.171762110345,135.0,1.0,136
138
+ 136999.0,9411.424620433527,136.0,1.0,137
139
+ 137999.0,10029.675937906644,137.0,1.0,138
140
+ 138999.0,9992.554138274802,138.0,1.0,139
141
+ 139999.0,10343.762664373156,139.0,1.0,140
142
+ 140999.0,9722.122682521733,140.0,1.0,141
143
+ 141999.0,9330.025648145984,141.0,1.0,142
144
+ 142999.0,10106.804353318286,142.0,1.0,143
145
+ 143999.0,9850.851012327541,143.0,1.0,144
146
+ 144999.0,9883.532003682962,144.0,1.0,145
147
+ 145999.0,9204.10194357558,145.0,1.0,146
148
+ 146999.0,10668.211195381087,146.0,1.0,147
149
+ 147999.0,10723.23239431441,147.0,1.0,148
150
+ 148999.0,10172.957344566386,148.0,1.0,149
151
+ 149999.0,10760.052805989957,149.0,1.0,150
152
+ 150999.0,9445.280204649993,150.0,1.0,151
153
+ 151999.0,2599.530681876538,151.0,1.0,152
154
+ 152999.0,10420.222602344218,152.0,1.0,153
155
+ 153999.0,9945.58262579822,153.0,1.0,154
156
+ 154999.0,9722.759424952892,154.0,1.0,155
157
+ 155999.0,9635.495883598736,155.0,1.0,156
158
+ 156999.0,9883.012533153958,156.0,1.0,157
159
+ 157999.0,8974.469662973865,157.0,1.0,158
160
+ 158999.0,9944.028371865432,158.0,1.0,159
161
+ 159999.0,10058.456577723073,159.0,1.0,160
162
+ 160999.0,9136.084817143756,160.0,1.0,161
163
+ 161999.0,5511.148140548139,161.0,1.0,162
164
+ 162999.0,9317.26438893607,162.0,1.0,163
165
+ 163999.0,9227.346664424846,163.0,1.0,164
166
+ 164999.0,9883.906299015594,164.0,1.0,165
167
+ 165999.0,8563.304770355506,165.0,1.0,166
168
+ 166999.0,10524.570994111598,166.0,1.0,167
169
+ 167999.0,10279.900453196968,167.0,1.0,168
170
+ 168999.0,9572.33969918891,168.0,1.0,169
171
+ 169999.0,459.4115657738802,169.0,1.0,170
172
+ 170999.0,10370.908056408089,170.0,1.0,171
173
+ 171999.0,10108.69195123791,171.0,1.0,172
174
+ 172999.0,9700.60778501836,172.0,1.0,173
175
+ 173999.0,10049.826021828469,173.0,1.0,174
176
+ 174999.0,9973.229946762802,174.0,1.0,175
177
+ 175999.0,9927.628289856191,175.0,1.0,176
178
+ 176999.0,10951.456701067278,176.0,1.0,177
179
+ 177999.0,10683.198306012837,177.0,1.0,178
180
+ 178999.0,10056.52728346155,178.0,1.0,179
181
+ 179999.0,10956.19238412689,179.0,1.0,180
182
+ 180999.0,10753.4588488623,180.0,1.0,181
183
+ 181999.0,10135.69824734681,181.0,1.0,182
184
+ 182999.0,11035.23633395284,182.0,1.0,183
185
+ 183999.0,9724.465486939987,183.0,1.0,184
186
+ 184999.0,10600.714800528838,184.0,1.0,185
187
+ 185999.0,10497.512333427781,185.0,1.0,186
188
+ 186999.0,10072.93756631213,186.0,1.0,187
189
+ 187999.0,10826.202622805544,187.0,1.0,188
190
+ 188999.0,9449.110098412495,188.0,1.0,189
191
+ 189999.0,10190.089325632674,189.0,1.0,190
192
+ 190999.0,10927.480902046444,190.0,1.0,191
193
+ 191999.0,9118.85921607503,191.0,1.0,192
194
+ 192999.0,10464.76083732186,192.0,1.0,193
195
+ 193999.0,10836.288402985549,193.0,1.0,194
196
+ 194999.0,11427.696208089892,194.0,1.0,195
197
+ 195999.0,9986.067037817547,195.0,1.0,196
198
+ 196999.0,10572.428528243849,196.0,1.0,197
199
+ 197999.0,11083.93560179224,197.0,1.0,198
200
+ 198999.0,10660.56055833877,198.0,1.0,199
201
+ 199999.0,10292.624763630529,199.0,1.0,200
202
+ 200999.0,11611.820130340391,200.0,1.0,201
203
+ 201999.0,9737.310176126619,201.0,1.0,202
204
+ 202999.0,11202.55003629337,202.0,1.0,203
205
+ 203999.0,11506.25498560904,203.0,1.0,204
206
+ 204999.0,11025.715945433363,204.0,1.0,205
207
+ 205999.0,11404.55085986778,205.0,1.0,206
208
+ 206999.0,10753.346482449862,206.0,1.0,207
209
+ 207999.0,9555.793214849753,207.0,1.0,208
210
+ 208999.0,11192.256012556445,208.0,1.0,209
211
+ 209999.0,11336.022957654866,209.0,1.0,210
212
+ 210999.0,11328.72170085688,210.0,1.0,211
213
+ 211999.0,10681.134889693185,211.0,1.0,212
214
+ 212999.0,11007.769300592925,212.0,1.0,213
215
+ 213999.0,11178.157581211646,213.0,1.0,214
216
+ 214999.0,11083.913210765086,214.0,1.0,215
217
+ 215999.0,11395.155550538651,215.0,1.0,216
218
+ 216999.0,11639.67595321487,216.0,1.0,217
219
+ 217999.0,11705.08851523343,217.0,1.0,218
220
+ 218999.0,10668.342271099253,218.0,1.0,219
221
+ 219999.0,10805.61925898763,219.0,1.0,220
222
+ 220999.0,11592.366612497766,220.0,1.0,221
223
+ 221999.0,11223.5294829176,221.0,1.0,222
224
+ 222999.0,11138.024211462522,222.0,1.0,223
225
+ 223999.0,11601.35988403525,223.0,1.0,224
226
+ 224999.0,11310.779012879391,224.0,1.0,225
227
+ 225999.0,11802.66065614503,225.0,1.0,226
228
+ 226999.0,11209.806475788553,226.0,1.0,227
229
+ 227999.0,11654.839004839429,227.0,1.0,228
230
+ 228999.0,10855.618642422498,228.0,1.0,229
231
+ 229999.0,9812.630364716157,229.0,1.0,230
232
+ 230999.0,11724.005399907652,230.0,1.0,231
233
+ 231999.0,11465.352774973042,231.0,1.0,232
234
+ 232999.0,11920.273782298198,232.0,1.0,233
235
+ 233999.0,11430.42879966512,233.0,1.0,234
236
+ 234999.0,11552.025001424965,234.0,1.0,235
237
+ 235999.0,11403.072859394202,235.0,1.0,236
238
+ 236999.0,11057.082860436925,236.0,1.0,237
239
+ 237999.0,11582.033307065773,237.0,1.0,238
240
+ 238999.0,10348.801547796596,238.0,1.0,239
241
+ 239999.0,10741.964451536884,239.0,1.0,240
242
+ 240999.0,11618.196693735217,240.0,1.0,241
243
+ 241999.0,10002.383524572851,241.0,1.0,242
244
+ 242999.0,11671.505684007396,242.0,1.0,243
245
+ 243999.0,10533.37173300435,243.0,1.0,244
246
+ 244999.0,11495.491286299972,244.0,1.0,245
247
+ 245999.0,10704.936048284479,245.0,1.0,246
248
+ 246999.0,11059.853144223625,246.0,1.0,247
249
+ 247999.0,11348.785803639697,247.0,1.0,248
250
+ 248999.0,10420.040232432144,248.0,1.0,249
251
+ 249999.0,11765.821990391167,249.0,1.0,250
252
+ 250999.0,11824.230405206414,250.0,1.0,251
253
+ 251999.0,11469.446747771486,251.0,1.0,252
254
+ 252999.0,11595.828035361086,252.0,1.0,253
255
+ 253999.0,10359.15029178175,253.0,1.0,254
256
+ 254999.0,9917.691284426726,254.0,1.0,255
257
+ 255999.0,6102.609024160513,255.0,1.0,256
258
+ 256999.0,10927.013048022645,256.0,1.0,257
259
+ 257999.0,11300.657766723705,257.0,1.0,258
260
+ 258999.0,10151.599494745633,258.0,1.0,259
261
+ 259999.0,11529.406322172845,259.0,1.0,260
262
+ 260999.0,11531.339103217848,260.0,1.0,261
263
+ 261999.0,11651.531214686984,261.0,1.0,262
264
+ 262999.0,10040.325692500259,262.0,1.0,263
265
+ 263999.0,11546.858520227863,263.0,1.0,264
266
+ 264999.0,11799.049568617369,264.0,1.0,265
267
+ 265999.0,11484.005210292597,265.0,1.0,266
268
+ 266999.0,11282.057907999704,266.0,1.0,267
269
+ 267999.0,11568.720958887252,267.0,1.0,268
270
+ 268999.0,11639.945396769197,268.0,1.0,269
271
+ 269999.0,11720.551555788543,269.0,1.0,270
272
+ 270999.0,10379.107392251875,270.0,1.0,271
273
+ 271999.0,11216.436913037982,271.0,1.0,272
274
+ 272999.0,11396.103637628003,272.0,1.0,273
275
+ 273999.0,11680.062090577818,273.0,1.0,274
276
+ 274999.0,10215.109158371728,274.0,1.0,275
277
+ 275999.0,11398.785608347678,275.0,1.0,276
278
+ 276999.0,10764.059050668971,276.0,1.0,277
279
+ 277999.0,11348.653029476352,277.0,1.0,278
280
+ 278999.0,11662.697639542466,278.0,1.0,279
281
+ 279999.0,11765.363705664562,279.0,1.0,280
282
+ 280999.0,11657.6353552141,280.0,1.0,281
283
+ 281999.0,11684.999610622286,281.0,1.0,282
284
+ 282999.0,10255.648173107225,282.0,1.0,283
285
+ 283999.0,11465.240904220014,283.0,1.0,284
286
+ 284999.0,10475.481276566978,284.0,1.0,285
287
+ 285999.0,11549.675393902611,285.0,1.0,286
288
+ 286999.0,11242.430013140807,286.0,1.0,287
289
+ 287999.0,11536.323434760423,287.0,1.0,288
290
+ 288999.0,11317.117428195046,288.0,1.0,289
291
+ 289999.0,10582.213144270892,289.0,1.0,290
292
+ 290999.0,9793.15922987001,290.0,1.0,291
293
+ 291999.0,11472.518604319215,291.0,1.0,292
294
+ 292999.0,11833.892198669642,292.0,1.0,293
295
+ 293999.0,11402.400341744446,293.0,1.0,294
296
+ 294999.0,10265.423980682765,294.0,1.0,295
297
+ 295999.0,11553.598432918068,295.0,1.0,296
298
+ 296999.0,11649.103180816406,296.0,1.0,297
299
+ 297999.0,11704.295685135929,297.0,1.0,298
300
+ 298999.0,11710.643213841471,298.0,1.0,299
301
+ 299999.0,10811.282881245972,299.0,1.0,300
302
+ 300999.0,11790.477101605919,300.0,1.0,301
303
+ 301999.0,11463.917177513425,301.0,1.0,302
304
+ 302999.0,10550.227460936256,302.0,1.0,303
305
+ 303999.0,11502.956823971092,303.0,1.0,304
306
+ 304999.0,11525.664368442765,304.0,1.0,305
307
+ 305999.0,9953.49985543528,305.0,1.0,306
308
+ 306999.0,10860.576970068785,306.0,1.0,307
309
+ 307999.0,11487.493941227742,307.0,1.0,308
310
+ 308999.0,11494.436632675712,308.0,1.0,309
311
+ 309999.0,10927.645117397507,309.0,1.0,310
312
+ 310999.0,10911.542192928973,310.0,1.0,311
313
+ 311999.0,11204.923054237095,311.0,1.0,312
314
+ 312999.0,11658.928212273355,312.0,1.0,313
315
+ 313999.0,10956.728869721908,313.0,1.0,314
316
+ 314999.0,11870.042372250722,314.0,1.0,315
317
+ 315999.0,11336.991736286973,315.0,1.0,316
318
+ 316999.0,11290.45207898969,316.0,1.0,317
319
+ 317999.0,11318.244776732387,317.0,1.0,318
320
+ 318999.0,11791.585046453325,318.0,1.0,319
321
+ 319999.0,11734.674824404196,319.0,1.0,320
322
+ 320999.0,10701.80781992137,320.0,1.0,321
323
+ 321999.0,10800.248844245685,321.0,1.0,322
324
+ 322999.0,10627.2921436344,322.0,1.0,323
325
+ 323999.0,11531.378951903072,323.0,1.0,324
326
+ 324999.0,11608.96231993335,324.0,1.0,325
327
+ 325999.0,10857.554335952875,325.0,1.0,326
328
+ 326999.0,11565.460188866216,326.0,1.0,327
329
+ 327999.0,11829.766356462838,327.0,1.0,328
330
+ 328999.0,10909.096601768862,328.0,1.0,329
331
+ 329999.0,11153.248737018626,329.0,1.0,330
332
+ 330999.0,10892.668020480964,330.0,1.0,331
333
+ 331999.0,11577.835698048304,331.0,1.0,332
334
+ 332999.0,10453.38812840578,332.0,1.0,333
335
+ 333999.0,11270.415564340012,333.0,1.0,334
336
+ 334999.0,10531.741409624186,334.0,1.0,335
337
+ 335999.0,11140.527387574772,335.0,1.0,336
338
+ 336999.0,10891.5504131639,336.0,1.0,337
339
+ 337999.0,11233.056701604546,337.0,1.0,338
340
+ 338999.0,11481.459254934854,338.0,1.0,339
341
+ 339999.0,11378.32052353805,339.0,1.0,340
342
+ 340999.0,10394.023606479881,340.0,1.0,341
343
+ 341999.0,11573.917683099462,341.0,1.0,342
344
+ 342999.0,11488.659280743272,342.0,1.0,343
345
+ 343999.0,10904.42070431335,343.0,1.0,344
346
+ 344999.0,11642.66216110625,344.0,1.0,345
347
+ 345999.0,11362.100618845143,345.0,1.0,346
348
+ 346999.0,11740.276267002291,346.0,1.0,347
349
+ 347999.0,10914.3370857476,347.0,1.0,348
350
+ 348999.0,10824.83888350996,348.0,1.0,349
351
+ 349999.0,10657.98411697037,349.0,1.0,350
352
+ 350999.0,11738.166032299676,350.0,1.0,351
353
+ 351999.0,11210.281026024877,351.0,1.0,352
354
+ 352999.0,10837.792504976918,352.0,1.0,353
355
+ 353999.0,10444.90714571259,353.0,1.0,354
356
+ 354999.0,10936.01831311329,354.0,1.0,355
357
+ 355999.0,10844.386795465618,355.0,1.0,356
358
+ 356999.0,9754.415662493262,356.0,1.0,357
359
+ 357999.0,11650.587169334869,357.0,1.0,358
360
+ 358999.0,10357.592297727244,358.0,1.0,359
361
+ 359999.0,11537.725681387774,359.0,1.0,360
362
+ 360999.0,11289.146965757438,360.0,1.0,361
363
+ 361999.0,11150.838012406875,361.0,1.0,362
364
+ 362999.0,11471.199113177685,362.0,1.0,363
365
+ 363999.0,11646.833006585788,363.0,1.0,364
366
+ 364999.0,10503.227567542197,364.0,1.0,365
367
+ 365999.0,11276.450668628904,365.0,1.0,366
368
+ 366999.0,11940.695821080499,366.0,1.0,367
369
+ 367999.0,11604.386497975769,367.0,1.0,368
370
+ 368999.0,11653.682400035035,368.0,1.0,369
371
+ 369999.0,10571.039193929144,369.0,1.0,370
372
+ 370999.0,11568.144529238545,370.0,1.0,371
373
+ 371999.0,11612.962706391143,371.0,1.0,372
374
+ 372999.0,11514.825298835605,372.0,1.0,373
375
+ 373999.0,11430.463677979154,373.0,1.0,374
376
+ 374999.0,11401.6582482154,374.0,1.0,375
377
+ 375999.0,10978.886940097198,375.0,1.0,376
378
+ 376999.0,11490.77850211517,376.0,1.0,377
379
+ 377999.0,11601.61677407009,377.0,1.0,378
380
+ 378999.0,11296.2031309121,378.0,1.0,379
381
+ 379999.0,11442.871154911058,379.0,1.0,380
382
+ 380999.0,11433.402744525016,380.0,1.0,381
383
+ 381999.0,10865.328439839192,381.0,1.0,382
384
+ 382999.0,11451.844032661144,382.0,1.0,383
385
+ 383999.0,11425.572922795247,383.0,1.0,384
386
+ 384999.0,11727.389646807698,384.0,1.0,385
387
+ 385999.0,10703.502019860602,385.0,1.0,386
388
+ 386999.0,11293.135891588056,386.0,1.0,387
389
+ 387999.0,11316.482527959852,387.0,1.0,388
390
+ 388999.0,11554.276355157273,388.0,1.0,389
391
+ 389999.0,10535.023297408634,389.0,1.0,390
392
+ 390999.0,10990.757378831033,390.0,1.0,391
393
+ 391999.0,11451.658759693475,391.0,1.0,392
394
+ 392999.0,9895.0233731017,392.0,1.0,393
395
+ 393999.0,10724.596514099725,393.0,1.0,394
396
+ 394999.0,11519.724406604217,394.0,1.0,395
397
+ 395999.0,11102.809263879044,395.0,1.0,396
398
+ 396999.0,10541.369100808077,396.0,1.0,397
399
+ 397999.0,10849.298837270066,397.0,1.0,398
400
+ 398999.0,11369.46537138363,398.0,1.0,399
401
+ 399999.0,11602.159903388363,399.0,1.0,400
mbpo_half_cheetah_v4/diff/sac.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f84d520b7c8661ca5d20f9e2bc7b9c16a2b14293bbfdddbdcaf496b67ec37ab
3
+ size 12173785
mbpo_half_cheetah_v4/diff/train.csv ADDED
The diff for this file is too large to render. See raw diff
 
mbpo_half_cheetah_v4/main/.hydra/config.yaml ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 0
2
+ device: cuda:0
3
+ log_frequency_agent: 1000
4
+ save_video: false
5
+ debug_mode: false
6
+ experiment: default
7
+ root_dir: ./exp
8
+ algorithm:
9
+ name: mbpo
10
+ normalize: true
11
+ normalize_double_precision: true
12
+ target_is_delta: true
13
+ learned_rewards: true
14
+ freq_train_model: ${overrides.freq_train_model}
15
+ real_data_ratio: 0.0
16
+ sac_samples_action: true
17
+ initial_exploration_steps: 5000
18
+ random_initial_explore: false
19
+ num_eval_episodes: 1
20
+ agent:
21
+ _target_: mbrl.third_party.pytorch_sac_pranz24.sac.SAC
22
+ num_inputs: ???
23
+ action_space:
24
+ _target_: gym.env.Box
25
+ low: ???
26
+ high: ???
27
+ shape: ???
28
+ args:
29
+ gamma: ${overrides.sac_gamma}
30
+ tau: ${overrides.sac_tau}
31
+ alpha: ${overrides.sac_alpha}
32
+ policy: ${overrides.sac_policy}
33
+ target_update_interval: ${overrides.sac_target_update_interval}
34
+ automatic_entropy_tuning: ${overrides.sac_automatic_entropy_tuning}
35
+ target_entropy: ${overrides.sac_target_entropy}
36
+ hidden_size: ${overrides.sac_hidden_size}
37
+ device: ${device}
38
+ lr: ${overrides.sac_lr}
39
+ dynamics_model:
40
+ _target_: mbrl.models.GaussianMLP
41
+ device: ${device}
42
+ num_layers: 4
43
+ in_size: ???
44
+ out_size: ???
45
+ ensemble_size: 7
46
+ hid_size: 200
47
+ deterministic: false
48
+ propagation_method: random_model
49
+ learn_logvar_bounds: false
50
+ activation_fn_cfg:
51
+ _target_: torch.nn.SiLU
52
+ overrides:
53
+ env: gym___HalfCheetah-v2
54
+ term_fn: no_termination
55
+ num_steps: 400000
56
+ epoch_length: 1000
57
+ num_elites: 5
58
+ patience: 5
59
+ model_lr: 0.001
60
+ model_wd: 1.0e-05
61
+ model_batch_size: 256
62
+ validation_ratio: 0.2
63
+ freq_train_model: 250
64
+ effective_model_rollouts_per_step: 400
65
+ rollout_schedule:
66
+ - 20
67
+ - 150
68
+ - 1
69
+ - 1
70
+ num_sac_updates_per_step: 10
71
+ sac_updates_every_steps: 1
72
+ num_epochs_to_retain_sac_buffer: 1
73
+ sac_gamma: 0.99
74
+ sac_tau: 0.005
75
+ sac_alpha: 0.2
76
+ sac_policy: Gaussian
77
+ sac_target_update_interval: 1
78
+ sac_automatic_entropy_tuning: true
79
+ sac_target_entropy: -1
80
+ sac_hidden_size: 512
81
+ sac_lr: 0.0003
82
+ sac_batch_size: 256
83
+ action_optimizer:
84
+ _target_: mbrl.planning.CEMOptimizer
85
+ num_iterations: ${overrides.cem_num_iters}
86
+ elite_ratio: ${overrides.cem_elite_ratio}
87
+ population_size: ${overrides.cem_population_size}
88
+ alpha: ${overrides.cem_alpha}
89
+ lower_bound: ???
90
+ upper_bound: ???
91
+ return_mean_elites: true
92
+ device: ${device}
93
+ clipped_normal: ${overrides.cem_clipped_normal}
mbpo_half_cheetah_v4/main/.hydra/hydra.yaml ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${root_dir}/${algorithm.name}/${experiment}/${overrides.env}/${now:%Y.%m.%d}/${now:%H%M%S}
4
+ sweep:
5
+ dir: ${root_dir}/${algorithm.name}/${experiment}/${overrides.env}/${now:%Y.%m.%d}/${now:%H%M%S}
6
+ subdir: ${hydra.job.num}
7
+ hydra_logging:
8
+ version: 1
9
+ formatters:
10
+ simple:
11
+ format: '[%(asctime)s][HYDRA] %(message)s'
12
+ handlers:
13
+ console:
14
+ class: logging.StreamHandler
15
+ formatter: simple
16
+ stream: ext://sys.stdout
17
+ root:
18
+ level: INFO
19
+ handlers:
20
+ - console
21
+ loggers:
22
+ logging_example:
23
+ level: DEBUG
24
+ disable_existing_loggers: false
25
+ job_logging:
26
+ version: 1
27
+ formatters:
28
+ simple:
29
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
30
+ handlers:
31
+ console:
32
+ class: logging.StreamHandler
33
+ formatter: simple
34
+ stream: ext://sys.stdout
35
+ file:
36
+ class: logging.FileHandler
37
+ formatter: simple
38
+ filename: ${hydra.job.name}.log
39
+ root:
40
+ level: INFO
41
+ handlers:
42
+ - console
43
+ - file
44
+ disable_existing_loggers: false
45
+ sweeper:
46
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
47
+ max_batch_size: null
48
+ launcher:
49
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
50
+ help:
51
+ app_name: ${hydra.job.name}
52
+ header: '${hydra.help.app_name} is powered by Hydra.
53
+
54
+ '
55
+ footer: 'Powered by Hydra (https://hydra.cc)
56
+
57
+ Use --hydra-help to view Hydra specific help
58
+
59
+ '
60
+ template: '${hydra.help.header}
61
+
62
+ == Configuration groups ==
63
+
64
+ Compose your configuration from those groups (group=option)
65
+
66
+
67
+ $APP_CONFIG_GROUPS
68
+
69
+
70
+ == Config ==
71
+
72
+ Override anything in the config (foo.bar=value)
73
+
74
+
75
+ $CONFIG
76
+
77
+
78
+ ${hydra.help.footer}
79
+
80
+ '
81
+ hydra_help:
82
+ hydra_help: ???
83
+ template: 'Hydra (${hydra.runtime.version})
84
+
85
+ See https://hydra.cc for more info.
86
+
87
+
88
+ == Flags ==
89
+
90
+ $FLAGS_HELP
91
+
92
+
93
+ == Configuration groups ==
94
+
95
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
96
+ to command line)
97
+
98
+
99
+ $HYDRA_CONFIG_GROUPS
100
+
101
+
102
+ Use ''--cfg hydra'' to Show the Hydra config.
103
+
104
+ '
105
+ output_subdir: .hydra
106
+ overrides:
107
+ hydra: []
108
+ task:
109
+ - algorithm=mbpo
110
+ - overrides=mbpo_halfcheetah
111
+ - device=cuda:0
112
+ job:
113
+ name: main
114
+ override_dirname: algorithm=mbpo,device=cuda:0,overrides=mbpo_halfcheetah
115
+ id: ???
116
+ num: ???
117
+ config_name: main
118
+ env_set: {}
119
+ env_copy: []
120
+ config:
121
+ override_dirname:
122
+ kv_sep: '='
123
+ item_sep: ','
124
+ exclude_keys: []
125
+ runtime:
126
+ version: 1.0.3
127
+ cwd: /home/raghava/projects/mbrl-lib/exp
128
+ verbose: false
mbpo_half_cheetah_v4/main/.hydra/overrides.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ - algorithm=mbpo
2
+ - overrides=mbpo_halfcheetah
3
+ - device=cuda:0
mbpo_half_cheetah_v4/main/env_stats.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5139c3fc0135ce131d07dbb9797e1fe455da616e2a1db860260660bfa10372a5
3
+ size 566
mbpo_half_cheetah_v4/main/eval.csv ADDED
File without changes
mbpo_half_cheetah_v4/main/main.log ADDED
File without changes
mbpo_half_cheetah_v4/main/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de90df3c9aecb27e8048a228e7f0f46e225d6c81ce5f8cc5e426e083ef2d03c7
3
+ size 3718181
mbpo_half_cheetah_v4/main/model_train.csv ADDED
The diff for this file is too large to render. See raw diff
 
mbpo_half_cheetah_v4/main/replay_buffer.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9612327c095d9a94e7f49f0b684147a01a3cff909a93e868b34378d7de909ad
3
+ size 131601504
mbpo_half_cheetah_v4/main/results.csv ADDED
@@ -0,0 +1,401 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ env_step,episode_reward,epoch,rollout_length,step
2
+ 999.0,-369.0130980815457,0.0,1.0,1
3
+ 1999.0,-235.36340547914483,1.0,1.0,2
4
+ 2999.0,-160.61847904837379,2.0,1.0,3
5
+ 3999.0,-251.37242028717705,3.0,1.0,4
6
+ 4999.0,287.63697974456574,4.0,1.0,5
7
+ 5999.0,465.28802906698195,5.0,1.0,6
8
+ 6999.0,256.5621996956014,6.0,1.0,7
9
+ 7999.0,987.482534733894,7.0,1.0,8
10
+ 8999.0,1346.8132962964285,8.0,1.0,9
11
+ 9999.0,1516.7007738734344,9.0,1.0,10
12
+ 10999.0,2077.0220162425917,10.0,1.0,11
13
+ 11999.0,2756.123492980726,11.0,1.0,12
14
+ 12999.0,2818.79517793501,12.0,1.0,13
15
+ 13999.0,2951.649373621113,13.0,1.0,14
16
+ 14999.0,3615.8984761142788,14.0,1.0,15
17
+ 15999.0,3882.154072028614,15.0,1.0,16
18
+ 16999.0,4143.7003767242995,16.0,1.0,17
19
+ 17999.0,4109.368695464052,17.0,1.0,18
20
+ 18999.0,4530.626838981523,18.0,1.0,19
21
+ 19999.0,3679.4346902894276,19.0,1.0,20
22
+ 20999.0,4812.1468407628345,20.0,1.0,21
23
+ 21999.0,5226.5529479399665,21.0,1.0,22
24
+ 22999.0,1289.5633254892462,22.0,1.0,23
25
+ 23999.0,1639.5806666230224,23.0,1.0,24
26
+ 24999.0,5075.560795092392,24.0,1.0,25
27
+ 25999.0,5574.9206199170585,25.0,1.0,26
28
+ 26999.0,6150.071187831111,26.0,1.0,27
29
+ 27999.0,6017.034367254374,27.0,1.0,28
30
+ 28999.0,6220.441993965224,28.0,1.0,29
31
+ 29999.0,6001.274678092837,29.0,1.0,30
32
+ 30999.0,6276.78281636152,30.0,1.0,31
33
+ 31999.0,6023.983039447032,31.0,1.0,32
34
+ 32999.0,6815.475006707122,32.0,1.0,33
35
+ 33999.0,5814.2060335059905,33.0,1.0,34
36
+ 34999.0,6945.53296866619,34.0,1.0,35
37
+ 35999.0,7075.37897349407,35.0,1.0,36
38
+ 36999.0,6689.62232098397,36.0,1.0,37
39
+ 37999.0,7094.026935332524,37.0,1.0,38
40
+ 38999.0,7242.396276420288,38.0,1.0,39
41
+ 39999.0,7184.08575394812,39.0,1.0,40
42
+ 40999.0,6976.332326344404,40.0,1.0,41
43
+ 41999.0,7288.639038985377,41.0,1.0,42
44
+ 42999.0,7579.331068470798,42.0,1.0,43
45
+ 43999.0,7624.351252931682,43.0,1.0,44
46
+ 44999.0,7607.61866080825,44.0,1.0,45
47
+ 45999.0,7762.832287888078,45.0,1.0,46
48
+ 46999.0,7584.514063942946,46.0,1.0,47
49
+ 47999.0,7558.714275533149,47.0,1.0,48
50
+ 48999.0,7809.3267236152715,48.0,1.0,49
51
+ 49999.0,7622.424267933799,49.0,1.0,50
52
+ 50999.0,7402.999245162974,50.0,1.0,51
53
+ 51999.0,7532.140716234178,51.0,1.0,52
54
+ 52999.0,7700.468377812024,52.0,1.0,53
55
+ 53999.0,7745.981361518955,53.0,1.0,54
56
+ 54999.0,7778.012429519654,54.0,1.0,55
57
+ 55999.0,8000.055477501836,55.0,1.0,56
58
+ 56999.0,8133.759000174345,56.0,1.0,57
59
+ 57999.0,7907.740975282002,57.0,1.0,58
60
+ 58999.0,7661.227031154118,58.0,1.0,59
61
+ 59999.0,7616.222236496069,59.0,1.0,60
62
+ 60999.0,7921.515480433634,60.0,1.0,61
63
+ 61999.0,8067.472767443434,61.0,1.0,62
64
+ 62999.0,8065.628691719224,62.0,1.0,63
65
+ 63999.0,7927.771507168636,63.0,1.0,64
66
+ 64999.0,8177.62758646112,64.0,1.0,65
67
+ 65999.0,8110.685614122283,65.0,1.0,66
68
+ 66999.0,8251.030013036172,66.0,1.0,67
69
+ 67999.0,7986.542899270262,67.0,1.0,68
70
+ 68999.0,8115.230292462184,68.0,1.0,69
71
+ 69999.0,8185.523333247866,69.0,1.0,70
72
+ 70999.0,8462.240757849328,70.0,1.0,71
73
+ 71999.0,8260.070672056961,71.0,1.0,72
74
+ 72999.0,8593.078792360848,72.0,1.0,73
75
+ 73999.0,8099.660550837046,73.0,1.0,74
76
+ 74999.0,8553.790607196444,74.0,1.0,75
77
+ 75999.0,8134.570609046353,75.0,1.0,76
78
+ 76999.0,8558.135128449898,76.0,1.0,77
79
+ 77999.0,8378.682659957069,77.0,1.0,78
80
+ 78999.0,8436.293777340921,78.0,1.0,79
81
+ 79999.0,8298.182012627483,79.0,1.0,80
82
+ 80999.0,8313.538284361797,80.0,1.0,81
83
+ 81999.0,8670.455493612977,81.0,1.0,82
84
+ 82999.0,8729.28276386704,82.0,1.0,83
85
+ 83999.0,8625.818636910657,83.0,1.0,84
86
+ 84999.0,8545.670153126317,84.0,1.0,85
87
+ 85999.0,8419.280748498199,85.0,1.0,86
88
+ 86999.0,8291.708477937227,86.0,1.0,87
89
+ 87999.0,8707.386203578095,87.0,1.0,88
90
+ 88999.0,8507.979202492046,88.0,1.0,89
91
+ 89999.0,8912.670602556847,89.0,1.0,90
92
+ 90999.0,9099.363292253307,90.0,1.0,91
93
+ 91999.0,8799.83223988593,91.0,1.0,92
94
+ 92999.0,8493.400885485386,92.0,1.0,93
95
+ 93999.0,8503.504417333934,93.0,1.0,94
96
+ 94999.0,8839.363730337864,94.0,1.0,95
97
+ 95999.0,8292.634198634803,95.0,1.0,96
98
+ 96999.0,8462.347679223689,96.0,1.0,97
99
+ 97999.0,8509.095043728508,97.0,1.0,98
100
+ 98999.0,8791.148483266306,98.0,1.0,99
101
+ 99999.0,8796.509268113581,99.0,1.0,100
102
+ 100999.0,8835.069177414674,100.0,1.0,101
103
+ 101999.0,9054.74716104422,101.0,1.0,102
104
+ 102999.0,8960.968819564618,102.0,1.0,103
105
+ 103999.0,8816.824076834902,103.0,1.0,104
106
+ 104999.0,9039.556562783406,104.0,1.0,105
107
+ 105999.0,8894.603252014946,105.0,1.0,106
108
+ 106999.0,9143.584969179807,106.0,1.0,107
109
+ 107999.0,9344.0926735272,107.0,1.0,108
110
+ 108999.0,9060.783105405088,108.0,1.0,109
111
+ 109999.0,9382.012670053917,109.0,1.0,110
112
+ 110999.0,9187.09952767959,110.0,1.0,111
113
+ 111999.0,9319.585898852723,111.0,1.0,112
114
+ 112999.0,8996.140932654755,112.0,1.0,113
115
+ 113999.0,9103.037894549827,113.0,1.0,114
116
+ 114999.0,9461.77777315585,114.0,1.0,115
117
+ 115999.0,9483.64324212161,115.0,1.0,116
118
+ 116999.0,9311.055511360762,116.0,1.0,117
119
+ 117999.0,9122.211950967205,117.0,1.0,118
120
+ 118999.0,8532.411393569635,118.0,1.0,119
121
+ 119999.0,8962.52659927028,119.0,1.0,120
122
+ 120999.0,9101.620113844787,120.0,1.0,121
123
+ 121999.0,8955.686958948094,121.0,1.0,122
124
+ 122999.0,9824.427401272214,122.0,1.0,123
125
+ 123999.0,9726.37010879676,123.0,1.0,124
126
+ 124999.0,9621.885644343254,124.0,1.0,125
127
+ 125999.0,10190.25782416718,125.0,1.0,126
128
+ 126999.0,9153.771548390503,126.0,1.0,127
129
+ 127999.0,9610.835985513231,127.0,1.0,128
130
+ 128999.0,8723.528420896504,128.0,1.0,129
131
+ 129999.0,9524.650331441011,129.0,1.0,130
132
+ 130999.0,9732.672166734767,130.0,1.0,131
133
+ 131999.0,9763.710201269325,131.0,1.0,132
134
+ 132999.0,10296.83508391069,132.0,1.0,133
135
+ 133999.0,9724.38399921497,133.0,1.0,134
136
+ 134999.0,9118.238193482885,134.0,1.0,135
137
+ 135999.0,10018.042217354912,135.0,1.0,136
138
+ 136999.0,10229.065088655801,136.0,1.0,137
139
+ 137999.0,10003.961732040856,137.0,1.0,138
140
+ 138999.0,9993.914554911275,138.0,1.0,139
141
+ 139999.0,9034.02357606101,139.0,1.0,140
142
+ 140999.0,10234.263119355604,140.0,1.0,141
143
+ 141999.0,10427.554011973149,141.0,1.0,142
144
+ 142999.0,9606.83187563424,142.0,1.0,143
145
+ 143999.0,10276.170849590213,143.0,1.0,144
146
+ 144999.0,9500.928679588373,144.0,1.0,145
147
+ 145999.0,10310.599223899986,145.0,1.0,146
148
+ 146999.0,10753.922891924101,146.0,1.0,147
149
+ 147999.0,10326.222947687818,147.0,1.0,148
150
+ 148999.0,10764.178142766756,148.0,1.0,149
151
+ 149999.0,10271.316565777075,149.0,1.0,150
152
+ 150999.0,10062.741777715837,150.0,1.0,151
153
+ 151999.0,10735.787767190728,151.0,1.0,152
154
+ 152999.0,10431.543770023649,152.0,1.0,153
155
+ 153999.0,10009.429956800152,153.0,1.0,154
156
+ 154999.0,9366.795939268999,154.0,1.0,155
157
+ 155999.0,9350.747948269807,155.0,1.0,156
158
+ 156999.0,9975.911836434918,156.0,1.0,157
159
+ 157999.0,10131.757872160042,157.0,1.0,158
160
+ 158999.0,9929.036259037328,158.0,1.0,159
161
+ 159999.0,10017.781789844043,159.0,1.0,160
162
+ 160999.0,10069.939268309321,160.0,1.0,161
163
+ 161999.0,10022.83074909982,161.0,1.0,162
164
+ 162999.0,10279.84727560324,162.0,1.0,163
165
+ 163999.0,10272.875797692795,163.0,1.0,164
166
+ 164999.0,9708.375876183061,164.0,1.0,165
167
+ 165999.0,10636.85141299707,165.0,1.0,166
168
+ 166999.0,10444.746415484664,166.0,1.0,167
169
+ 167999.0,9578.368696859288,167.0,1.0,168
170
+ 168999.0,10828.49496204565,168.0,1.0,169
171
+ 169999.0,9733.430236129821,169.0,1.0,170
172
+ 170999.0,10007.98613789981,170.0,1.0,171
173
+ 171999.0,9569.915445329425,171.0,1.0,172
174
+ 172999.0,10801.238238071226,172.0,1.0,173
175
+ 173999.0,10252.660579902704,173.0,1.0,174
176
+ 174999.0,10846.800024978778,174.0,1.0,175
177
+ 175999.0,10872.179223001885,175.0,1.0,176
178
+ 176999.0,10741.979758846242,176.0,1.0,177
179
+ 177999.0,10713.17763525313,177.0,1.0,178
180
+ 178999.0,10348.405889312931,178.0,1.0,179
181
+ 179999.0,9593.244471757998,179.0,1.0,180
182
+ 180999.0,9781.034301005322,180.0,1.0,181
183
+ 181999.0,9313.628199551471,181.0,1.0,182
184
+ 182999.0,10798.412348436595,182.0,1.0,183
185
+ 183999.0,9864.745749476704,183.0,1.0,184
186
+ 184999.0,9254.062532136639,184.0,1.0,185
187
+ 185999.0,10772.370872120915,185.0,1.0,186
188
+ 186999.0,10147.026474422122,186.0,1.0,187
189
+ 187999.0,10434.020461015656,187.0,1.0,188
190
+ 188999.0,10288.182741605317,188.0,1.0,189
191
+ 189999.0,10538.031738489222,189.0,1.0,190
192
+ 190999.0,10238.1702030159,190.0,1.0,191
193
+ 191999.0,10693.423346480242,191.0,1.0,192
194
+ 192999.0,10213.031572771486,192.0,1.0,193
195
+ 193999.0,10414.205010810525,193.0,1.0,194
196
+ 194999.0,10539.408830633187,194.0,1.0,195
197
+ 195999.0,10841.175928918723,195.0,1.0,196
198
+ 196999.0,10504.649511995141,196.0,1.0,197
199
+ 197999.0,9915.490024212884,197.0,1.0,198
200
+ 198999.0,10885.863398881751,198.0,1.0,199
201
+ 199999.0,10715.89981798312,199.0,1.0,200
202
+ 200999.0,10655.26499256146,200.0,1.0,201
203
+ 201999.0,9795.10086039266,201.0,1.0,202
204
+ 202999.0,10761.119744038515,202.0,1.0,203
205
+ 203999.0,11003.021297650032,203.0,1.0,204
206
+ 204999.0,11024.250124541313,204.0,1.0,205
207
+ 205999.0,9780.716416707417,205.0,1.0,206
208
+ 206999.0,10903.806570977013,206.0,1.0,207
209
+ 207999.0,10888.790471457558,207.0,1.0,208
210
+ 208999.0,10407.650943735562,208.0,1.0,209
211
+ 209999.0,10042.21847968282,209.0,1.0,210
212
+ 210999.0,10203.33393135972,210.0,1.0,211
213
+ 211999.0,10399.650618369827,211.0,1.0,212
214
+ 212999.0,10727.786063343965,212.0,1.0,213
215
+ 213999.0,10771.639026020372,213.0,1.0,214
216
+ 214999.0,10622.20170064502,214.0,1.0,215
217
+ 215999.0,9724.16967428323,215.0,1.0,216
218
+ 216999.0,10974.050123689447,216.0,1.0,217
219
+ 217999.0,10615.911433832272,217.0,1.0,218
220
+ 218999.0,10793.60795656919,218.0,1.0,219
221
+ 219999.0,10853.708637297295,219.0,1.0,220
222
+ 220999.0,9738.562028356819,220.0,1.0,221
223
+ 221999.0,10108.423891758688,221.0,1.0,222
224
+ 222999.0,10209.784558904888,222.0,1.0,223
225
+ 223999.0,10799.734859072858,223.0,1.0,224
226
+ 224999.0,10938.401505392014,224.0,1.0,225
227
+ 225999.0,10717.11088706287,225.0,1.0,226
228
+ 226999.0,10284.214395269752,226.0,1.0,227
229
+ 227999.0,10743.237966349207,227.0,1.0,228
230
+ 228999.0,10141.090359379039,228.0,1.0,229
231
+ 229999.0,10612.984569945347,229.0,1.0,230
232
+ 230999.0,10747.569466317871,230.0,1.0,231
233
+ 231999.0,10492.953469361335,231.0,1.0,232
234
+ 232999.0,10298.883156308866,232.0,1.0,233
235
+ 233999.0,10300.387737370102,233.0,1.0,234
236
+ 234999.0,10679.356489773716,234.0,1.0,235
237
+ 235999.0,9849.05429033343,235.0,1.0,236
238
+ 236999.0,10773.081269678083,236.0,1.0,237
239
+ 237999.0,10260.492327029848,237.0,1.0,238
240
+ 238999.0,10523.792895701807,238.0,1.0,239
241
+ 239999.0,10921.168786630255,239.0,1.0,240
242
+ 240999.0,10029.797066882842,240.0,1.0,241
243
+ 241999.0,10604.446852052013,241.0,1.0,242
244
+ 242999.0,9634.881021868927,242.0,1.0,243
245
+ 243999.0,10203.983631046956,243.0,1.0,244
246
+ 244999.0,10246.012897708786,244.0,1.0,245
247
+ 245999.0,10606.513272520846,245.0,1.0,246
248
+ 246999.0,10613.025368354067,246.0,1.0,247
249
+ 247999.0,10334.592521369425,247.0,1.0,248
250
+ 248999.0,10529.724739345627,248.0,1.0,249
251
+ 249999.0,10532.613304322564,249.0,1.0,250
252
+ 250999.0,10062.246160686384,250.0,1.0,251
253
+ 251999.0,10597.117334841008,251.0,1.0,252
254
+ 252999.0,8471.198079511852,252.0,1.0,253
255
+ 253999.0,-312.51625406083065,253.0,1.0,254
256
+ 254999.0,-376.11402742869177,254.0,1.0,255
257
+ 255999.0,-95.51750228536943,255.0,1.0,256
258
+ 256999.0,9722.580734318644,256.0,1.0,257
259
+ 257999.0,10159.276616640918,257.0,1.0,258
260
+ 258999.0,10833.890403410827,258.0,1.0,259
261
+ 259999.0,10454.374847467805,259.0,1.0,260
262
+ 260999.0,10741.360325785907,260.0,1.0,261
263
+ 261999.0,10909.634080048627,261.0,1.0,262
264
+ 262999.0,10773.29829582095,262.0,1.0,263
265
+ 263999.0,10882.626251289954,263.0,1.0,264
266
+ 264999.0,10385.90692289155,264.0,1.0,265
267
+ 265999.0,10426.6070389699,265.0,1.0,266
268
+ 266999.0,10675.41964130892,266.0,1.0,267
269
+ 267999.0,10181.839602121348,267.0,1.0,268
270
+ 268999.0,10741.536519790952,268.0,1.0,269
271
+ 269999.0,10956.317081082558,269.0,1.0,270
272
+ 270999.0,10444.615952682532,270.0,1.0,271
273
+ 271999.0,10539.904557835602,271.0,1.0,272
274
+ 272999.0,10490.616740356701,272.0,1.0,273
275
+ 273999.0,10795.496279307132,273.0,1.0,274
276
+ 274999.0,10377.907421383054,274.0,1.0,275
277
+ 275999.0,10313.830155854523,275.0,1.0,276
278
+ 276999.0,10807.935214501398,276.0,1.0,277
279
+ 277999.0,11020.30837268351,277.0,1.0,278
280
+ 278999.0,10463.45102158009,278.0,1.0,279
281
+ 279999.0,9803.611625728125,279.0,1.0,280
282
+ 280999.0,10677.350327155074,280.0,1.0,281
283
+ 281999.0,10659.144608171926,281.0,1.0,282
284
+ 282999.0,10522.452171801262,282.0,1.0,283
285
+ 283999.0,10329.237752589697,283.0,1.0,284
286
+ 284999.0,10668.380171747363,284.0,1.0,285
287
+ 285999.0,10299.537024856107,285.0,1.0,286
288
+ 286999.0,10774.154863892461,286.0,1.0,287
289
+ 287999.0,10482.969298842547,287.0,1.0,288
290
+ 288999.0,10436.79857438493,288.0,1.0,289
291
+ 289999.0,10514.977131522855,289.0,1.0,290
292
+ 290999.0,9800.464366667711,290.0,1.0,291
293
+ 291999.0,9716.933095811448,291.0,1.0,292
294
+ 292999.0,10311.470742045705,292.0,1.0,293
295
+ 293999.0,9974.333151661915,293.0,1.0,294
296
+ 294999.0,10688.307676247105,294.0,1.0,295
297
+ 295999.0,10541.841708788743,295.0,1.0,296
298
+ 296999.0,10233.63947488195,296.0,1.0,297
299
+ 297999.0,10700.537961614162,297.0,1.0,298
300
+ 298999.0,10676.128854749648,298.0,1.0,299
301
+ 299999.0,10586.60344067787,299.0,1.0,300
302
+ 300999.0,9978.917996022394,300.0,1.0,301
303
+ 301999.0,10294.26418960165,301.0,1.0,302
304
+ 302999.0,10039.538693772327,302.0,1.0,303
305
+ 303999.0,10821.145896566148,303.0,1.0,304
306
+ 304999.0,10723.180469989655,304.0,1.0,305
307
+ 305999.0,10444.343609100011,305.0,1.0,306
308
+ 306999.0,10043.749680883042,306.0,1.0,307
309
+ 307999.0,9580.431139731192,307.0,1.0,308
310
+ 308999.0,10370.952916522792,308.0,1.0,309
311
+ 309999.0,10546.92130336286,309.0,1.0,310
312
+ 310999.0,10593.06293586483,310.0,1.0,311
313
+ 311999.0,10137.346695024855,311.0,1.0,312
314
+ 312999.0,10232.065552537286,312.0,1.0,313
315
+ 313999.0,10599.943216542424,313.0,1.0,314
316
+ 314999.0,10614.763611072653,314.0,1.0,315
317
+ 315999.0,10496.706974649445,315.0,1.0,316
318
+ 316999.0,10564.311174166134,316.0,1.0,317
319
+ 317999.0,10192.10185841713,317.0,1.0,318
320
+ 318999.0,10697.051251622051,318.0,1.0,319
321
+ 319999.0,10627.00531722882,319.0,1.0,320
322
+ 320999.0,9716.71529963849,320.0,1.0,321
323
+ 321999.0,10469.493604501613,321.0,1.0,322
324
+ 322999.0,10513.41456257654,322.0,1.0,323
325
+ 323999.0,10496.839526756916,323.0,1.0,324
326
+ 324999.0,10483.637501383695,324.0,1.0,325
327
+ 325999.0,10397.427435462547,325.0,1.0,326
328
+ 326999.0,10543.65881008755,326.0,1.0,327
329
+ 327999.0,10533.929605121997,327.0,1.0,328
330
+ 328999.0,10172.6104777934,328.0,1.0,329
331
+ 329999.0,10559.167241234536,329.0,1.0,330
332
+ 330999.0,9683.674582115551,330.0,1.0,331
333
+ 331999.0,10551.58551515645,331.0,1.0,332
334
+ 332999.0,10606.391325084602,332.0,1.0,333
335
+ 333999.0,10783.068974694535,333.0,1.0,334
336
+ 334999.0,10124.02726997322,334.0,1.0,335
337
+ 335999.0,9687.092476373595,335.0,1.0,336
338
+ 336999.0,10562.985555143656,336.0,1.0,337
339
+ 337999.0,9786.362081434578,337.0,1.0,338
340
+ 338999.0,10384.177766724722,338.0,1.0,339
341
+ 339999.0,10378.777121092467,339.0,1.0,340
342
+ 340999.0,9771.209952410552,340.0,1.0,341
343
+ 341999.0,9787.5398515433,341.0,1.0,342
344
+ 342999.0,10101.34014387477,342.0,1.0,343
345
+ 343999.0,10262.975289160537,343.0,1.0,344
346
+ 344999.0,10274.16110320731,344.0,1.0,345
347
+ 345999.0,10085.353840146956,345.0,1.0,346
348
+ 346999.0,9388.621262403349,346.0,1.0,347
349
+ 347999.0,10239.33194377368,347.0,1.0,348
350
+ 348999.0,9941.833470073865,348.0,1.0,349
351
+ 349999.0,10098.968805685094,349.0,1.0,350
352
+ 350999.0,10628.673060844114,350.0,1.0,351
353
+ 351999.0,10229.026984218763,351.0,1.0,352
354
+ 352999.0,10364.742283419999,352.0,1.0,353
355
+ 353999.0,9560.484149441705,353.0,1.0,354
356
+ 354999.0,10428.354707131242,354.0,1.0,355
357
+ 355999.0,10322.211073955707,355.0,1.0,356
358
+ 356999.0,10361.854122413322,356.0,1.0,357
359
+ 357999.0,9962.833265821988,357.0,1.0,358
360
+ 358999.0,10564.644029610097,358.0,1.0,359
361
+ 359999.0,10201.953560196856,359.0,1.0,360
362
+ 360999.0,10147.230591917361,360.0,1.0,361
363
+ 361999.0,10034.812170426963,361.0,1.0,362
364
+ 362999.0,10672.750900366125,362.0,1.0,363
365
+ 363999.0,10064.644617111078,363.0,1.0,364
366
+ 364999.0,9719.097538114482,364.0,1.0,365
367
+ 365999.0,9767.707985565454,365.0,1.0,366
368
+ 366999.0,10212.727301403724,366.0,1.0,367
369
+ 367999.0,10474.783248751484,367.0,1.0,368
370
+ 368999.0,10297.15508735371,368.0,1.0,369
371
+ 369999.0,10217.210684254456,369.0,1.0,370
372
+ 370999.0,9974.086508279841,370.0,1.0,371
373
+ 371999.0,10331.2970628132,371.0,1.0,372
374
+ 372999.0,10343.002899206702,372.0,1.0,373
375
+ 373999.0,10186.052668051922,373.0,1.0,374
376
+ 374999.0,8887.601326359203,374.0,1.0,375
377
+ 375999.0,9938.712302120937,375.0,1.0,376
378
+ 376999.0,10099.970032410727,376.0,1.0,377
379
+ 377999.0,9568.512786859152,377.0,1.0,378
380
+ 378999.0,9514.351454618063,378.0,1.0,379
381
+ 379999.0,10244.24712687496,379.0,1.0,380
382
+ 380999.0,9622.658106996148,380.0,1.0,381
383
+ 381999.0,9586.994311832854,381.0,1.0,382
384
+ 382999.0,10324.10130489058,382.0,1.0,383
385
+ 383999.0,10387.379094908414,383.0,1.0,384
386
+ 384999.0,10383.431009855067,384.0,1.0,385
387
+ 385999.0,10228.424326632074,385.0,1.0,386
388
+ 386999.0,10215.448297216946,386.0,1.0,387
389
+ 387999.0,9565.987011368781,387.0,1.0,388
390
+ 388999.0,10353.80096116201,388.0,1.0,389
391
+ 389999.0,10269.816234974323,389.0,1.0,390
392
+ 390999.0,10083.89844025969,390.0,1.0,391
393
+ 391999.0,10173.611893050529,391.0,1.0,392
394
+ 392999.0,9777.795894070696,392.0,1.0,393
395
+ 393999.0,9212.813359994741,393.0,1.0,394
396
+ 394999.0,10080.51811211683,394.0,1.0,395
397
+ 395999.0,10330.495089295642,395.0,1.0,396
398
+ 396999.0,10140.153068597005,396.0,1.0,397
399
+ 397999.0,10406.77160592857,397.0,1.0,398
400
+ 398999.0,9433.818120980079,398.0,1.0,399
401
+ 399999.0,10006.908566840892,399.0,1.0,400
mbpo_half_cheetah_v4/main/sac.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4070974ce42232f2dded449856e217640c55f8d74265ace78510c4a43813950f
3
+ size 12173785
mbpo_half_cheetah_v4/main/train.csv ADDED
The diff for this file is too large to render. See raw diff
 
mbpo_inv_pendulum_v4/diff/.hydra/config.yaml ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 0
2
+ device: cuda:0
3
+ log_frequency_agent: 1000
4
+ save_video: false
5
+ debug_mode: false
6
+ experiment: default
7
+ root_dir: ./exp
8
+ algorithm:
9
+ name: mbpo
10
+ normalize: true
11
+ normalize_double_precision: true
12
+ target_is_delta: true
13
+ learned_rewards: true
14
+ freq_train_model: ${overrides.freq_train_model}
15
+ real_data_ratio: 0.0
16
+ sac_samples_action: true
17
+ initial_exploration_steps: 5000
18
+ random_initial_explore: false
19
+ num_eval_episodes: 1
20
+ agent:
21
+ _target_: mbrl.third_party.pytorch_sac_pranz24.sac.SAC
22
+ num_inputs: ???
23
+ action_space:
24
+ _target_: gym.env.Box
25
+ low: ???
26
+ high: ???
27
+ shape: ???
28
+ args:
29
+ gamma: ${overrides.sac_gamma}
30
+ tau: ${overrides.sac_tau}
31
+ alpha: ${overrides.sac_alpha}
32
+ policy: ${overrides.sac_policy}
33
+ target_update_interval: ${overrides.sac_target_update_interval}
34
+ automatic_entropy_tuning: ${overrides.sac_automatic_entropy_tuning}
35
+ target_entropy: ${overrides.sac_target_entropy}
36
+ hidden_size: ${overrides.sac_hidden_size}
37
+ device: ${device}
38
+ lr: ${overrides.sac_lr}
39
+ dynamics_model:
40
+ _target_: mbrl.models.GaussianMLP
41
+ device: ${device}
42
+ num_layers: 4
43
+ in_size: ???
44
+ out_size: ???
45
+ ensemble_size: 7
46
+ hid_size: 200
47
+ deterministic: false
48
+ propagation_method: random_model
49
+ learn_logvar_bounds: false
50
+ activation_fn_cfg:
51
+ _target_: torch.nn.SiLU
52
+ overrides:
53
+ env: gym___InvertedPendulum-v4
54
+ term_fn: inverted_pendulum
55
+ num_steps: 20000
56
+ epoch_length: 250
57
+ num_elites: 5
58
+ patience: 5
59
+ model_lr: 0.001
60
+ model_wd: 1.0e-05
61
+ model_batch_size: 256
62
+ validation_ratio: 0.2
63
+ freq_train_model: 250
64
+ effective_model_rollouts_per_step: 400
65
+ rollout_schedule:
66
+ - 1
67
+ - 15
68
+ - 1
69
+ - 1
70
+ num_sac_updates_per_step: 10
71
+ sac_updates_every_steps: 1
72
+ num_epochs_to_retain_sac_buffer: 1
73
+ sac_gamma: 0.99
74
+ sac_tau: 0.005
75
+ sac_alpha: 0.2
76
+ sac_policy: Gaussian
77
+ sac_target_update_interval: 1
78
+ sac_automatic_entropy_tuning: true
79
+ sac_hidden_size: 256
80
+ sac_lr: 0.0003
81
+ sac_batch_size: 256
82
+ sac_target_entropy: -1
83
+ action_optimizer:
84
+ _target_: mbrl.planning.CEMOptimizer
85
+ num_iterations: ${overrides.cem_num_iters}
86
+ elite_ratio: ${overrides.cem_elite_ratio}
87
+ population_size: ${overrides.cem_population_size}
88
+ alpha: ${overrides.cem_alpha}
89
+ lower_bound: ???
90
+ upper_bound: ???
91
+ return_mean_elites: true
92
+ device: ${device}
93
+ clipped_normal: ${overrides.cem_clipped_normal}
mbpo_inv_pendulum_v4/diff/.hydra/hydra.yaml ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${root_dir}/${algorithm.name}/${experiment}/${overrides.env}/${now:%Y.%m.%d}/${now:%H%M%S}
4
+ sweep:
5
+ dir: ${root_dir}/${algorithm.name}/${experiment}/${overrides.env}/${now:%Y.%m.%d}/${now:%H%M%S}
6
+ subdir: ${hydra.job.num}
7
+ hydra_logging:
8
+ version: 1
9
+ formatters:
10
+ simple:
11
+ format: '[%(asctime)s][HYDRA] %(message)s'
12
+ handlers:
13
+ console:
14
+ class: logging.StreamHandler
15
+ formatter: simple
16
+ stream: ext://sys.stdout
17
+ root:
18
+ level: INFO
19
+ handlers:
20
+ - console
21
+ loggers:
22
+ logging_example:
23
+ level: DEBUG
24
+ disable_existing_loggers: false
25
+ job_logging:
26
+ version: 1
27
+ formatters:
28
+ simple:
29
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
30
+ handlers:
31
+ console:
32
+ class: logging.StreamHandler
33
+ formatter: simple
34
+ stream: ext://sys.stdout
35
+ file:
36
+ class: logging.FileHandler
37
+ formatter: simple
38
+ filename: ${hydra.job.name}.log
39
+ root:
40
+ level: INFO
41
+ handlers:
42
+ - console
43
+ - file
44
+ disable_existing_loggers: false
45
+ sweeper:
46
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
47
+ max_batch_size: null
48
+ launcher:
49
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
50
+ help:
51
+ app_name: ${hydra.job.name}
52
+ header: '${hydra.help.app_name} is powered by Hydra.
53
+
54
+ '
55
+ footer: 'Powered by Hydra (https://hydra.cc)
56
+
57
+ Use --hydra-help to view Hydra specific help
58
+
59
+ '
60
+ template: '${hydra.help.header}
61
+
62
+ == Configuration groups ==
63
+
64
+ Compose your configuration from those groups (group=option)
65
+
66
+
67
+ $APP_CONFIG_GROUPS
68
+
69
+
70
+ == Config ==
71
+
72
+ Override anything in the config (foo.bar=value)
73
+
74
+
75
+ $CONFIG
76
+
77
+
78
+ ${hydra.help.footer}
79
+
80
+ '
81
+ hydra_help:
82
+ hydra_help: ???
83
+ template: 'Hydra (${hydra.runtime.version})
84
+
85
+ See https://hydra.cc for more info.
86
+
87
+
88
+ == Flags ==
89
+
90
+ $FLAGS_HELP
91
+
92
+
93
+ == Configuration groups ==
94
+
95
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
96
+ to command line)
97
+
98
+
99
+ $HYDRA_CONFIG_GROUPS
100
+
101
+
102
+ Use ''--cfg hydra'' to Show the Hydra config.
103
+
104
+ '
105
+ output_subdir: .hydra
106
+ overrides:
107
+ hydra: []
108
+ task:
109
+ - algorithm=mbpo
110
+ - overrides=mbpo_inv_pendulum
111
+ job:
112
+ name: main
113
+ override_dirname: algorithm=mbpo,overrides=mbpo_inv_pendulum
114
+ id: ???
115
+ num: ???
116
+ config_name: main
117
+ env_set: {}
118
+ env_copy: []
119
+ config:
120
+ override_dirname:
121
+ kv_sep: '='
122
+ item_sep: ','
123
+ exclude_keys: []
124
+ runtime:
125
+ version: 1.0.3
126
+ cwd: /home/raghava/projects/mbrl-lib
127
+ verbose: false
mbpo_inv_pendulum_v4/diff/.hydra/overrides.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ - algorithm=mbpo
2
+ - overrides=mbpo_inv_pendulum
mbpo_inv_pendulum_v4/diff/env_stats.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5e7846a1da9cbaff6eac7cec7587a3ca3ab97080e986dbe2e8c4c1bc1f789a3
3
+ size 278
mbpo_inv_pendulum_v4/diff/eval.csv ADDED
File without changes
mbpo_inv_pendulum_v4/diff/main.log ADDED
File without changes
mbpo_inv_pendulum_v4/diff/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e2eb49d13624a0e6301a99e4846fe84f7375c8dfe61c90dce02a3a5a3aa0360
3
+ size 3470949
mbpo_inv_pendulum_v4/diff/model_train.csv ADDED
The diff for this file is too large to render. See raw diff
 
mbpo_inv_pendulum_v4/diff/replay_buffer.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3de70022cd88583cd06d8c3a94d8d5510ff324b4eb25bd23ebe75f9befc1ea47
3
+ size 1641766
mbpo_inv_pendulum_v4/diff/results.csv ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ env_step,episode_reward,epoch,rollout_length,step
2
+ 249.0,15.0,0.0,1.0,1
3
+ 499.0,59.0,1.0,1.0,2
4
+ 749.0,49.0,2.0,1.0,3
5
+ 999.0,138.0,3.0,1.0,4
6
+ 1249.0,70.0,4.0,1.0,5
7
+ 1499.0,142.0,5.0,1.0,6
8
+ 1749.0,135.0,6.0,1.0,7
9
+ 1999.0,113.0,7.0,1.0,8
10
+ 2249.0,76.0,8.0,1.0,9
11
+ 2499.0,79.0,9.0,1.0,10
12
+ 2749.0,99.0,10.0,1.0,11
13
+ 2999.0,102.0,11.0,1.0,12
14
+ 3249.0,111.0,12.0,1.0,13
15
+ 3499.0,151.0,13.0,1.0,14
16
+ 3749.0,1000.0,14.0,1.0,15
17
+ 3999.0,1000.0,15.0,1.0,16
18
+ 4249.0,1000.0,16.0,1.0,17
19
+ 4499.0,1000.0,17.0,1.0,18
20
+ 4749.0,1000.0,18.0,1.0,19
21
+ 4999.0,1000.0,19.0,1.0,20
22
+ 5249.0,539.0,20.0,1.0,21
23
+ 5499.0,1000.0,21.0,1.0,22
24
+ 5749.0,1000.0,22.0,1.0,23
25
+ 5999.0,1000.0,23.0,1.0,24
26
+ 6249.0,1000.0,24.0,1.0,25
27
+ 6499.0,1000.0,25.0,1.0,26
28
+ 6749.0,1000.0,26.0,1.0,27
29
+ 6999.0,1000.0,27.0,1.0,28
30
+ 7249.0,1000.0,28.0,1.0,29
31
+ 7499.0,1000.0,29.0,1.0,30
32
+ 7749.0,1000.0,30.0,1.0,31
33
+ 7999.0,1000.0,31.0,1.0,32
34
+ 8249.0,1000.0,32.0,1.0,33
35
+ 8499.0,1000.0,33.0,1.0,34
36
+ 8749.0,1000.0,34.0,1.0,35
37
+ 8999.0,1000.0,35.0,1.0,36
38
+ 9249.0,1000.0,36.0,1.0,37
39
+ 9499.0,1000.0,37.0,1.0,38
40
+ 9749.0,1000.0,38.0,1.0,39
41
+ 9999.0,1000.0,39.0,1.0,40
42
+ 10249.0,1000.0,40.0,1.0,41
43
+ 10499.0,1000.0,41.0,1.0,42
44
+ 10749.0,1000.0,42.0,1.0,43
45
+ 10999.0,1000.0,43.0,1.0,44
46
+ 11249.0,1000.0,44.0,1.0,45
47
+ 11499.0,1000.0,45.0,1.0,46
48
+ 11749.0,1000.0,46.0,1.0,47
49
+ 11999.0,1000.0,47.0,1.0,48
50
+ 12249.0,1000.0,48.0,1.0,49
51
+ 12499.0,1000.0,49.0,1.0,50
52
+ 12749.0,1000.0,50.0,1.0,51
53
+ 12999.0,1000.0,51.0,1.0,52
54
+ 13249.0,1000.0,52.0,1.0,53
55
+ 13499.0,1000.0,53.0,1.0,54
56
+ 13749.0,1000.0,54.0,1.0,55
57
+ 13999.0,1000.0,55.0,1.0,56
58
+ 14249.0,1000.0,56.0,1.0,57
59
+ 14499.0,1000.0,57.0,1.0,58
60
+ 14749.0,1000.0,58.0,1.0,59
61
+ 14999.0,1000.0,59.0,1.0,60
62
+ 15249.0,1000.0,60.0,1.0,61
63
+ 15499.0,1000.0,61.0,1.0,62
64
+ 15749.0,1000.0,62.0,1.0,63
65
+ 15999.0,1000.0,63.0,1.0,64
66
+ 16249.0,1000.0,64.0,1.0,65
67
+ 16499.0,1000.0,65.0,1.0,66
68
+ 16749.0,1000.0,66.0,1.0,67
69
+ 16999.0,1000.0,67.0,1.0,68
70
+ 17249.0,1000.0,68.0,1.0,69
71
+ 17499.0,1000.0,69.0,1.0,70
72
+ 17749.0,1000.0,70.0,1.0,71
73
+ 17999.0,1000.0,71.0,1.0,72
74
+ 18249.0,1000.0,72.0,1.0,73
75
+ 18499.0,1000.0,73.0,1.0,74
76
+ 18749.0,1000.0,74.0,1.0,75
77
+ 18999.0,1000.0,75.0,1.0,76
78
+ 19249.0,1000.0,76.0,1.0,77
79
+ 19499.0,1000.0,77.0,1.0,78
80
+ 19749.0,1000.0,78.0,1.0,79
81
+ 19999.0,1000.0,79.0,1.0,80
mbpo_inv_pendulum_v4/diff/sac.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7502abe7237e51251036bf0e4322d95b416b69c1325868e4d51f03cf6c418217
3
+ size 2996697
mbpo_inv_pendulum_v4/diff/train.csv ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ actor_entropy,actor_loss,actor_target_entropy,alpha_loss,alpha_value,batch_reward,critic_loss,step
2
+ 1.493838207602501,-6.569816477864981,-1.0,-0.35015889247873566,0.8665040380358696,0.9999173983931542,2.2208123644590376,1000
3
+ 0.9439774397611618,-13.745218251228332,-1.0,-0.8007358192205429,0.6619804044365882,0.9999008753299713,4.0111730659008025,2000
4
+ 0.7604190962910652,-20.797024126052857,-1.0,-1.1928863077759744,0.5082762288749219,1.000005373597145,9.348308407306671,3000
5
+ 0.6368114268183708,-26.51144472885132,-1.0,-1.5547268472909928,0.38732609486579894,1.0001056509017945,14.958546762466431,4000
6
+ 0.5381594774127006,-31.5230074672699,-1.0,-1.8867379150390624,0.29372579529881476,1.0001363333463669,19.679044901371004,5000
7
+ 0.43797474505752326,-35.869890718460084,-1.0,-2.1628250383138656,0.22195076505839825,0.9986995432376862,23.389901768684386,6000
8
+ 0.2344058583350852,-39.58433359527588,-1.0,-2.188446480989456,0.16962583105266094,0.9987130118608475,27.624515762805938,7000
9
+ 0.011835498963482677,-42.83946503448486,-1.0,-2.0501222967505455,0.13134975270181895,0.9995527708530426,28.9760269985199,8000
10
+ -0.37779140403680506,-45.39073581314087,-1.0,-1.3950421232432126,0.10541640808433295,1.0004109799265861,29.69263405752182,9000
11
+ -0.6495422125607729,-47.868538143157956,-1.0,-0.8446831302680076,0.08914945264160633,1.0004087806344033,31.66173789024353,10000
12
+ -0.8718362467885017,-50.113747859954834,-1.0,-0.32353918302059176,0.07922550565749407,1.000056108057499,30.508046844244003,11000
13
+ -1.07795413094759,-51.871144283294676,-1.0,0.19740964542888104,0.07956916995346547,1.0000849595069885,32.4674864859581,12000
14
+ -1.0749270496964454,-53.568331142425535,-1.0,0.18325764698907732,0.08651803997904062,1.0011069517731666,34.173708275794986,13000
15
+ -1.1310607643723487,-54.92776387786865,-1.0,0.3009507325552404,0.10061549678444863,1.0021368993520736,36.8220076110363,14000
16
+ -1.0803008878231048,-56.35177045822144,-1.0,0.17132213531434537,0.11828142216801643,1.0021741310358048,37.059287757873534,15000
17
+ -1.0289104614257814,-57.29119060134888,-1.0,0.05902101999334991,0.12951574516296388,1.0011007027029992,42.23691171693802,16000
18
+ -1.0334283287525177,-58.47008536911011,-1.0,0.06622494510374963,0.1406096382588148,1.0011140705943107,42.127763655662534,17000
19
+ -1.0162736368775367,-59.566441989898685,-1.0,0.03143201219290495,0.14436958655714988,1.000819817483425,42.4480926425457,18000
20
+ -1.00455027282238,-60.46463600540161,-1.0,0.008704960724338889,0.1477274897247553,1.0005073457360267,40.8605065331459,19000
21
+ -1.0227747420072555,-61.19214879989624,-1.0,0.042737597707659,0.15415391635894776,1.0005110157132149,41.268779353380204,20000
22
+ -1.0315952678322793,-61.78733708190918,-1.0,0.057287524234503506,0.16244652178883554,0.9999493732452392,40.64452742147446,21000
23
+ -1.0411550005674362,-62.19964386367798,-1.0,0.07124377514049411,0.17624832533299922,0.9999484555721283,41.30928859949112,22000
24
+ -0.9980195701718331,-62.48287965011597,-1.0,-0.0032996708285063505,0.18549373878538608,0.9996576129198075,44.99455017948151,23000
25
+ -1.0225543335080147,-62.5963381652832,-1.0,0.03752785968221724,0.18992553170025347,0.9993076038956642,44.44289537906647,24000
26
+ -1.023633216381073,-62.46517762374878,-1.0,0.03801574104186147,0.20172112296521663,0.9993349796533585,43.49600985431671,25000
27
+ -1.0181822930574418,-62.209617137908936,-1.0,0.02832673791050911,0.20904910536110402,1.0007421296834946,36.40054043006897,26000
28
+ -1.0088150359988213,-62.06679902648926,-1.0,0.013443255878984928,0.21943154594302178,1.0007333585619926,38.41663456940651,27000
29
+ -0.9833292745351792,-61.69345449829102,-1.0,-0.02547134857811034,0.22101346036791802,1.0003481801748275,36.67505409288406,28000
30
+ -0.9607535673379898,-61.22492010116577,-1.0,-0.06283083692938089,0.19951766909658908,0.9999483436346054,36.48649499058723,29000
31
+ -0.9875905993580818,-60.94002877044678,-1.0,-0.02067774767242372,0.1872829377800226,0.9999462094902992,35.76054789876938,30000
32
+ -0.9859303486943245,-60.45467862701416,-1.0,-0.023947426492348312,0.18260775251686573,1.000067180097103,33.46047931289673,31000
33
+ -0.9907931895852089,-60.23810551452637,-1.0,-0.01596553395036608,0.1754228770583868,1.000107972741127,33.02961659371853,32000
34
+ -0.9906358752250671,-60.032018226623535,-1.0,-0.016474583091214298,0.17196464632451533,1.0002173899412155,31.066219021558762,33000
35
+ -0.9921235266923905,-59.57483866882324,-1.0,-0.014051686435937882,0.16686931060254573,1.0003702776432037,29.98489404964447,34000
36
+ -0.9969602919220925,-59.18120288848877,-1.0,-0.00545992835611105,0.16624251732230186,1.000368851184845,30.22038459920883,35000
37
+ -0.9940293877124786,-58.94965456008911,-1.0,-0.010820465029217304,0.16045246842503547,1.000158005297184,28.08173468565941,36000
38
+ -1.0106326112151145,-58.96186043167114,-1.0,0.019267929589375853,0.16325592923164367,1.0001343381404877,26.802384899139405,37000
39
+ -0.9982487582564354,-59.005081771850584,-1.0,-0.0031140896528959274,0.16744341118633746,1.0003103867173195,27.711327709674837,38000
40
+ -0.9882265691757202,-59.42047230529785,-1.0,-0.02136775566264987,0.16206690035760402,1.0005004966259003,28.205893921613693,39000
41
+ -0.9839821374416351,-59.55798131942749,-1.0,-0.029782317876815796,0.15561468127369882,1.0005018964409829,28.01034232866764,40000
42
+ -0.9706209456920624,-59.92940855026245,-1.0,-0.05675998756475747,0.14528997722268105,0.9995018175840378,27.15603841483593,41000
43
+ -0.9938761875033378,-60.09583742904663,-1.0,-0.012159579254686833,0.13703985477983952,0.9994872786402702,27.511166502952577,42000
44
+ -0.9881486862301826,-60.30534705734253,-1.0,-0.023877707468345763,0.13359984268248082,0.999603948533535,26.12005173563957,43000
45
+ -0.9998864040374756,-60.631327610015866,-1.0,-0.0002087612822651863,0.12964784732460977,0.999720250070095,24.764948699116708,44000
46
+ -0.9991461510062217,-60.58202717208862,-1.0,-0.0017105450015515089,0.1300100853294134,0.999754517197609,24.632385041594507,45000
47
+ -1.0048933351039886,-60.91865445709229,-1.0,0.00996079982072115,0.13041590969264508,0.9989029683470726,22.79637634086609,46000
48
+ -1.0017562845349313,-60.80931303024292,-1.0,0.0035734086092561483,0.13406709623336793,0.9988998120427132,22.020199392199515,47000
49
+ -0.9804258936643601,-60.60379527664185,-1.0,-0.040085968650877477,0.13065906274318695,0.9992783052325249,26.249065366864205,48000
50
+ -0.9851111543774604,-60.61770191192627,-1.0,-0.0313395280521363,0.11999744119495154,0.9996418966650963,28.04982629776001,49000
51
+ -0.9920319446921348,-60.60330095672607,-1.0,-0.017063460103236137,0.11673798649013042,0.9996564839482307,27.930925288915635,50000
52
+ -0.9905456731319427,-59.38956230545044,-1.0,-0.02050870030466467,0.11097944168746471,1.0002420818805695,157.5964256362915,51000
53
+ -1.0022744398713113,-59.369951927185056,-1.0,0.005042384816333651,0.11500462274998427,1.0001986079216003,155.1019143371582,52000
54
+ -0.9770433279871941,-60.61716003036499,-1.0,-0.05097921687271446,0.10752894331514835,1.0003848908543587,86.48469992637634,53000
55
+ -1.0091568711400032,-62.13064689636231,-1.0,0.020602049209177496,0.10633968943357468,1.0005400642752647,22.67192457330227,54000
56
+ -1.00282784730196,-62.418231437683104,-1.0,0.006315168831497431,0.1082631842494011,1.000528253018856,23.25265587735176,55000
57
+ -1.0199857507944108,-63.09483688354492,-1.0,0.043709396209567784,0.11126763639599085,0.9998427070379258,20.28450769352913,56000
58
+ -0.9992282408475875,-63.36010657119751,-1.0,-0.0016420072847977282,0.11647002590447664,0.9998114565610886,19.140827070474625,57000
59
+ -0.9981566619277,-63.648306301116946,-1.0,-0.00395291100256145,0.11537689461559057,1.000055011689663,20.375387216806413,58000
60
+ -0.9853873317837715,-63.78722829818726,-1.0,-0.031910179372876886,0.11172135760635138,1.0002526872754096,20.824746731638907,59000
61
+ -0.9943797553777695,-63.90863159942627,-1.0,-0.012473334899172187,0.10835022008419037,1.000258600115776,21.33688474011421,60000
62
+ -1.004048469245434,-64.34789809799194,-1.0,0.0090462313760072,0.10715420034527778,0.9996799311041832,22.648823842287065,61000
63
+ -1.0060918080806731,-64.28810596847534,-1.0,0.013475406096782536,0.10974062752723694,0.9996724778413772,21.823831288337708,62000
64
+ -0.998650194823742,-64.38422663116455,-1.0,-0.00294608358386904,0.11000482465326786,0.9999199795722962,19.558816833496095,63000
65
+ -1.008877337694168,-64.70740211486816,-1.0,0.019425692196004093,0.11221228870749474,1.0001260786652566,17.413058219552042,64000
66
+ -0.9939568256139755,-64.57610186386108,-1.0,-0.013152386968955397,0.11422158103436232,1.000146174788475,16.977198952913284,65000
67
+ -0.996024838745594,-65.15687179946899,-1.0,-0.008716098831966518,0.1094980269446969,0.9996707973480224,19.15190222322941,66000
68
+ -0.9890276168584824,-65.36952756881713,-1.0,-0.024390053307637573,0.10788912042975425,0.999677120923996,18.856537677049637,67000
69
+ -1.0048189045190812,-65.70785009002685,-1.0,0.010808763852342963,0.10640459375083447,0.9997267207503319,19.313512980341912,68000
70
+ -0.9975502488613128,-66.06552368164063,-1.0,-0.005450441155582666,0.10845575701445341,0.9997880893349648,19.819950716137885,69000
71
+ -0.991422104716301,-66.49613233947754,-1.0,-0.019318902691826224,0.1053764559328556,0.9997928919196128,19.94525696337223,70000
72
+ -0.9968246851563454,-67.07324966812133,-1.0,-0.007204956223256886,0.10223569601774216,0.999457478582859,25.121548262357713,71000
73
+ -0.9951747527718544,-67.15217366790772,-1.0,-0.011028420000337064,0.10117325706779957,0.9993425493240357,24.204063668847084,72000
74
+ -0.9973370276689529,-67.52026532363891,-1.0,-0.006105904083698988,0.10007061798870563,0.999764063000679,22.575023555159568,73000
75
+ -0.9879728546738624,-67.64523902511597,-1.0,-0.02801996672898531,0.09714985463023186,1.0000820021629333,21.97725478518009,74000
76
+ -0.9837047052383423,-67.80198410415649,-1.0,-0.03881592246610671,0.09264590780436993,1.000082548737526,21.781193003952502,75000
77
+ -1.011653212249279,-68.07346231842041,-1.0,0.027938664303161203,0.09135581596940756,1.0009172095656396,19.04437172114849,76000
78
+ -0.9992588758468628,-68.15476265716553,-1.0,-0.0017334244921803474,0.09308406421542167,1.0009173532128335,19.636732360720636,77000
79
+ -1.0065687456727028,-68.34769983673095,-1.0,0.015547572972252965,0.0937886850386858,1.0002077078819276,17.504546292185783,78000
80
+ -0.9990048241615296,-68.62648236846924,-1.0,-0.0023199092000722884,0.09486197747290134,0.9995081804990769,17.917668056845663,79000
81
+ -0.9930106283426284,-68.73784998321533,-1.0,-0.01650009070429951,0.0928629122376442,0.9995026607513428,18.759752175033093,80000
82
+ -1.0018898729085923,-68.89342494201661,-1.0,0.004524260058999061,0.0927513626590371,0.9996313926577568,17.99517882978916,81000
83
+ -1.0001003710627556,-69.20227774047852,-1.0,0.00027379719354212286,0.09352016814798117,0.9996479009389877,18.178401344656944,82000
84
+ -0.9815731481909752,-69.64163168334962,-1.0,-0.04430841793585569,0.09039540746808052,0.9999472416639328,19.129633727908136,83000
85
+ -1.003025228381157,-69.73081675720215,-1.0,0.007398352241143585,0.08726292484998703,1.0003049224615097,22.261565452873707,84000
86
+ -0.989307136118412,-70.0006349182129,-1.0,-0.026141878167167305,0.08767862599343061,1.0002748109102249,22.29861309993267,85000
87
+ -0.9655788241624832,-70.62881507110596,-1.0,-0.08692321398388594,0.080210085041821,0.9999520015120507,18.51017069709301,86000
88
+ -0.9885674540400505,-71.06386505126953,-1.0,-0.02976301054097712,0.07434625550359487,0.9999402894973755,21.240147477328776,87000
89
+ -0.9811157184243202,-71.57851461029053,-1.0,-0.05015030830539763,0.07012299706041814,1.0001355493068695,21.760364861607552,88000
90
+ -0.9758122993111611,-72.08760706329346,-1.0,-0.06602096720365808,0.06564538194984197,1.0003441194295883,22.299463767528533,89000
91
+ -0.9968266456127167,-72.59746070861816,-1.0,-0.008750639522448183,0.06236926668509841,1.0003259177803994,22.897069624006747,90000
92
+ -0.986392564356327,-73.09360327911376,-1.0,-0.03810144287813455,0.05920897872745991,0.9999298987984657,23.95387733989954,91000
93
+ -0.977357744038105,-73.46304763793945,-1.0,-0.06470711470022797,0.05780288331583142,0.9999265711307526,24.778531049489974,92000
94
+ -1.0039828538894653,-73.83697990417481,-1.0,0.011551555769518019,0.054543361522257326,1.0002228609323502,22.7983062877059,93000
95
+ -1.049993280351162,-74.28579920196533,-1.0,0.14046295388415456,0.06089876812696457,1.0004683215618133,21.825397450089454,94000
96
+ -1.0085850566029548,-74.56178675079346,-1.0,0.023381707735359667,0.06616719587892293,1.0004742426276207,21.834701390326025,95000
97
+ -1.0122542967796326,-74.85806800079345,-1.0,0.032952326480299235,0.06719360151141882,1.0003446317315101,20.881829951286317,96000
98
+ -1.0210605749487878,-74.8063004989624,-1.0,0.055441863464191554,0.07084829632937908,1.000345691025257,21.586972839832306,97000
99
+ -1.011859646141529,-74.87140302276612,-1.0,0.030562927762046457,0.07616501616686583,0.9998994361758232,21.498714118003846,98000
100
+ -0.9867609457969666,-75.07281974029542,-1.0,-0.03407413821760565,0.07636412268131972,0.9994615656733513,21.85930393356085,99000
101
+ -1.0023563424944877,-75.04354410552979,-1.0,0.006173278223723173,0.0730332586914301,0.9994360575675965,22.131911728084088,100000
102
+ -1.0265334709882736,-75.31031993865967,-1.0,0.06762212561257183,0.0788614655509591,0.9995115852952003,22.47361252140999,101000
103
+ -1.0148809257149696,-75.25846280670166,-1.0,0.036845599841326473,0.08533117131143808,0.999511433005333,21.240438092947006,102000
104
+ -1.0277971612811088,-75.26931474304199,-1.0,0.06670697069168091,0.09047374437004328,0.9996494327783585,24.60781723153591,103000
105
+ -1.0135869224667549,-75.23014540863038,-1.0,0.03162317787483335,0.09787720506638288,0.9998011529445648,27.40267178708315,104000
106
+ -0.989388385117054,-75.19861267089844,-1.0,-0.02457569241616875,0.09793917412310839,0.9998078690171242,27.366890721201898,105000
107
+ -0.9676629127860069,-75.59605960845947,-1.0,-0.0774195080716163,0.09090384919941426,1.000170579612255,20.23391846460104,106000
108
+ -0.9873153592944145,-75.78726528167725,-1.0,-0.0313797371275723,0.08340023210644722,1.0001609872579575,19.498358128964902,107000
109
+ -0.9816827059388161,-76.03137968444824,-1.0,-0.04636062396783382,0.07942245162278413,1.0002473167777062,23.895989376187323,108000
110
+ -0.9843043202161789,-76.40210687255859,-1.0,-0.04069539660774171,0.07449376336485147,1.0003661205172538,26.60186349260807,109000
111
+ -0.9972612901329995,-76.63444550323486,-1.0,-0.007140183327719569,0.07106686289608478,1.0003362352252005,26.200468220055104,110000
112
+ -0.9983292373418808,-76.97551028442383,-1.0,-0.004360750805586577,0.07149379009753466,1.0001977071762085,21.592858878970148,111000
113
+ -0.9876489273905754,-77.12918453216552,-1.0,-0.03284798744134605,0.0694600162729621,1.0002032145261766,21.551772240877153,112000
114
+ -0.991434505045414,-77.15685929107666,-1.0,-0.023076199036091566,0.06737745525687933,0.9998867361545563,23.985899344861508,113000
115
+ -1.0057225298881531,-77.12702884674073,-1.0,0.01553801503404975,0.06685516469925643,0.9995961604714394,26.178320498645306,114000
116
+ -1.0030023970603943,-77.21003496551513,-1.0,0.008130928881466389,0.06868824980407953,0.9995831936001778,25.208522073745726,115000
117
+ -0.996603140592575,-77.58315219879151,-1.0,-0.009109364757314324,0.06742798230051994,1.0001947776675224,17.324451050162317,116000
118
+ -1.0193433285355569,-77.51738840484619,-1.0,0.051538418915122744,0.06915991185605526,1.0001872941851615,18.502246737003325,117000
119
+ -0.9977396162748337,-77.58117472076415,-1.0,-0.005899784555658698,0.07285885934531688,1.0000619815587997,19.857613354623318,118000
120
+ -0.9957109395265579,-77.74931622314453,-1.0,-0.011291122964583337,0.07158141831308604,0.9998870033621788,21.872688065946104,119000
121
+ -0.9758642501831055,-77.75992423248292,-1.0,-0.06489636794105172,0.06680584533512593,0.9999097186923027,21.384719988048076,120000
122
+ -0.9818301703333855,-78.21818016815186,-1.0,-0.050202988208271564,0.06278225111961365,0.9998687520623207,17.392764938354492,121000
123
+ -0.9827328267693519,-78.49793140411377,-1.0,-0.04877833329513669,0.059878904961049556,0.9998608286976814,16.729922588169575,122000
124
+ -0.9965555649995804,-78.63004919433594,-1.0,-0.009818097866140307,0.05608044613525271,1.0000594046711921,17.74705994784832,123000
125
+ -1.0278037972450256,-78.57762065887451,-1.0,0.07851473570428788,0.059272227119654415,1.0002437651157379,19.648591296076773,124000
126
+ -1.007983655333519,-78.73370041656494,-1.0,0.022092003559693693,0.06345146144554019,1.000271218597889,19.5869420440495,125000
127
+ -1.02958509349823,-78.99077085113525,-1.0,0.0798284177929163,0.06780733081698417,0.9998230465054512,21.72086617028713,126000
128
+ -0.9976160601377487,-78.88883309936523,-1.0,-0.006276324898935854,0.071120236068964,0.9998530797362327,20.855426520466803,127000
129
+ -1.0034334641695022,-78.84311935424805,-1.0,0.009135777525603772,0.0720900322496891,0.9999374279379845,21.609691269934178,128000
130
+ -0.9850325761437416,-78.97000855255126,-1.0,-0.03990509575419128,0.06865666772425175,1.000007982969284,20.6258009288311,129000
131
+ -0.9826672503352165,-79.0434119720459,-1.0,-0.04722128384374082,0.0648920365869999,1.0000016584396363,20.230778927206995,130000
132
+ -1.0002324653863908,-79.37714877319335,-1.0,0.0006909850537776947,0.06260854507610202,0.9996440658569336,22.565037951231,131000
133
+ -1.0330364577174187,-79.28788088989258,-1.0,0.08939480585232377,0.06712514241784812,0.9995979015231132,21.232402546167375,132000
134
+ -1.0157093277573586,-79.27483876800537,-1.0,0.04124018005654216,0.07193619012832642,0.9997707313299179,19.692615383028983,133000
135
+ -1.017749091029167,-79.41637870788574,-1.0,0.04554602128174156,0.07725188230723143,0.9999128139019012,17.75625270974636,134000
136
+ -0.9875344927310944,-79.38201184844971,-1.0,-0.03179976768419147,0.07831125605106354,0.9998872186541558,18.422774891138076,135000
137
+ -0.9939663333892822,-79.33133717346192,-1.0,-0.01557260387018323,0.07463804023712874,0.999814551115036,21.90999667006731,136000
138
+ -0.9710173662304878,-79.38993272399902,-1.0,-0.07676357857696713,0.07075537968426943,0.999826491355896,19.51068600165844,137000
139
+ -0.9883043924570084,-79.6456791381836,-1.0,-0.031789003593847155,0.06603261356800795,0.999942181289196,18.830005536735058,138000
140
+ -1.001346819460392,-79.86522940063476,-1.0,0.0037461116081103684,0.06382139748334885,1.000019571661949,17.60494112381339,139000
141
+ -0.9998247120976448,-79.96432592010498,-1.0,-0.00043489366117864845,0.06549482826143503,1.0000226683616638,17.018622804552315,140000
142
+ -0.9907862497568131,-80.2963182220459,-1.0,-0.025344387150369584,0.06361574615538121,1.0001142790317537,15.199616261094809,141000
143
+ -0.987527729690075,-80.3666222000122,-1.0,-0.03480504833720625,0.06035537938773632,1.0000992152690888,17.03840553218126,142000
144
+ -0.9728233120441436,-80.33490142822265,-1.0,-0.07780388488061726,0.05742417515814304,1.0001191559433937,18.845235291093587,143000
145
+ -0.9872996910214424,-80.23211087799072,-1.0,-0.03725586630869657,0.0531988036595285,1.0001454809308052,19.94920403164625,144000
146
+ -1.004514715731144,-80.3371275024414,-1.0,0.013391039645299316,0.051196304645389316,1.0001565765142442,18.852208189308644,145000
147
+ -1.0257146656513214,-80.64270692443847,-1.0,0.07464588841050863,0.05572146571055055,1.000187596142292,14.460804673284292,146000
148
+ -1.0052570534944534,-80.70591646575927,-1.0,0.015007079996168614,0.05824466022104025,1.0001739746332168,15.045653567552566,147000
149
+ -0.9993835052847863,-80.79999544525147,-1.0,-0.0017121764309704303,0.05872526940703392,0.9999828320741654,16.156062802076338,148000
150
+ -1.003722942829132,-80.88539027404785,-1.0,0.01058670607022941,0.05879524979367852,0.9998487173318863,19.03390526640415,149000
151
+ -1.0040151216983795,-80.90654062652588,-1.0,0.011362662557512521,0.0596247271373868,0.9998097960948944,17.528271546304225,150000
152
+ -1.0184957274198532,-81.18318515014649,-1.0,0.051469718523323535,0.06324904305487872,0.9994880017638207,26.671474549531936,151000
153
+ -0.9948843439221382,-81.17492239379882,-1.0,-0.014068985825404525,0.06396577922254801,0.9994792646765709,26.414887577593326,152000
154
+ -0.9710371059179306,-81.37146347045899,-1.0,-0.08159703485388309,0.05986364210397005,0.9999754172563553,20.687223061442374,153000
155
+ -0.9850005224943161,-81.53938858032227,-1.0,-0.04345716318115592,0.05491775343567133,1.000405151307583,18.95950824815035,154000
156
+ -0.9839188978672028,-81.88033156585693,-1.0,-0.04747940175421536,0.051397559851408,1.0004403069019319,17.303893975794317,155000
157
+ -1.0073484655022622,-82.17064730072022,-1.0,0.0218865153696388,0.05208290031552315,1.0000805525779723,17.320823793709277,156000
158
+ -1.0019021522402762,-82.21372888183593,-1.0,0.005655197087675333,0.051897705048322675,1.0000699099898338,16.365241955578327,157000
159
+ -0.9831316752433776,-82.28385097503661,-1.0,-0.050203024961054325,0.05040734025463462,1.0002646628618241,17.988270521491767,158000
160
+ -1.0039880566000938,-82.26859728240967,-1.0,0.01202364307269454,0.04953367314115167,1.0004776933193207,19.497488548576833,159000
161
+ -0.9896269409656525,-82.58067373657227,-1.0,-0.031201540444046258,0.04931324608251452,1.000492603480816,18.183698970258234,160000
162
+ -1.026499224603176,-83.16075233459473,-1.0,0.07926051216386258,0.05006169419363141,0.999981676697731,14.79143437576294,161000
163
+ -0.9990584208965302,-83.34099761199951,-1.0,-0.00272395053319633,0.052807229477912186,0.9999651595950126,15.926989418685435,162000
164
+ -0.9967637494206428,-83.49815326690674,-1.0,-0.00953466885816306,0.05257025824859738,1.0000815382003785,14.20480178925395,163000
165
+ -1.0135623653531074,-83.3752534713745,-1.0,0.03989865591470152,0.05197652318328619,1.0001495413780213,14.399893450081349,164000
166
+ -1.0083206827044486,-83.42160214996338,-1.0,0.024165250253863634,0.055594556406140326,1.0001402760744096,13.72768458968401,165000
167
+ -0.9784447424411774,-83.82295029449463,-1.0,-0.06289933261275292,0.05409995710849762,0.999622277021408,12.383471774071454,166000
168
+ -0.9912153229117393,-83.66328608703613,-1.0,-0.02609281131438911,0.05091436763852835,0.9996066495776177,11.937722791314124,167000
169
+ -0.9675353128910065,-83.61108913421631,-1.0,-0.09878123080544174,0.048147744808346035,0.9993120629191399,16.89767076662183,168000
170
+ -0.9252732286453247,-83.54589627075195,-1.0,-0.23897275562770665,0.04062466590106487,0.9991246962547302,21.02442072534561,169000
171
+ -1.0337345108389855,-83.83648028564453,-1.0,0.11007160902768373,0.03833197908103466,0.9990995711684227,20.15904557096958,170000
172
+ -1.03420645904541,-84.04858778381347,-1.0,0.108637395856902,0.04223010923340917,0.9998237280845642,10.668557365179062,171000
173
+ -0.997223490536213,-84.052030960083,-1.0,-0.008655021131038667,0.04293003601580858,0.9998472774028778,11.30187514001131,172000
174
+ -0.9843209443688392,-83.97513703918457,-1.0,-0.04948945372365415,0.04332855271548033,1.000102687895298,15.966236584991217,173000
175
+ -0.9963931609392166,-83.7561332244873,-1.0,-0.01132044121902436,0.04015885688737035,1.0003005577921866,16.89659118747711,174000
176
+ -0.9775900086164474,-83.79137325286865,-1.0,-0.07241816149931402,0.03835673970729113,1.0002006995081902,17.770900643646716,175000
177
+ -1.0042867133021354,-84.08270108795166,-1.0,0.014039047991856932,0.03848976227641106,1.0000358263254165,9.384288135141134,176000
178
+ -1.022399466753006,-84.00388520050049,-1.0,0.07211130412574858,0.039753952771425245,1.0000150427818297,9.279063169330358,177000
179
+ -0.9961340417861938,-84.10414520263672,-1.0,-0.012236406441777944,0.04206769589707255,1.000317626953125,8.324468117311596,178000
180
+ -0.9815023511648178,-83.94761478424073,-1.0,-0.0595403203740716,0.03946238961815834,1.0006119062304497,11.822921635776758,179000
181
+ -0.9945788309574127,-83.82495505523681,-1.0,-0.017621136920526623,0.03814142559468746,1.0006120149493218,10.284078227072953,180000
182
+ -1.0174808768630028,-83.91962240600586,-1.0,0.057623527953401206,0.038192073501646516,1.0002903699874879,25.27114519435167,181000
183
+ -1.0321397172808646,-83.9185648651123,-1.0,0.1034346729842946,0.04004729238897562,1.0000827281475066,22.894131582677364,182000
184
+ -1.0236703088283539,-84.09037867736816,-1.0,0.07460737392213196,0.043098920729011295,1.0003642064929008,15.016773126393556,183000
185
+ -0.9646011296510696,-84.50953370666504,-1.0,-0.11226235897000879,0.04179131421819329,1.0005287698507308,6.102913897559047,184000
186
+ -1.0014893134832383,-84.60108329772949,-1.0,0.0048609865847975015,0.04005252532288432,1.000533020734787,5.44453550902009,185000
187
+ -1.0300992658138275,-84.53524141693116,-1.0,0.09574909022264183,0.04373792405053973,0.9995259289741516,25.05315464180708,186000
188
+ -0.9975158050656319,-84.85456627655029,-1.0,-0.007727325612679124,0.04356380190327763,0.9994875674843788,26.969009226858617,187000
189
+ -1.0029295891523362,-85.21267974853515,-1.0,0.009257713869214057,0.04314345766603947,0.9996950832605362,17.81913815894723,188000
190
+ -0.9790707188844681,-85.07146759796143,-1.0,-0.06618797897361219,0.042045536406338214,0.9997785101532937,6.414190990597009,189000
191
+ -0.9777221149206161,-84.92304104614257,-1.0,-0.07226751008443534,0.039201794400811195,0.9998020445108413,6.623549911662936,190000
192
+ -0.988294132232666,-85.1188353729248,-1.0,-0.038624415582045914,0.037016180731356145,1.000133187353611,6.4096473942101,191000
193
+ -0.9505778591036796,-85.2945534362793,-1.0,-0.1682928476333618,0.03230870087072253,1.0001307914853097,6.649543940335512,192000
194
+ -1.276725348830223,-85.22326816558838,-1.0,0.9272244605384767,0.03174647399038076,0.9997849019169808,46.60274404168129,193000
195
+ -1.6783772518634796,-86.42634867858887,-1.0,2.071881779462099,0.04851115327700973,0.9995153895616531,90.78533294296264,194000
196
+ -1.161974544286728,-86.76565823364258,-1.0,0.4599228806514293,0.058990542739629746,0.9992697890996933,86.19860540437698,195000
197
+ -1.1238311616182328,-87.45591569519043,-1.0,0.33833809685520827,0.06543059648200869,0.9998308535218239,6.16370524841547,196000
198
+ -1.0021767801642418,-87.39279821014404,-1.0,0.005787738013081253,0.07035529282689094,0.9998381351232528,5.184702994942665,197000
mbpo_inv_pendulum_v4/main/.hydra/config.yaml ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 0
2
+ device: cuda:0
3
+ log_frequency_agent: 1000
4
+ save_video: false
5
+ debug_mode: false
6
+ experiment: default
7
+ root_dir: ./exp
8
+ algorithm:
9
+ name: mbpo
10
+ normalize: true
11
+ normalize_double_precision: true
12
+ target_is_delta: true
13
+ learned_rewards: true
14
+ freq_train_model: ${overrides.freq_train_model}
15
+ real_data_ratio: 0.0
16
+ sac_samples_action: true
17
+ initial_exploration_steps: 5000
18
+ random_initial_explore: false
19
+ num_eval_episodes: 1
20
+ agent:
21
+ _target_: mbrl.third_party.pytorch_sac_pranz24.sac.SAC
22
+ num_inputs: ???
23
+ action_space:
24
+ _target_: gym.env.Box
25
+ low: ???
26
+ high: ???
27
+ shape: ???
28
+ args:
29
+ gamma: ${overrides.sac_gamma}
30
+ tau: ${overrides.sac_tau}
31
+ alpha: ${overrides.sac_alpha}
32
+ policy: ${overrides.sac_policy}
33
+ target_update_interval: ${overrides.sac_target_update_interval}
34
+ automatic_entropy_tuning: ${overrides.sac_automatic_entropy_tuning}
35
+ target_entropy: ${overrides.sac_target_entropy}
36
+ hidden_size: ${overrides.sac_hidden_size}
37
+ device: ${device}
38
+ lr: ${overrides.sac_lr}
39
+ dynamics_model:
40
+ _target_: mbrl.models.GaussianMLP
41
+ device: ${device}
42
+ num_layers: 4
43
+ in_size: ???
44
+ out_size: ???
45
+ ensemble_size: 7
46
+ hid_size: 200
47
+ deterministic: false
48
+ propagation_method: random_model
49
+ learn_logvar_bounds: false
50
+ activation_fn_cfg:
51
+ _target_: torch.nn.SiLU
52
+ overrides:
53
+ env: gym___InvertedPendulum-v2
54
+ term_fn: inverted_pendulum
55
+ num_steps: 20000
56
+ epoch_length: 250
57
+ num_elites: 5
58
+ patience: 5
59
+ model_lr: 0.001
60
+ model_wd: 1.0e-05
61
+ model_batch_size: 256
62
+ validation_ratio: 0.2
63
+ freq_train_model: 250
64
+ effective_model_rollouts_per_step: 400
65
+ rollout_schedule:
66
+ - 1
67
+ - 15
68
+ - 1
69
+ - 1
70
+ num_sac_updates_per_step: 10
71
+ sac_updates_every_steps: 1
72
+ num_epochs_to_retain_sac_buffer: 1
73
+ sac_gamma: 0.99
74
+ sac_tau: 0.005
75
+ sac_alpha: 0.2
76
+ sac_policy: Gaussian
77
+ sac_target_update_interval: 1
78
+ sac_automatic_entropy_tuning: true
79
+ sac_hidden_size: 256
80
+ sac_lr: 0.0003
81
+ sac_batch_size: 256
82
+ sac_target_entropy: -1
83
+ action_optimizer:
84
+ _target_: mbrl.planning.CEMOptimizer
85
+ num_iterations: ${overrides.cem_num_iters}
86
+ elite_ratio: ${overrides.cem_elite_ratio}
87
+ population_size: ${overrides.cem_population_size}
88
+ alpha: ${overrides.cem_alpha}
89
+ lower_bound: ???
90
+ upper_bound: ???
91
+ return_mean_elites: true
92
+ device: ${device}
93
+ clipped_normal: ${overrides.cem_clipped_normal}
mbpo_inv_pendulum_v4/main/.hydra/hydra.yaml ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${root_dir}/${algorithm.name}/${experiment}/${overrides.env}/${now:%Y.%m.%d}/${now:%H%M%S}
4
+ sweep:
5
+ dir: ${root_dir}/${algorithm.name}/${experiment}/${overrides.env}/${now:%Y.%m.%d}/${now:%H%M%S}
6
+ subdir: ${hydra.job.num}
7
+ hydra_logging:
8
+ version: 1
9
+ formatters:
10
+ simple:
11
+ format: '[%(asctime)s][HYDRA] %(message)s'
12
+ handlers:
13
+ console:
14
+ class: logging.StreamHandler
15
+ formatter: simple
16
+ stream: ext://sys.stdout
17
+ root:
18
+ level: INFO
19
+ handlers:
20
+ - console
21
+ loggers:
22
+ logging_example:
23
+ level: DEBUG
24
+ disable_existing_loggers: false
25
+ job_logging:
26
+ version: 1
27
+ formatters:
28
+ simple:
29
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
30
+ handlers:
31
+ console:
32
+ class: logging.StreamHandler
33
+ formatter: simple
34
+ stream: ext://sys.stdout
35
+ file:
36
+ class: logging.FileHandler
37
+ formatter: simple
38
+ filename: ${hydra.job.name}.log
39
+ root:
40
+ level: INFO
41
+ handlers:
42
+ - console
43
+ - file
44
+ disable_existing_loggers: false
45
+ sweeper:
46
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
47
+ max_batch_size: null
48
+ launcher:
49
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
50
+ help:
51
+ app_name: ${hydra.job.name}
52
+ header: '${hydra.help.app_name} is powered by Hydra.
53
+
54
+ '
55
+ footer: 'Powered by Hydra (https://hydra.cc)
56
+
57
+ Use --hydra-help to view Hydra specific help
58
+
59
+ '
60
+ template: '${hydra.help.header}
61
+
62
+ == Configuration groups ==
63
+
64
+ Compose your configuration from those groups (group=option)
65
+
66
+
67
+ $APP_CONFIG_GROUPS
68
+
69
+
70
+ == Config ==
71
+
72
+ Override anything in the config (foo.bar=value)
73
+
74
+
75
+ $CONFIG
76
+
77
+
78
+ ${hydra.help.footer}
79
+
80
+ '
81
+ hydra_help:
82
+ hydra_help: ???
83
+ template: 'Hydra (${hydra.runtime.version})
84
+
85
+ See https://hydra.cc for more info.
86
+
87
+
88
+ == Flags ==
89
+
90
+ $FLAGS_HELP
91
+
92
+
93
+ == Configuration groups ==
94
+
95
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
96
+ to command line)
97
+
98
+
99
+ $HYDRA_CONFIG_GROUPS
100
+
101
+
102
+ Use ''--cfg hydra'' to Show the Hydra config.
103
+
104
+ '
105
+ output_subdir: .hydra
106
+ overrides:
107
+ hydra: []
108
+ task:
109
+ - algorithm=mbpo
110
+ - overrides=mbpo_inv_pendulum
111
+ - device=cuda:0
112
+ job:
113
+ name: main
114
+ override_dirname: algorithm=mbpo,device=cuda:0,overrides=mbpo_inv_pendulum
115
+ id: ???
116
+ num: ???
117
+ config_name: main
118
+ env_set: {}
119
+ env_copy: []
120
+ config:
121
+ override_dirname:
122
+ kv_sep: '='
123
+ item_sep: ','
124
+ exclude_keys: []
125
+ runtime:
126
+ version: 1.0.3
127
+ cwd: /home/raghava/projects/mbrl-lib/exp
128
+ verbose: false
mbpo_inv_pendulum_v4/main/.hydra/overrides.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ - algorithm=mbpo
2
+ - overrides=mbpo_inv_pendulum
3
+ - device=cuda:0
mbpo_inv_pendulum_v4/main/env_stats.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bf513b97ba1ebdfe092ff0ba789730d47f6fadb4b92711070e658f9fc912cde
3
+ size 278
mbpo_inv_pendulum_v4/main/eval.csv ADDED
File without changes
mbpo_inv_pendulum_v4/main/main.log ADDED
File without changes
mbpo_inv_pendulum_v4/main/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14274d884ccae1d37d83223c14cc8c9bc7a488093d99791899f26e3ecf9a4d85
3
+ size 3470949
mbpo_inv_pendulum_v4/main/model_train.csv ADDED
The diff for this file is too large to render. See raw diff
 
mbpo_inv_pendulum_v4/main/replay_buffer.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62a1320710e4accc1347acd640a6c7168a8eec86cae63e213ffe4244452ac2fb
3
+ size 1621504
mbpo_inv_pendulum_v4/main/results.csv ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ env_step,episode_reward,epoch,rollout_length,step
2
+ 249.0,16.0,0.0,1.0,1
3
+ 499.0,45.0,1.0,1.0,2
4
+ 749.0,44.0,2.0,1.0,3
5
+ 999.0,83.0,3.0,1.0,4
6
+ 1249.0,60.0,4.0,1.0,5
7
+ 1499.0,65.0,5.0,1.0,6
8
+ 1749.0,74.0,6.0,1.0,7
9
+ 1999.0,53.0,7.0,1.0,8
10
+ 2249.0,104.0,8.0,1.0,9
11
+ 2499.0,144.0,9.0,1.0,10
12
+ 2749.0,125.0,10.0,1.0,11
13
+ 2999.0,73.0,11.0,1.0,12
14
+ 3249.0,72.0,12.0,1.0,13
15
+ 3499.0,76.0,13.0,1.0,14
16
+ 3749.0,63.0,14.0,1.0,15
17
+ 3999.0,74.0,15.0,1.0,16
18
+ 4249.0,100.0,16.0,1.0,17
19
+ 4499.0,109.0,17.0,1.0,18
20
+ 4749.0,1000.0,18.0,1.0,19
21
+ 4999.0,1000.0,19.0,1.0,20
22
+ 5249.0,1000.0,20.0,1.0,21
23
+ 5499.0,1000.0,21.0,1.0,22
24
+ 5749.0,1000.0,22.0,1.0,23
25
+ 5999.0,1000.0,23.0,1.0,24
26
+ 6249.0,1000.0,24.0,1.0,25
27
+ 6499.0,1000.0,25.0,1.0,26
28
+ 6749.0,1000.0,26.0,1.0,27
29
+ 6999.0,1000.0,27.0,1.0,28
30
+ 7249.0,1000.0,28.0,1.0,29
31
+ 7499.0,1000.0,29.0,1.0,30
32
+ 7749.0,1000.0,30.0,1.0,31
33
+ 7999.0,1000.0,31.0,1.0,32
34
+ 8249.0,1000.0,32.0,1.0,33
35
+ 8499.0,1000.0,33.0,1.0,34
36
+ 8749.0,1000.0,34.0,1.0,35
37
+ 8999.0,1000.0,35.0,1.0,36
38
+ 9249.0,1000.0,36.0,1.0,37
39
+ 9499.0,1000.0,37.0,1.0,38
40
+ 9749.0,1000.0,38.0,1.0,39
41
+ 9999.0,1000.0,39.0,1.0,40
42
+ 10249.0,1000.0,40.0,1.0,41
43
+ 10499.0,1000.0,41.0,1.0,42
44
+ 10749.0,1000.0,42.0,1.0,43
45
+ 10999.0,1000.0,43.0,1.0,44
46
+ 11249.0,1000.0,44.0,1.0,45
47
+ 11499.0,1000.0,45.0,1.0,46
48
+ 11749.0,1000.0,46.0,1.0,47
49
+ 11999.0,1000.0,47.0,1.0,48
50
+ 12249.0,1000.0,48.0,1.0,49
51
+ 12499.0,1000.0,49.0,1.0,50
52
+ 12749.0,1000.0,50.0,1.0,51
53
+ 12999.0,1000.0,51.0,1.0,52
54
+ 13249.0,1000.0,52.0,1.0,53
55
+ 13499.0,1000.0,53.0,1.0,54
56
+ 13749.0,1000.0,54.0,1.0,55
57
+ 13999.0,1000.0,55.0,1.0,56
58
+ 14249.0,1000.0,56.0,1.0,57
59
+ 14499.0,1000.0,57.0,1.0,58
60
+ 14749.0,1000.0,58.0,1.0,59
61
+ 14999.0,1000.0,59.0,1.0,60
62
+ 15249.0,1000.0,60.0,1.0,61
63
+ 15499.0,1000.0,61.0,1.0,62
64
+ 15749.0,1000.0,62.0,1.0,63
65
+ 15999.0,1000.0,63.0,1.0,64
66
+ 16249.0,1000.0,64.0,1.0,65
67
+ 16499.0,1000.0,65.0,1.0,66
68
+ 16749.0,1000.0,66.0,1.0,67
69
+ 16999.0,1000.0,67.0,1.0,68
70
+ 17249.0,1000.0,68.0,1.0,69
71
+ 17499.0,1000.0,69.0,1.0,70
72
+ 17749.0,1000.0,70.0,1.0,71
73
+ 17999.0,1000.0,71.0,1.0,72
74
+ 18249.0,1000.0,72.0,1.0,73
75
+ 18499.0,1000.0,73.0,1.0,74
76
+ 18749.0,1000.0,74.0,1.0,75
77
+ 18999.0,1000.0,75.0,1.0,76
78
+ 19249.0,1000.0,76.0,1.0,77
79
+ 19499.0,1000.0,77.0,1.0,78
80
+ 19749.0,1000.0,78.0,1.0,79
81
+ 19999.0,1000.0,79.0,1.0,80
mbpo_inv_pendulum_v4/main/sac.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db4e53c33471c3d6805ea087a42b17be3b3251f15f2f8e992c02f6b1557ff7d1
3
+ size 2996697
mbpo_inv_pendulum_v4/main/train.csv ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ actor_entropy,actor_loss,actor_target_entropy,alpha_loss,alpha_value,batch_reward,critic_loss,step
2
+ 1.4763974827528,-6.478957705557346,-1.0,-0.3456656620029826,0.8668324014544487,0.998785097181797,2.3055325514674188,1000
3
+ 0.9334742274880409,-13.549264032363892,-1.0,-0.7948252328634262,0.6627063400745392,0.9987493185400963,4.908137350201606,2000
4
+ 0.7639499747753143,-20.443750541687013,-1.0,-1.1959195566177367,0.5083418329060078,0.9994187472462654,9.845117563962937,3000
5
+ 0.6597114334106445,-26.3317608089447,-1.0,-1.580805455803871,0.3863526160120964,1.000179212987423,14.545579643726349,4000
6
+ 0.5420159173756838,-31.38573017692566,-1.0,-1.8961482181549072,0.29270267179608345,1.0001851230859757,19.46860353541374,5000
7
+ 0.3861218377072364,-35.36567040252685,-1.0,-2.0821313759088516,0.22218034875392914,1.0009595202803612,26.14231790113449,6000
8
+ 0.18448990973923354,-38.87414058303833,-1.0,-2.0911072857379915,0.17096518313884734,1.0009144856333732,29.732819752693175,7000
9
+ -0.02738651861343533,-42.051555137634274,-1.0,-1.9610867830514909,0.13270662397146224,1.0000671554207803,30.727756870269776,8000
10
+ -0.30352653527259826,-44.69980713272095,-1.0,-1.5632732431292533,0.10550620140880346,0.9992576010227203,31.377577652931212,9000
11
+ -0.5232454610392451,-47.27903657150269,-1.0,-1.1664047592952846,0.08618941026180983,0.9991439099907875,35.88742989778519,10000
12
+ -0.7497658767700195,-49.386965167999264,-1.0,-0.6532136745527387,0.07295095971226692,1.0006782907247542,31.888391916036607,11000
13
+ -0.8994824314117431,-51.24161400604248,-1.0,-0.2733467945735902,0.06515275495499373,1.0007157241106033,32.08064066457749,12000
14
+ -1.0574706745147706,-53.038549835205075,-1.0,0.15674695528671145,0.06411293499916793,1.0001257353425026,33.15245296907425,13000
15
+ -1.1278328355550766,-54.219994827270504,-1.0,0.3328205804508179,0.07298996721208095,0.9995564326047898,33.28109815979004,14000
16
+ -1.1342458639740944,-55.36783479309082,-1.0,0.3250431122016162,0.08960208534449339,0.9995725920200348,33.0742504529953,15000
17
+ -1.110816012918949,-55.848865936279296,-1.0,0.248619750700891,0.10687309091538191,1.000330654978752,35.48366849529743,16000
18
+ -1.0662262784838676,-56.45812505722046,-1.0,0.13938818452879786,0.12245884717255831,1.0003205513358115,36.48077377486229,17000
19
+ -1.042787958085537,-56.855382564544676,-1.0,0.08604093543253839,0.1358872603774071,0.999801897585392,34.83372405910492,18000
20
+ -1.026266006708145,-56.789430904388425,-1.0,0.05115305676870048,0.14414267824590207,0.9992841855883599,31.843272228002547,19000
21
+ -1.0341709280610085,-56.750916934967044,-1.0,0.06443186917714774,0.15097948518395424,0.9992763904333115,32.74763557624817,20000
22
+ -1.0109955106973647,-55.68141269302368,-1.0,0.020218923566862942,0.16104757143557072,1.0004936259984971,33.75554369974137,21000
23
+ -1.0076254295110703,-55.16407460784912,-1.0,0.013882018635049462,0.16243540546298027,1.0004994915723802,32.3070885181427,22000
24
+ -1.0077201149463653,-54.619019035339356,-1.0,0.013897254822775722,0.16521821942925452,1.0001708446145057,33.485269412517546,23000
25
+ -1.0155827897787093,-54.0332432975769,-1.0,0.02754006630834192,0.17051456910371782,0.9998341380953789,33.81496183300018,24000
26
+ -1.0071716911196709,-53.6448695640564,-1.0,0.01245820616837591,0.17605735635757447,0.9998356310725212,32.94323873567581,25000
27
+ -1.0181211317777634,-54.25656684494019,-1.0,0.030791066934354603,0.18514553916454315,0.9992245963811874,30.081492133617402,26000
28
+ -1.0026407722234727,-54.76842158126831,-1.0,0.00442602267768234,0.1897624586522579,0.9992132484316826,31.82845582008362,27000
29
+ -0.981127428472042,-55.71954592132568,-1.0,-0.031911304051987825,0.18560733598470688,0.999436459004879,30.138399094104766,28000
30
+ -0.9874461833238601,-57.066668445587155,-1.0,-0.021865770649164916,0.1761112436503172,0.9996396471858024,29.153592990875243,29000
31
+ -0.9758518093824387,-58.167472229003906,-1.0,-0.043582759907469154,0.1636621859073639,0.999640767633915,29.796037935972215,30000
32
+ -0.9926081134080886,-59.16201022720337,-1.0,-0.013731048082001507,0.15603335838019847,1.0005886608958243,30.26898358631134,31000
33
+ -0.9888003264665604,-59.593364917755125,-1.0,-0.02118095719534904,0.1507633538991213,1.0006292753219606,29.982132452249527,32000
34
+ -1.0015642214417457,-60.0616862411499,-1.0,0.0030278314482420684,0.1476539126187563,1.0000541903376579,28.782667184829712,33000
35
+ -1.0014855726957321,-60.374754447937015,-1.0,0.00286021239310503,0.1483997894525528,0.9995232740044594,29.361223724365235,34000
36
+ -0.9979323974251747,-60.55109497833252,-1.0,-0.0039183969646692275,0.14907038894295693,0.9995119899511338,29.40233796572685,35000
37
+ -0.990416697859764,-60.1503318939209,-1.0,-0.018426711566746235,0.14339881959557532,0.9998402673602104,25.458826015114784,36000
38
+ -1.0026020179986954,-59.62127249526978,-1.0,0.005074418387375772,0.1436169336140156,0.9998138253092765,24.364869206905365,37000
39
+ -1.004600099980831,-58.85995357131958,-1.0,0.008896921917796135,0.14406815087795258,0.9996232470273971,26.081000263571738,38000
40
+ -1.0077694382667541,-58.16466482543945,-1.0,0.014830041491426528,0.14919392232596873,0.9994478154182435,25.08557460951805,39000
41
+ -0.9902586901783943,-57.387617012023924,-1.0,-0.018560130586847662,0.14762759718298912,0.9994597786664963,25.021672772049904,40000
42
+ -0.9823829297423363,-56.53386066055298,-1.0,-0.03443984847702086,0.14210785394906997,0.999759915292263,22.615367067098617,41000
43
+ -0.9882938891649247,-55.747461837768554,-1.0,-0.023470385616645217,0.13391719041764735,0.9997498955726624,21.763695041894913,42000
44
+ -0.992640626013279,-55.11607292938233,-1.0,-0.014986219391226768,0.12998047710955143,0.9997630262374878,21.353686974525452,43000
45
+ -1.0021941338181495,-54.52108274078369,-1.0,0.004528821423649788,0.12854490157961845,0.9997742936015129,20.528487491607667,44000
46
+ -0.992986945271492,-54.32486217498779,-1.0,-0.014399438666179777,0.1273678799122572,0.9998125602006912,20.479064636945726,45000
47
+ -1.0098914121389388,-54.8010499458313,-1.0,0.0203354962002486,0.12899420666694641,1.0003930089473725,18.612088989019394,46000
48
+ -1.007324605345726,-54.53852599716186,-1.0,0.014844927011057734,0.13282603001594542,1.0003904846310616,18.92192676448822,47000
49
+ -0.996329264819622,-54.818495712280274,-1.0,-0.007365137103013695,0.13378550845384599,1.000357665836811,19.447294442892076,48000
50
+ -0.9868814619779587,-55.189026485443115,-1.0,-0.02680392067693174,0.12901602894067765,1.0003222906589508,20.111296785354615,49000
51
+ -0.9966455320715905,-55.25351607513428,-1.0,-0.00693219636939466,0.12489603278040885,1.000335963666439,19.570967321157454,50000
52
+ -1.0185365591049194,-55.857191791534426,-1.0,0.03801956664398313,0.12800770220160484,1.0001067397594452,19.99558943080902,51000
53
+ -1.0084121934175492,-55.68823028182983,-1.0,0.01685784162580967,0.1358429219275713,1.0000793895721436,19.421788303375244,52000
54
+ -0.9977025203108788,-55.67955096817017,-1.0,-0.004541335852816701,0.13719850583374502,0.9998976911306381,18.847534791707993,53000
55
+ -0.9923143435120583,-55.88515181350708,-1.0,-0.015411001484841109,0.13380071939527988,0.9996740792989731,19.56190442943573,54000
56
+ -1.000299010038376,-56.083399005889895,-1.0,0.0006373687349259853,0.13100261519849302,0.9996745353341102,18.72349938297272,55000
57
+ -1.0050171436667443,-56.42775593185425,-1.0,0.01011963664740324,0.13399976445734502,0.9999094310402871,16.562544779658317,56000
58
+ -0.990718131184578,-56.690139152526854,-1.0,-0.018716485319659113,0.13307316191494464,0.9998784223198891,17.074375494122506,57000
59
+ -0.9834671101570129,-57.1244096031189,-1.0,-0.0341147580454126,0.12726081262528896,0.9998120501041412,18.10486300218105,58000
60
+ -0.9911057586669922,-57.70421493148804,-1.0,-0.018744581510312854,0.12049905148148536,0.9997025249600411,18.159959778308867,59000
61
+ -0.9968665035367013,-58.070138122558596,-1.0,-0.006644006033428013,0.11658251328021288,0.9997052510976792,19.504315382957458,60000
62
+ -0.9908172752857208,-58.822973724365234,-1.0,-0.01975833140872419,0.11539044733345509,1.0001899064779283,17.426271301865576,61000
63
+ -1.0022352268099786,-59.005406940460205,-1.0,0.004872841291129589,0.11500932931900025,1.0001603360176086,18.591500796556474,62000
64
+ -1.0040286817550659,-59.3609238319397,-1.0,0.008714991863816976,0.11565299618244171,0.999910607099533,18.301449789524078,63000
65
+ -0.9943597778081894,-59.87129746246338,-1.0,-0.012145042567513883,0.11594359338283539,0.9996355277299881,16.59190679872036,64000
66
+ -0.9878368722200394,-60.16496667480469,-1.0,-0.0266197976404801,0.1116962007060647,0.9996595103144645,16.25766283750534,65000
67
+ -1.0093962472677231,-60.920489204406735,-1.0,0.02067150117037818,0.11231654446572065,1.0003178315758705,17.998802297472952,66000
68
+ -1.0034938445687294,-61.322841342926026,-1.0,0.007615030160173774,0.11419858123362064,1.000322759628296,18.384143598675728,67000
69
+ -0.9900247791409492,-61.64565693664551,-1.0,-0.02175049716141075,0.11315585604310036,1.0004068000912667,17.52445992219448,68000
70
+ -0.9817125222682953,-62.01226956939697,-1.0,-0.04085354046616703,0.1066201238259673,1.0004921290278435,17.64946433699131,69000
71
+ -1.015940748512745,-62.23453958892822,-1.0,0.035763307929039,0.1067629808858037,1.0005025430321692,18.312300976872443,70000
72
+ -0.9918710213303567,-62.64640918731689,-1.0,-0.018080171504057944,0.1075214917063713,0.9999243226647377,17.439783853530884,71000
73
+ -0.9915845676064491,-62.88015721511841,-1.0,-0.018961244069971146,0.10510560478270053,0.9999416568875312,18.607699221730233,72000
74
+ -0.9978037312626838,-63.40633742904663,-1.0,-0.004963525909930468,0.10317199513316154,1.0002951315045356,19.832634543001653,73000
75
+ -0.9886463728547096,-63.45438307571411,-1.0,-0.026091628125868738,0.09944286446273327,1.0006353183984757,21.92453165149689,74000
76
+ -1.0054109276533127,-63.39256450653076,-1.0,0.012540160402655601,0.0997583866417408,1.000638016819954,21.48489053297043,75000
77
+ -1.001624805688858,-63.31376873779297,-1.0,0.00376224123314023,0.1000801005139947,0.9994508177042007,19.251812908411026,76000
78
+ -0.9934332046508789,-63.489520355224606,-1.0,-0.015101213892921805,0.10089764854311943,0.9994514463543892,18.713772445082665,77000
79
+ -0.9803781977891922,-63.71689175415039,-1.0,-0.04611355498433113,0.09356149958074093,0.9997360042333603,18.934187405467032,78000
80
+ -0.9981921567320824,-63.96335473251343,-1.0,-0.004286106995306909,0.09055806790292263,1.000018486380577,17.947209617733954,79000
81
+ -0.9932112913727761,-63.83591778945923,-1.0,-0.016303058272227645,0.09023830170184374,1.0000152925252914,18.065785289525987,80000
82
+ -1.0148001232147217,-64.10034365463257,-1.0,0.035422387033700944,0.09155781245976687,1.0000552546977997,17.41354282736778,81000
83
+ -0.9935139993429184,-64.35829607009887,-1.0,-0.01537885551340878,0.09297246316075325,1.0000672941207887,17.266553600609303,82000
84
+ -0.9926040494441986,-64.72948305130005,-1.0,-0.017725298631936313,0.09055038732290267,0.9997567884922027,19.775992030143737,83000
85
+ -1.0009912559390068,-65.16171224212647,-1.0,0.002430984076112509,0.09023372445255518,0.9995009130239487,19.54423992753029,84000
86
+ -1.0061334508061408,-65.60843515777587,-1.0,0.014747056286782027,0.08988045490533113,0.9994767710566521,18.862002809405325,85000
87
+ -1.003388536632061,-66.12219605255127,-1.0,0.008106661353260278,0.09301029802858829,1.0002276818156242,15.92058328574896,86000
88
+ -0.9948244699239731,-66.344715259552,-1.0,-0.01232436273805797,0.09214161998033524,1.0002165070176126,16.650181475877762,87000
89
+ -1.01198367613554,-66.75642790985107,-1.0,0.028508775627240537,0.09383361798524857,0.9998489872217179,15.916882592320443,88000
90
+ -0.994431672513485,-66.98170137023926,-1.0,-0.013123649187386037,0.09411840833723545,0.99949158847332,16.984962671518325,89000
91
+ -0.9936827052235603,-67.11535403823852,-1.0,-0.015024682595394552,0.09268878719955682,0.9994697330594062,17.53356578373909,90000
92
+ -1.0085768651366234,-67.47149189758301,-1.0,0.02044737664377317,0.09230491660535335,1.0002271916866303,16.982533779501914,91000
93
+ -0.9929719299077988,-67.52575912857056,-1.0,-0.016663433351553975,0.09357742766290902,1.0002271434664727,16.916692378282548,92000
94
+ -0.9967868493199349,-67.59778268051147,-1.0,-0.0076559318006038666,0.09025033078342676,1.0003826623558998,16.516298450231552,93000
95
+ -1.0096310858130455,-67.9500245399475,-1.0,0.023005562459118663,0.09331584140658379,1.0004939422011376,16.30063471966982,94000
96
+ -1.0008865221738816,-68.09687114715577,-1.0,0.0021308941189199684,0.0950366483181715,1.0004946186542512,18.23754500669241,95000
97
+ -0.9975836805701256,-68.48811808776856,-1.0,-0.005674843850545585,0.09242583010345698,0.9997143729329109,16.98644223189354,96000
98
+ -1.0068715377449988,-68.51083322525024,-1.0,0.016274025018326938,0.09610464326292277,0.9997061502933502,17.567107418894768,97000
99
+ -0.998759853899479,-68.6216707611084,-1.0,-0.002870520915836096,0.09400112928450108,0.9997888321280479,17.413335146844386,98000
100
+ -0.997437217950821,-68.85645965576173,-1.0,-0.006007067152764648,0.09461389408260584,0.9998717764616013,17.80547460091114,99000
101
+ -0.98713904774189,-69.0981537399292,-1.0,-0.03062568216305226,0.09289403835684061,0.9998473942875862,17.535892039835453,100000
102
+ -1.0165556582212447,-69.60548803710938,-1.0,0.0394165942389518,0.09316518200188875,0.999963436126709,16.837242566466333,101000
103
+ -0.987264278948307,-69.72081983184815,-1.0,-0.030143538661301136,0.0953957436233759,0.9999581699967385,16.232816412329672,102000
104
+ -0.9789220357537269,-69.87075538635254,-1.0,-0.05120750189665705,0.08859755281358957,0.9995854314565659,17.398883090138437,103000
105
+ -1.0025479341745376,-70.18214385223389,-1.0,0.006315209124237299,0.08380281540006398,0.9992253432273864,17.94904735571146,104000
106
+ -1.0038856930732727,-70.35816407012939,-1.0,0.009583026161417365,0.08740814523398877,0.9992313175797463,18.157692121446132,105000
107
+ -0.9966179539561272,-70.60123742675782,-1.0,-0.008253616485744715,0.08561916831880807,0.9996051346063614,17.816439012765883,106000
108
+ -0.9968062920570373,-70.70014213562011,-1.0,-0.007832949806936084,0.08624395169317722,0.9995976127386094,17.203684946894647,107000
109
+ -0.9925048573613167,-70.99623360443115,-1.0,-0.01856476745009422,0.08372884395718574,0.9996450240015984,17.55250213509798,108000
110
+ -1.009103646159172,-71.13564346313477,-1.0,0.02262940700352192,0.08324057381600142,0.9997219249606133,18.228750668048857,109000
111
+ -1.0013676074147224,-71.38172395324708,-1.0,0.0033991921758279205,0.08563420847803355,0.999674310207367,17.701619140565395,110000
112
+ -1.0028541533350945,-71.833926612854,-1.0,0.007058564387261867,0.08671098373830319,0.9999366970658302,18.628900411486626,111000
113
+ -0.9911108826994895,-72.14574198150635,-1.0,-0.02187084128335118,0.08480388696491718,0.9999389787912368,18.818429768502714,112000
114
+ -0.9916444380879402,-72.56227169036865,-1.0,-0.02082921870984137,0.08187530007213355,1.000075746834278,17.06973088794947,113000
115
+ -1.0045699105858803,-72.72709747314452,-1.0,0.011478737611323595,0.08033119735121727,1.0002418534755706,16.80955806851387,114000
116
+ -0.9969647673368454,-72.89240586090088,-1.0,-0.007563998566940427,0.08255300302803516,1.0002305384874344,16.565836269795895,115000
117
+ -0.98803363353014,-72.93475674438477,-1.0,-0.03020968585740775,0.07923095226287842,1.0006147078871728,17.92954135453701,116000
118
+ -1.0105572581887245,-72.88860368347169,-1.0,0.02676328101940453,0.0792304623350501,1.0006094776391983,18.129633271098136,117000
119
+ -0.9939738088846206,-73.03246242523193,-1.0,-0.01516242234967649,0.0808308222219348,1.0003282096982002,16.482563596844674,118000
120
+ -0.9922592459917069,-73.32136745452881,-1.0,-0.019692457454279066,0.07765411691367627,0.999992131948471,15.236582528591155,119000
121
+ -0.9901708269119263,-73.45218009948731,-1.0,-0.025330654026009144,0.0747445075660944,1.0000213090181351,15.730481168866158,120000
122
+ -0.9816973417401313,-73.53683992767334,-1.0,-0.04813449347298592,0.07180832280963659,0.9991530885100365,22.68273552966118,121000
123
+ -1.0126228908896446,-73.81547941589355,-1.0,0.033412296436727044,0.07094891738891601,0.9991410218477249,21.49774703502655,122000
124
+ -1.0231479509472847,-73.9156694869995,-1.0,0.0598131916038692,0.07487635066360235,0.9995964791178703,19.364834487438202,123000
125
+ -0.9948609911203384,-74.01523791503907,-1.0,-0.013073816211894155,0.07958621053397655,1.0000364733934402,17.083096188485623,124000
126
+ -0.9821588664054871,-73.98033901214599,-1.0,-0.046213735232129696,0.07518805634230376,1.0000572667717933,16.572076936900615,125000
127
+ -0.9805702825188637,-74.29321179962159,-1.0,-0.051655545741319656,0.06982442262768745,1.0002984583973884,17.12731729787588,126000
128
+ -0.9961627615690232,-74.38389482879639,-1.0,-0.01032995866658166,0.06598952382057906,1.0003150888085366,17.08686294555664,127000
129
+ -0.9869118289947509,-74.51627577209473,-1.0,-0.035731220842339095,0.06391940568387508,1.000169724524021,15.771941621303558,128000
130
+ -0.9768465895652771,-74.72749864196777,-1.0,-0.0646674454798922,0.06128474063053727,1.000029991209507,16.268173049092294,129000
131
+ -0.988956431388855,-74.8461897201538,-1.0,-0.0314381602704525,0.056964635986834766,1.0000100461840629,15.631321305632591,130000
132
+ -1.0104575060009957,-75.1898707962036,-1.0,0.029880890790373087,0.05761514198035002,1.0001594460010528,16.929354890763758,131000
133
+ -0.9914515028595925,-75.24356363677978,-1.0,-0.024285164630040525,0.05873783494159579,1.0001154287457465,17.26231514799595,132000
134
+ -0.9862084994912148,-75.33292431640625,-1.0,-0.03972175276372582,0.05581531821936369,1.0001858704686164,17.795951663434504,133000
135
+ -0.9811989842057228,-75.42022638702393,-1.0,-0.05509287717193365,0.053381241161376235,1.0002337135076522,16.93129185497761,134000
136
+ -0.9722182838320732,-75.51022337341308,-1.0,-0.08341761415358633,0.04919845436513424,1.0002063592672348,16.532600370943545,135000
137
+ -0.9864157851338387,-75.74758383178711,-1.0,-0.04159780815243721,0.04666392008587718,0.9998971362113953,16.37889918076992,136000
138
+ -1.0002089310884477,-75.88335538482666,-1.0,0.000697830855846405,0.04597837587073445,0.9999083818197251,16.226929286241532,137000
139
+ -1.0049224603772164,-75.98401473236083,-1.0,0.015234621828421951,0.04494141797348857,1.0000158385038376,18.795446904718876,138000
140
+ -1.0020949218869208,-76.00304367828369,-1.0,0.006481255244463682,0.046606573469936845,1.0000972774624826,17.82430488520861,139000
141
+ -0.9922809925675392,-76.05915750885009,-1.0,-0.023705559162423014,0.04637428366020322,1.0000975608825684,18.035022908091545,140000
142
+ -0.9909175517559051,-76.38887683868408,-1.0,-0.028142681362107395,0.04460193083807826,1.0001125658750534,18.22986603808403,141000
143
+ -1.0346318854689598,-76.20261015319824,-1.0,0.10633192418329418,0.04759643306210637,1.0000919710993768,19.147022684276102,142000
144
+ -1.0079440863728524,-75.95736027526856,-1.0,0.023879892587661742,0.04943982838839293,1.000070084273815,17.794266454994677,143000
145
+ -1.0158559654951096,-76.16038156890869,-1.0,0.047015623413026335,0.05216631001979113,1.0000464915037155,15.190462433099746,144000
146
+ -0.9866527459621429,-76.33673182678223,-1.0,-0.03936403483897448,0.05270787290483713,1.0000553504824639,14.999602069675923,145000
147
+ -0.9774288986325264,-76.60262435913086,-1.0,-0.06791772315651179,0.0486616105735302,1.0002320529222488,14.598117153286934,146000
148
+ -0.9941067886948586,-76.73603015136719,-1.0,-0.017904859343543647,0.04726289954036474,1.0002203060984611,14.42935523825884,147000
149
+ -0.9462012842297554,-76.71712963867188,-1.0,-0.169083744013682,0.04420085182785988,1.0002407499551773,15.162312914788723,148000
150
+ -1.0248723424077033,-76.86457388305664,-1.0,0.07945050740614533,0.040236110124737025,1.0003135110735892,15.019528947234154,149000
151
+ -1.0109064157605172,-76.85408424377441,-1.0,0.03433823741227388,0.04382860206067562,1.0002756406664848,15.52259669342637,150000
152
+ -0.9951482644081115,-77.09557096099853,-1.0,-0.015110970290377735,0.04300338978692889,1.0000269352793694,15.50851853570342,151000
153
+ -0.9835719376802444,-77.1627825164795,-1.0,-0.051988833735696974,0.0426078316681087,1.0000261583328247,15.068188706815242,152000
154
+ -0.9709139489531518,-77.3367892074585,-1.0,-0.09417520035617054,0.03924259215593338,1.0000911182165146,14.186825154960156,153000
155
+ -1.0088640170693397,-77.55197537994385,-1.0,0.029150257494300603,0.03734059687703848,1.0000885239243507,13.558345043540001,154000
156
+ -1.0212022386193276,-77.69694927215576,-1.0,0.06867426004447043,0.039542015191167595,1.0001147250533104,14.296920968949795,155000
157
+ -1.0278117792606354,-77.88566932678222,-1.0,0.08820761155895888,0.04301816117763519,0.9996008985042572,14.137276988685132,156000
158
+ -0.9895427439212799,-77.83372512817382,-1.0,-0.03278996757231653,0.0429468605928123,0.9995930786728859,14.687067904174327,157000
159
+ -0.9797595853209495,-77.87101008605957,-1.0,-0.06447067268751562,0.042437116872519255,0.9997739418148994,16.13315629005432,158000
160
+ -1.013379275202751,-77.84048233032226,-1.0,0.04299740180931985,0.04076220213994384,1.0001069155335427,17.33694684010744,159000
161
+ -0.9615840195417404,-78.01412672424317,-1.0,-0.12410383836179972,0.03878340692073107,1.0000645182132721,16.90905995875597,160000
162
+ -1.0383807904720306,-78.41475357055664,-1.0,0.12417219834588468,0.03914003568142652,0.9999463462233543,12.269535452485085,161000
163
+ -1.0055489974021912,-78.53106629180908,-1.0,0.017649841172620653,0.0431199979968369,0.9999349164962769,11.771708340466022,162000
164
+ -0.9612186776399613,-78.92398818206787,-1.0,-0.12486389862559735,0.04066232944279909,1.0000343598723411,12.362912654370069,163000
165
+ -0.9445008692145348,-79.20295635223388,-1.0,-0.18596524561010302,0.03456020689383149,1.0000746309161186,11.721650278031825,164000
166
+ -1.0013803549408913,-79.51265836334228,-1.0,0.004918525446206331,0.031807119261473414,1.0000640460848809,12.85488219627738,165000
167
+ -1.0447813599705695,-79.73344721221923,-1.0,0.15115574648603797,0.03497543862834573,1.0003409983515739,12.195596550047398,166000
168
+ -0.9722807077765465,-79.97148152923585,-1.0,-0.09293929311074317,0.034385592188686136,1.0003236945271492,12.002830563426018,167000
169
+ -0.9602502499818801,-80.33762901306153,-1.0,-0.13709981412719935,0.03246570440009236,1.0001818029880523,10.892611865848302,168000
170
+ -0.9114510639309883,-80.61345574188232,-1.0,-0.3201595205515623,0.026313471850007774,1.0000675349235535,11.111882902026176,169000
171
+ -0.9425517069101333,-80.99256255340576,-1.0,-0.2165208562016487,0.023223251439630985,1.0000483027100564,10.687078304350376,170000
172
+ -1.0611758540272713,-81.2873125,-1.0,0.2306000980902463,0.023346056735143065,0.999979619204998,10.838928482413293,171000
173
+ -1.0452783763408662,-81.75231202697753,-1.0,0.1655680246260017,0.025135026903823018,1.0000043463706971,10.675374532461166,172000
174
+ -1.0459626215696334,-82.28780084991455,-1.0,0.16332082364708186,0.02887601769901812,0.9999593334197998,9.944168746948241,173000
175
+ -1.0455293064117432,-82.52516458892822,-1.0,0.15667400527745484,0.03190831805765629,0.9999062820672989,9.600904252946377,174000
176
+ -1.0071646354794501,-82.27639138031006,-1.0,0.02417293977551162,0.03471967674419284,0.9998686572909355,9.406234488338232,175000
177
+ -1.0459903602600098,-82.19875326538086,-1.0,0.15094639916345476,0.03767583168298006,0.9999362059235573,11.503028959333896,176000
178
+ -0.9549353388547898,-82.03114837646484,-1.0,-0.14754290030244738,0.03718360911682248,0.9999189318418503,11.644140465438365,177000
179
+ -0.9688282218575478,-82.17402528381348,-1.0,-0.10544923120178282,0.03526916104927659,1.0000488914251326,9.320777967721224,178000
180
+ -0.96454849755764,-82.42874387359619,-1.0,-0.12330934689659626,0.031419472593814134,1.000137368619442,8.73074999782443,179000
181
+ -0.8505163456201553,-82.63743538665771,-1.0,-0.5498501275517046,0.025088370230048895,1.0001556274294854,8.688885678112507,180000
182
+ -0.9576658812165261,-83.05606156158447,-1.0,-0.16188343786634504,0.021708793228492142,1.000329965353012,6.7405557851046325,181000
183
+ -0.9593822256922722,-83.10711833190918,-1.0,-0.15839191107079387,0.02012610281072557,1.00030206990242,6.508285503566265,182000
184
+ -0.9841803368330002,-83.18818240356445,-1.0,-0.06266297751292586,0.018896243158727885,1.0001508246064186,7.046502308696509,183000
185
+ -1.0244473719596863,-83.39971582794189,-1.0,0.09667248136550188,0.019059797348454595,0.9999408781528473,6.1167474163025615,184000
186
+ -1.06392397326231,-83.56818306732178,-1.0,0.2445313390996307,0.02178338426910341,0.9999477730989457,5.4873032447397705,185000
187
+ -1.1017015112638473,-83.77277811431885,-1.0,0.36777011448610575,0.026417087057605385,0.999871075630188,6.725011307999492,186000
188
+ -1.0356311641335487,-84.04559945678712,-1.0,0.12333504794351756,0.031550449941307304,0.999868730545044,6.426630144804716,187000
189
+ -0.9850901752710343,-84.29765727233887,-1.0,-0.05120176237635315,0.03270558039098978,0.999976789176464,6.343316182926297,188000
190
+ -0.9157791820168495,-84.53521042633056,-1.0,-0.30096872403472663,0.028873512187972666,1.0000477527976037,5.444822380304337,189000
191
+ -0.9493217915892601,-85.0954543838501,-1.0,-0.1893192444127053,0.022669385880231856,1.0000496053099632,4.974754636526108,190000
192
+ -1.221846663236618,-85.7686660079956,-1.0,0.8014005500581115,0.027759896967560052,0.9999922454953194,4.645311891838908,191000
193
+ -1.041473182618618,-86.0752448348999,-1.0,0.14157912812568246,0.03365620331466198,0.9999898050427437,4.537857213050127,192000
194
+ -1.1622858802676201,-86.60694064331055,-1.0,0.523856639508158,0.03884925275668502,0.9996525344848632,3.410495422258973,193000
195
+ -0.7552011589407921,-86.44883558654786,-1.0,-0.7974085309449583,0.04052868751809001,0.999313694536686,3.0010018263012173,194000
196
+ -0.8673747957646847,-86.33145885467529,-1.0,-0.46082214194722476,0.0302521002702415,0.9992963833808899,3.1755379558503627,195000
197
+ -0.9691520172953606,-86.52380606842041,-1.0,-0.1100874517634511,0.02729657098092139,0.9998640430569649,1.5732781826257705,196000
198
+ -1.1018922945857048,-86.52318350219727,-1.0,0.3587869614195079,0.03030913616530597,0.9998677024245262,1.8337867197990418,197000
pets_pusher/diff/.hydra/config.yaml ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 0
2
+ device: cuda:0
3
+ log_frequency_agent: 1000
4
+ save_video: false
5
+ debug_mode: false
6
+ experiment: default
7
+ root_dir: ./exp
8
+ algorithm:
9
+ name: pets
10
+ agent:
11
+ _target_: mbrl.planning.TrajectoryOptimizerAgent
12
+ action_lb: ???
13
+ action_ub: ???
14
+ planning_horizon: ${overrides.planning_horizon}
15
+ optimizer_cfg: ${action_optimizer}
16
+ replan_freq: 1
17
+ verbose: ${debug_mode}
18
+ normalize: true
19
+ normalize_double_precision: true
20
+ target_is_delta: true
21
+ initial_exploration_steps: ${overrides.trial_length}
22
+ freq_train_model: ${overrides.freq_train_model}
23
+ learned_rewards: ${overrides.learned_rewards}
24
+ num_particles: 20
25
+ dynamics_model:
26
+ _target_: mbrl.models.GaussianMLP
27
+ device: ${device}
28
+ num_layers: 4
29
+ in_size: ???
30
+ out_size: ???
31
+ ensemble_size: 7
32
+ hid_size: 200
33
+ deterministic: false
34
+ propagation_method: random_model
35
+ learn_logvar_bounds: false
36
+ activation_fn_cfg:
37
+ _target_: torch.nn.SiLU
38
+ overrides:
39
+ env: pets_pusher
40
+ term_fn: no_termination
41
+ learned_rewards: true
42
+ trial_length: 150
43
+ num_steps: 15000
44
+ num_elites: 5
45
+ model_lr: 1.0e-05
46
+ model_wd: 0.0005
47
+ model_batch_size: 32
48
+ validation_ratio: 0
49
+ freq_train_model: 150
50
+ patience: 25
51
+ num_epochs_train_model: 25
52
+ planning_horizon: 25
53
+ cem_num_iters: 5
54
+ cem_elite_ratio: 0.1
55
+ cem_population_size: 350
56
+ cem_alpha: 0.1
57
+ cem_clipped_normal: false
58
+ action_optimizer:
59
+ _target_: mbrl.planning.CEMOptimizer
60
+ num_iterations: ${overrides.cem_num_iters}
61
+ elite_ratio: ${overrides.cem_elite_ratio}
62
+ population_size: ${overrides.cem_population_size}
63
+ alpha: ${overrides.cem_alpha}
64
+ lower_bound: ???
65
+ upper_bound: ???
66
+ return_mean_elites: true
67
+ device: ${device}
68
+ clipped_normal: ${overrides.cem_clipped_normal}
pets_pusher/diff/.hydra/hydra.yaml ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${root_dir}/${algorithm.name}/${experiment}/${overrides.env}/${now:%Y.%m.%d}/${now:%H%M%S}
4
+ sweep:
5
+ dir: ${root_dir}/${algorithm.name}/${experiment}/${overrides.env}/${now:%Y.%m.%d}/${now:%H%M%S}
6
+ subdir: ${hydra.job.num}
7
+ hydra_logging:
8
+ version: 1
9
+ formatters:
10
+ simple:
11
+ format: '[%(asctime)s][HYDRA] %(message)s'
12
+ handlers:
13
+ console:
14
+ class: logging.StreamHandler
15
+ formatter: simple
16
+ stream: ext://sys.stdout
17
+ root:
18
+ level: INFO
19
+ handlers:
20
+ - console
21
+ loggers:
22
+ logging_example:
23
+ level: DEBUG
24
+ disable_existing_loggers: false
25
+ job_logging:
26
+ version: 1
27
+ formatters:
28
+ simple:
29
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
30
+ handlers:
31
+ console:
32
+ class: logging.StreamHandler
33
+ formatter: simple
34
+ stream: ext://sys.stdout
35
+ file:
36
+ class: logging.FileHandler
37
+ formatter: simple
38
+ filename: ${hydra.job.name}.log
39
+ root:
40
+ level: INFO
41
+ handlers:
42
+ - console
43
+ - file
44
+ disable_existing_loggers: false
45
+ sweeper:
46
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
47
+ max_batch_size: null
48
+ launcher:
49
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
50
+ help:
51
+ app_name: ${hydra.job.name}
52
+ header: '${hydra.help.app_name} is powered by Hydra.
53
+
54
+ '
55
+ footer: 'Powered by Hydra (https://hydra.cc)
56
+
57
+ Use --hydra-help to view Hydra specific help
58
+
59
+ '
60
+ template: '${hydra.help.header}
61
+
62
+ == Configuration groups ==
63
+
64
+ Compose your configuration from those groups (group=option)
65
+
66
+
67
+ $APP_CONFIG_GROUPS
68
+
69
+
70
+ == Config ==
71
+
72
+ Override anything in the config (foo.bar=value)
73
+
74
+
75
+ $CONFIG
76
+
77
+
78
+ ${hydra.help.footer}
79
+
80
+ '
81
+ hydra_help:
82
+ hydra_help: ???
83
+ template: 'Hydra (${hydra.runtime.version})
84
+
85
+ See https://hydra.cc for more info.
86
+
87
+
88
+ == Flags ==
89
+
90
+ $FLAGS_HELP
91
+
92
+
93
+ == Configuration groups ==
94
+
95
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
96
+ to command line)
97
+
98
+
99
+ $HYDRA_CONFIG_GROUPS
100
+
101
+
102
+ Use ''--cfg hydra'' to Show the Hydra config.
103
+
104
+ '
105
+ output_subdir: .hydra
106
+ overrides:
107
+ hydra: []
108
+ task:
109
+ - algorithm=pets
110
+ - overrides=pets_pusher
111
+ - device=cuda:0
112
+ job:
113
+ name: main
114
+ override_dirname: algorithm=pets,device=cuda:0,overrides=pets_pusher
115
+ id: ???
116
+ num: ???
117
+ config_name: main
118
+ env_set: {}
119
+ env_copy: []
120
+ config:
121
+ override_dirname:
122
+ kv_sep: '='
123
+ item_sep: ','
124
+ exclude_keys: []
125
+ runtime:
126
+ version: 1.0.3
127
+ cwd: /home/raghava/projects/mbrl-lib/exp
128
+ verbose: false