ledmands committed on
Commit
df1cef8
·
1 Parent(s): 6723373

Modified get_config.py. Updated README. Added config file for most recent agent.

Browse files
Files changed (3) hide show
  1. README.md +4 -0
  2. agents/dqn_v2-8/config.json +105 -0
  3. get_config.py +4 -3
README.md CHANGED
@@ -25,6 +25,10 @@ Does not save any evaluation information.
25
  This will evaluate a specified agent and append the results to a specified log file.
26
  ### get_config.py
27
  This will pull configuration information from the specified agent and save it in JSON format.
 
 
 
 
28
  ### record_video.py
29
  This will record a video of a specified agent being evaluated.
30
  Does not save any evaluation information.
 
25
  This will evaluate a specified agent and append the results to a specified log file.
26
  ### get_config.py
27
  This will pull configuration information from the specified agent and save it in JSON format.
28
+ The data is pulled from the data file in the agent's zip file, and the serialized data is stripped out
29
+ to make the data more human-readable.
30
+ By default, the file is saved to the directory from which the command is run. Best practice is
31
+ to save the file to the agent's directory.
32
  ### record_video.py
33
  This will record a video of a specified agent being evaluated.
34
  Does not save any evaluation information.
agents/dqn_v2-8/config.json ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "policy_class": {
3
+ ":type:": "<class 'abc.ABCMeta'>",
4
+ "__module__": "stable_baselines3.dqn.policies",
5
+ "__doc__": "\n Policy class for DQN when using images as input.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
6
+ "__init__": "<function CnnPolicy.__init__ at 0x7d6123a05cf0>",
7
+ "__abstractmethods__": "frozenset()",
8
+ "_abc_impl": "<_abc._abc_data object at 0x7d6123a18580>"
9
+ },
10
+ "verbose": 1,
11
+ "policy_kwargs": {},
12
+ "num_timesteps": 10000000,
13
+ "_total_timesteps": 10000000,
14
+ "_num_timesteps_at_start": 9000000,
15
+ "seed": null,
16
+ "action_noise": null,
17
+ "start_time": 1715963247524276127,
18
+ "learning_rate": 5e-05,
19
+ "tensorboard_log": "./",
20
+ "_last_obs": {
21
+ ":type:": "<class 'numpy.ndarray'>"
22
+ },
23
+ "_last_episode_starts": {
24
+ ":type:": "<class 'numpy.ndarray'>"
25
+ },
26
+ "_last_original_obs": {
27
+ ":type:": "<class 'numpy.ndarray'>"
28
+ },
29
+ "_episode_num": 8626,
30
+ "use_sde": false,
31
+ "sde_sample_freq": -1,
32
+ "_current_progress_remaining": 0.0,
33
+ "_stats_window_size": 100,
34
+ "ep_info_buffer": {
35
+ ":type:": "<class 'collections.deque'>"
36
+ },
37
+ "ep_success_buffer": {
38
+ ":type:": "<class 'collections.deque'>"
39
+ },
40
+ "_n_updates": 2487500,
41
+ "observation_space": {
42
+ ":type:": "<class 'gymnasium.spaces.box.Box'>",
43
+ "dtype": "uint8",
44
+ "bounded_below": "[[[ True True True ... True True True]\n [ True True True ... True True True]\n [ True True True ... True True True]\n ...\n [ True True True ... True True True]\n [ True True True ... True True True]\n [ True True True ... True True True]]\n\n [[ True True True ... True True True]\n [ True True True ... True True True]\n [ True True True ... True True True]\n ...\n [ True True True ... True True True]\n [ True True True ... True True True]\n [ True True True ... True True True]]\n\n [[ True True True ... True True True]\n [ True True True ... True True True]\n [ True True True ... True True True]\n ...\n [ True True True ... True True True]\n [ True True True ... True True True]\n [ True True True ... True True True]]]",
45
+ "bounded_above": "[[[ True True True ... True True True]\n [ True True True ... True True True]\n [ True True True ... True True True]\n ...\n [ True True True ... True True True]\n [ True True True ... True True True]\n [ True True True ... True True True]]\n\n [[ True True True ... True True True]\n [ True True True ... True True True]\n [ True True True ... True True True]\n ...\n [ True True True ... True True True]\n [ True True True ... True True True]\n [ True True True ... True True True]]\n\n [[ True True True ... True True True]\n [ True True True ... True True True]\n [ True True True ... True True True]\n ...\n [ True True True ... True True True]\n [ True True True ... True True True]\n [ True True True ... True True True]]]",
46
+ "_shape": [
47
+ 3,
48
+ 250,
49
+ 160
50
+ ],
51
+ "low": "[[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n ...\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]]\n\n [[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n ...\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]]\n\n [[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n ...\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]]]",
52
+ "high": "[[[255 255 255 ... 255 255 255]\n [255 255 255 ... 255 255 255]\n [255 255 255 ... 255 255 255]\n ...\n [255 255 255 ... 255 255 255]\n [255 255 255 ... 255 255 255]\n [255 255 255 ... 255 255 255]]\n\n [[255 255 255 ... 255 255 255]\n [255 255 255 ... 255 255 255]\n [255 255 255 ... 255 255 255]\n ...\n [255 255 255 ... 255 255 255]\n [255 255 255 ... 255 255 255]\n [255 255 255 ... 255 255 255]]\n\n [[255 255 255 ... 255 255 255]\n [255 255 255 ... 255 255 255]\n [255 255 255 ... 255 255 255]\n ...\n [255 255 255 ... 255 255 255]\n [255 255 255 ... 255 255 255]\n [255 255 255 ... 255 255 255]]]",
53
+ "low_repr": "0",
54
+ "high_repr": "255",
55
+ "_np_random": "Generator(PCG64)"
56
+ },
57
+ "action_space": {
58
+ ":type:": "<class 'gymnasium.spaces.discrete.Discrete'>",
59
+ "n": "5",
60
+ "start": "0",
61
+ "_shape": [],
62
+ "dtype": "int64",
63
+ "_np_random": "Generator(PCG64)"
64
+ },
65
+ "n_envs": 1,
66
+ "buffer_size": 70000,
67
+ "batch_size": 64,
68
+ "learning_starts": 100000,
69
+ "tau": 1.0,
70
+ "gamma": 0.999,
71
+ "gradient_steps": 1,
72
+ "optimize_memory_usage": false,
73
+ "replay_buffer_class": {
74
+ ":type:": "<class 'abc.ABCMeta'>",
75
+ "__module__": "stable_baselines3.common.buffers",
76
+ "__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
77
+ "__init__": "<function ReplayBuffer.__init__ at 0x7d61239e1cf0>",
78
+ "add": "<function ReplayBuffer.add at 0x7d61239e1d80>",
79
+ "sample": "<function ReplayBuffer.sample at 0x7d61239e1e10>",
80
+ "_get_samples": "<function ReplayBuffer._get_samples at 0x7d61239e1ea0>",
81
+ "_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at 0x7d61239e1f30>)>",
82
+ "__abstractmethods__": "frozenset()",
83
+ "_abc_impl": "<_abc._abc_data object at 0x7d61239e61c0>"
84
+ },
85
+ "replay_buffer_kwargs": {},
86
+ "train_freq": {
87
+ ":type:": "<class 'stable_baselines3.common.type_aliases.TrainFreq'>"
88
+ },
89
+ "use_sde_at_warmup": false,
90
+ "exploration_initial_eps": 1.0,
91
+ "exploration_final_eps": 0.005,
92
+ "exploration_fraction": 0.3,
93
+ "target_update_interval": 1000,
94
+ "_n_calls": 10000000,
95
+ "max_grad_norm": 10,
96
+ "exploration_rate": 0.005,
97
+ "lr_schedule": {
98
+ ":type:": "<class 'function'>"
99
+ },
100
+ "batch_norm_stats": [],
101
+ "batch_norm_stats_target": [],
102
+ "exploration_schedule": {
103
+ ":type:": "<class 'function'>"
104
+ }
105
+ }
get_config.py CHANGED
@@ -22,8 +22,9 @@ for key in json_file.keys():
22
  if val_to_remove in json_file[key].keys():
23
  json_file[key].pop(val_to_remove)
24
 
25
- outfile = open(f"{savefile}.json", "w")
26
- outfile.write(json.dumps(json_file, indent=2))
 
27
 
28
  file.close()
29
- outfile.close()
 
22
  if val_to_remove in json_file[key].keys():
23
  json_file[key].pop(val_to_remove)
24
 
25
+ # outfile = open(f"{savefile}.json", "w")
26
+ with open(f"{savefile}.json", "w") as outfile:
27
+ outfile.write(json.dumps(json_file, indent=2))
28
 
29
  file.close()
30
+ # outfile.close()