jacobcd52 committed on
Commit
cd639af
·
verified ·
1 Parent(s): 0f4d7ed

Upload trainer_0/config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_0/config.json +14 -14
trainer_0/config.json CHANGED
@@ -2,32 +2,32 @@
2
  "trainer": {
3
  "trainer_class": "TopKTrainer",
4
  "dict_class": "AutoEncoderTopK",
5
- "lr": 0.00016329931618554522,
6
- "steps": 6103,
7
  "auxk_alpha": 0.03125,
8
  "warmup_steps": 0,
9
  "decay_start": null,
10
  "threshold_beta": 0.999,
11
  "threshold_start_step": 1000,
12
  "seed": null,
13
- "activation_dim": 6144,
14
- "dict_size": 24576,
15
  "k": 128,
16
- "device": "cuda:0",
17
- "layer": 4,
18
  "lm_name": "blah",
19
  "wandb_name": "AutoEncoderTopK",
20
  "submodule_name": null
21
  },
22
  "buffer": {
23
- "n_models": 12,
24
- "d_submodule": 512,
25
  "io": "out",
26
- "n_ctxs": 1024,
27
- "ctx_len": 128,
28
- "refresh_batch_size": 512,
29
- "out_batch_size": 16384,
30
- "device": "cuda:0",
31
- "rescale_acts": true
32
  }
33
  }
 
2
  "trainer": {
3
  "trainer_class": "TopKTrainer",
4
  "dict_class": "AutoEncoderTopK",
5
+ "lr": 8.944271909999159e-05,
6
+ "steps": 12207,
7
  "auxk_alpha": 0.03125,
8
  "warmup_steps": 0,
9
  "decay_start": null,
10
  "threshold_beta": 0.999,
11
  "threshold_start_step": 1000,
12
  "seed": null,
13
+ "activation_dim": 10240,
14
+ "dict_size": 81920,
15
  "k": 128,
16
+ "device": "cuda:2",
17
+ "layer": 40,
18
  "lm_name": "blah",
19
  "wandb_name": "AutoEncoderTopK",
20
  "submodule_name": null
21
  },
22
  "buffer": {
23
+ "n_models": 2,
24
+ "d_submodule": 5120,
25
  "io": "out",
26
+ "n_ctxs": 256,
27
+ "ctx_len": 512,
28
+ "refresh_batch_size": 64,
29
+ "out_batch_size": 4096,
30
+ "device": "cuda:2",
31
+ "rescale_acts": false
32
  }
33
  }