jacobcd52 committed on
Commit
9fffbc0
·
verified ·
1 Parent(s): fbc92fc

Upload trainer_0/config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_0/config.json +14 -16
trainer_0/config.json CHANGED
@@ -1,31 +1,29 @@
1
  {
2
  "trainer": {
3
- "trainer_class": "TopKTrainer",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.00010690449676496975,
6
- "steps": 12,
7
- "auxk_alpha": 0.03125,
 
8
  "warmup_steps": 0,
 
 
9
  "decay_start": null,
10
- "threshold_beta": 0.999,
11
- "threshold_start_step": 1000,
12
  "seed": null,
13
- "activation_dim": 896,
14
- "dict_size": 57344,
15
- "k": 128,
16
  "device": "cuda:2",
17
- "layer": 4,
18
  "lm_name": "blah",
19
- "wandb_name": "AutoEncoderTopK",
20
  "submodule_name": null
21
  },
22
  "buffer": {
23
- "n_models": 1,
24
  "d_submodule": 896,
25
  "io": "out",
26
- "n_ctxs": 1024,
27
- "ctx_len": 128,
28
- "refresh_batch_size": 128,
29
  "out_batch_size": 8192,
30
  "device": "cuda:2",
31
  "rescale_acts": false
 
1
  {
2
  "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainerAprilUpdate",
5
+ "activation_dim": 1792,
6
+ "dict_size": 57344,
7
+ "lr": 1e-06,
8
+ "l1_penalty": 0.01,
9
  "warmup_steps": 0,
10
+ "sparsity_warmup_steps": 2000,
11
+ "steps": 61035,
12
  "decay_start": null,
 
 
13
  "seed": null,
 
 
 
14
  "device": "cuda:2",
15
+ "layer": 7,
16
  "lm_name": "blah",
17
+ "wandb_name": "StandardTrainerAprilUpdate",
18
  "submodule_name": null
19
  },
20
  "buffer": {
21
+ "n_models": 2,
22
  "d_submodule": 896,
23
  "io": "out",
24
+ "n_ctxs": 512,
25
+ "ctx_len": 256,
26
+ "refresh_batch_size": 512,
27
  "out_batch_size": 8192,
28
  "device": "cuda:2",
29
  "rescale_acts": false