jbrinkma committed on
Commit
aa7d8d1
1 Parent(s): 734976e

Upload sparsity_coefficient_0.000131_20240314001404151327_config.json with huggingface_hub

Browse files
sparsity_coefficient_0.000131_20240314001404151327_config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "seed": 42,
3
+ "model_name_or_path": "EleutherAI/pythia-70m-deduped",
4
+ "revision": "",
5
+ "hook_point": "gpt_neox.layers.3",
6
+ "dataset_name_or_path": "jbrinkma/pile-300k",
7
+ "activation_size": -1,
8
+ "add_bos_token": false,
9
+ "evaluation_batches": 10,
10
+ "expansion_factor": 4,
11
+ "b_dec_init_method": "",
12
+ "n_steps": -1,
13
+ "device": "cuda",
14
+ "batch_size": 32,
15
+ "ctx_length": 256,
16
+ "lr": 0.001,
17
+ "min_lr": 0.0,
18
+ "lr_warmup_steps": 5000,
19
+ "sparsity_coefficient": 0.00013142857142857143,
20
+ "evaluation_interval": 200,
21
+ "beta1": 0.9,
22
+ "beta2": 0.999,
23
+ "l1_sqrt": true,
24
+ "cos_sim_reg": false,
25
+ "cos_sim_alpha": 0.0,
26
+ "n_tokens_in_feature_cache": 500000.0,
27
+ "use_ghost_grads": false,
28
+ "output_dir": "outputs",
29
+ "cache_dir": "cache",
30
+ "checkpoint_interval": 200,
31
+ "use_wandb": true,
32
+ "wandb_entity": "jannikbrinkmann",
33
+ "wandb_project": "best-sae",
34
+ "wandb_name": "sparsity_coefficient_0.000131_20240314001404151327",
35
+ "wandb_group": "L1(Sqrt(Sqrt))"
36
+ }