jbrinkma committed
Commit 1f0bb28
1 Parent(s): 0f0a7b3

Upload sparsity_coefficient_0.0003_20240311220027664418_config.json with huggingface_hub

sparsity_coefficient_0.0003_20240311220027664418_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "seed": 42,
+   "model_name_or_path": "EleutherAI/pythia-70m-deduped",
+   "hook_point": "gpt_neox.layers.3",
+   "dataset_name_or_path": "jbrinkma/pile-300k",
+   "activation_size": -1,
+   "add_bos_token": false,
+   "expansion_factor": 4,
+   "b_dec_init_method": "",
+   "n_steps": -1,
+   "device": "cuda",
+   "batch_size": 32,
+   "ctx_length": 256,
+   "lr": 0.001,
+   "min_lr": 0.0,
+   "lr_warmup_steps": 5000,
+   "sparsity_coefficient": 0.0003,
+   "evaluation_interval": 200,
+   "beta1": 0.9,
+   "beta2": 0.999,
+   "l1_sqrt": true,
+   "cos_sim_reg": false,
+   "cos_sim_alpha": 0.0,
+   "n_tokens_in_feature_cache": 500000.0,
+   "use_ghost_grads": false,
+   "output_dir": "outputs",
+   "cache_dir": "cache",
+   "checkpoint_interval": 200,
+   "use_wandb": true,
+   "wandb_entity": "best_sae",
+   "wandb_project": "best_sae",
+   "wandb_name": "sparsity_coefficient_0.0003_20240311220027664418",
+   "wandb_group": "L1(Sqrt)_v2"
+ }
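
Since the commit message says the file was uploaded with `huggingface_hub`, the uploaded JSON can be fetched back and parsed the same way. A minimal sketch follows; the `REPO_ID` is a hypothetical placeholder (this page does not show which repository the commit belongs to), and only the filename and field values are taken from the diff above.

```python
import json

from huggingface_hub import hf_hub_download

# Hypothetical repository name; replace with the actual repo this commit is in.
REPO_ID = "jbrinkma/pythia-70m-saes"

# Filename taken verbatim from the commit above.
config_path = hf_hub_download(
    repo_id=REPO_ID,
    filename="sparsity_coefficient_0.0003_20240311220027664418_config.json",
)

with open(config_path) as f:
    config = json.load(f)

# A few fields worth sanity-checking before reusing the config:
print(config["model_name_or_path"])    # EleutherAI/pythia-70m-deduped
print(config["hook_point"])            # gpt_neox.layers.3
print(config["sparsity_coefficient"])  # 0.0003
# "activation_size": -1 likely signals that the hooked module's width is
# inferred at runtime; the SAE dictionary size would then be
# expansion_factor * activation_size.
```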