jbrinkma committed
Commit bf16a44
1 Parent(s): e498e86

Upload GPT2_L0_sparsity_coefficient_0.0003_transformer.h.0.attn_config.json with huggingface_hub

GPT2_L0_sparsity_coefficient_0.0003_transformer.h.0.attn_config.json ADDED
@@ -0,0 +1,47 @@
+ {
+ "seed": 42,
+ "model_name_or_path": "gpt2",
+ "revision": "",
+ "hook_point": "transformer.h.0.attn",
+ "dataset_name_or_path": "jbrinkma/pile-100k",
+ "activation_size": -1,
+ "add_bos_token": false,
+ "evaluation_batches": 10,
+ "expansion_factor": 4,
+ "b_dec_init_method": "",
+ "use_pre_encoder_bias": true,
+ "tied": false,
+ "n_steps": -1,
+ "device": "cuda",
+ "batch_size": 32,
+ "ctx_length": 256,
+ "lr": 0.001,
+ "min_lr": 0.0,
+ "lr_warmup_steps": 5000,
+ "sparsity_coefficient": 0.0003,
+ "evaluation_interval": 200,
+ "beta1": 0.9,
+ "beta2": 0.999,
+ "l1_sqrt": false,
+ "cos_sim_reg": false,
+ "cos_sim_alpha": 0.0,
+ "decoder_normalization": true,
+ "decoder_norm_smaller_than_one": false,
+ "l1_with_norm": false,
+ "sqrt_mse": false,
+ "dynamic_weighting": false,
+ "l1_warmup_steps": 2000,
+ "target_l0": 40,
+ "n_tokens_in_feature_cache": 500000.0,
+ "use_ghost_grads": false,
+ "use_neuron_resampling": false,
+ "resampling_steps": -1,
+ "output_dir": "outputs",
+ "cache_dir": "cache",
+ "checkpoint_interval": 200,
+ "use_wandb": true,
+ "wandb_entity": "jannikbrinkmann",
+ "wandb_project": "best-sae",
+ "wandb_name": "GPT2_L0_sparsity_coefficient_0.0003_20240323115650953440",
+ "wandb_group": "L0_L1_Sweeps"
+ }
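Since the file was uploaded with huggingface_hub, it can be fetched back the same way. A minimal sketch of retrieving and inspecting this config, assuming a hypothetical repo_id (the commit view does not show the repository name; only the filename above is taken from the source):

```python
import json

from huggingface_hub import hf_hub_download

# repo_id is an assumption for illustration; substitute the actual repository.
config_path = hf_hub_download(
    repo_id="jbrinkma/best-sae",
    filename="GPT2_L0_sparsity_coefficient_0.0003_transformer.h.0.attn_config.json",
)

with open(config_path) as f:
    config = json.load(f)

# Key fields of this training run, as recorded in the config itself.
print(config["hook_point"])            # "transformer.h.0.attn"
print(config["sparsity_coefficient"])  # 0.0003
print(config["expansion_factor"])      # 4
```

Note that several fields (e.g. "activation_size": -1 and "n_steps": -1) appear to be sentinel values resolved at training time rather than fixed settings.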