{
  "seed": 42,
  "model_name_or_path": "gpt2",
  "revision": "",
  "hook_point": "transformer.h.0.mlp",
  "dataset_name_or_path": "jbrinkma/pile-100k",
  "activation_size": -1,
  "add_bos_token": false,
  "evaluation_batches": 10,
  "expansion_factor": 4,
  "b_dec_init_method": "",
  "use_pre_encoder_bias": true,
  "tied": false,
  "n_steps": -1,
  "device": "cuda",
  "batch_size": 32,
  "ctx_length": 256,
  "lr": 0.01,
  "min_lr": 0.0,
  "lr_warmup_steps": 5000,
  "sparsity_coefficient": 0.003,
  "evaluation_interval": 200,
  "beta1": 0.9,
  "beta2": 0.999,
  "l1_sqrt": false,
  "cos_sim_reg": false,
  "cos_sim_alpha": 0.0,
  "decoder_normalization": true,
  "decoder_norm_smaller_than_one": false,
  "l1_with_norm": false,
  "sqrt_mse": false,
  "dynamic_weighting": false,
  "l1_warmup_steps": 2000,
  "target_l0": 40,
  "n_tokens_in_feature_cache": 500000.0,
  "use_ghost_grads": false,
  "use_neuron_resampling": false,
  "resampling_steps": -1,
  "output_dir": "outputs",
  "cache_dir": "cache",
  "checkpoint_interval": 200,
  "use_wandb": true,
  "wandb_entity": "jannikbrinkmann",
  "wandb_project": "best-sae",
  "wandb_name": "GPT2_L0_lr_0.01_20240323115650955246",
  "wandb_group": "L0_L1_Sweeps"
}