Connor
committed on
Commit
•
1afc0f5
1
Parent(s):
e6c769b
correct L5
Browse files- gpt2-small_L5_Hcat_z_lr1.20e-03_l16.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt → gpt2-small_L5_Hcat_z_lr1.20e-03_l11.00e+00_ds49152_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt +2 -2
- gpt2-small_L5_Hcat_z_lr1.20e-03_l16.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json → gpt2-small_L5_Hcat_z_lr1.20e-03_l11.00e+00_ds49152_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json +1 -1
gpt2-small_L5_Hcat_z_lr1.20e-03_l16.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt → gpt2-small_L5_Hcat_z_lr1.20e-03_l11.00e+00_ds49152_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5fc134321b85ecf653ed67c0a3fd36998c88c0073f77403a320db6f1244c263d
|
3 |
+
size 302192920
|
gpt2-small_L5_Hcat_z_lr1.20e-03_l16.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json → gpt2-small_L5_Hcat_z_lr1.20e-03_l11.00e+00_ds49152_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json
RENAMED
@@ -1 +1 @@
|
|
1 |
-
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0012, "num_tokens": 2000000000, "l1_coeff": 0
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0012, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 64, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gpt2-small", "site": "z", "layer": 5, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "resample_scheme": "anthropic", "anthropic_neuron_resample_scale": 0.2, "dead_direction_cutoff": 1e-06, "re_init_every": 25000, "anthropic_resample_last": 12500, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "gpt2-test-L5-20240112", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "b_dec_init": "zeros", "sched_type": "cosine_warmup", "sched_epochs": 1000, "sched_lr_factor": 0.1, "sched_warmup_epochs": 1000, "sched_finish": true, "anthropic_resample_batches": 100, "eval_every": 1000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.5.attn.hook_z", "act_size": 768, "dict_size": 49152, "name": "gpt2-small_5_49152_z"}
|