neuronpedia committed on
Commit
6340563
·
verified ·
1 Parent(s): 6e9f16b

Upload folder using huggingface_hub

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
10-res_sce-ajt/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.10.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 10, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "8crnit9h/lrkvecno", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
10-res_sce-ajt/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f3e4ca935edee4bd34adb3fae116cab0101be7c7c78b785cd7e3cc89ea6c5e3
3
+ size 283487640
10-res_sce-ajt/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70e0026fb81af935b9d8dc23311cf5fee8f805aedbbf907a3647c120246ea244
3
+ size 184400
2-res_sce-ajt/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.2.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 2, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "ovhfts9n/gzyh3c36", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
2-res_sce-ajt/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:510997fc90dfeb030934666c8f3684834917f790d95cfb1ff171c04a53ad9c2c
3
+ size 283487640
2-res_sce-ajt/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2549adf6befaf2d3cf26f8c4de04a6259f1abe0b78c5f4d1c6f0c1dd29a8499
3
+ size 184400
6-res_sce-ajt/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.6.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 6, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "zgdpkafo/vdhbr7l5", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
6-res_sce-ajt/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7ae5881e1574f8e6d1f5a17502893ba3cc05bdd0c78b7cf671ac6978abe1479
3
+ size 283487640
6-res_sce-ajt/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddb8589298f4fcabc4e82401c042e2ac641b994d0a29181186a104a8deaaf260
3
+ size 184400