{
  "seed": 49,
  "epochs": 5000,
  "batch_size": 16384,
  "lr": 0.001,
  "l1_coeff": 0,
  "beta1": 0.9,
  "beta2": 0.99,
  "num_tokens": 1000000000,
  "max_grad_norm": 100000,
  "seq_len": 128,
  "dtype": "torch.float32",
  "model_name": "gpt2-small",
  "site": "resid_pre",
  "layer": 8,
  "act_size": 768,
  "dict_size": 2304,
  "device": "cuda:0",
  "model_batch_size": 512,
  "num_batches_in_buffer": 10,
  "dataset_path": "Skylion007/openwebtext",
  "wandb_project": "gpt2-feature-splitting-saes",
  "input_unit_norm": false,
  "perf_log_freq": 1000,
  "sae_type": "topk",
  "architecture": "standard",
  "checkpoint_freq": 1000,
  "n_batches_to_dead": 100,
  "top_k": 4,
  "top_k_aux": 512,
  "aux_penalty": 0.0625,
  "hook_point": "blocks.8.hook_resid_pre",
  "threshold": null,
  "name": "gpt2-small_blocks.8.hook_resid_pre_2304_topk_4_0.001",
  "cosine_penalty": 0.0
}