{
"seed": 42,
"model_name_or_path": "EleutherAI/pythia-70m-deduped",
"hook_point": "gpt_neox.layers.3",
"dataset_name_or_path": "Elriggs/openwebtext-100k",
"activation_size": -1,
"add_bos_token": false,
"expansion_factor": 4,
"b_dec_init_method": "",
"n_steps": -1,
"device": "cuda",
"batch_size": 32,
"ctx_length": 256,
"lr": 0.001,
"min_lr": 0.0,
"lr_warmup_steps": 5000,
"sparsity_coefficient": 0.0002,
"evaluation_interval": 200,
"beta1": 0.9,
"beta2": 0.999,
"l1_sqrt": true,
"n_tokens_in_feature_cache": 500000.0,
"use_ghost_grads": false,
"output_dir": "outputs",
"cache_dir": "cache",
"checkpoint_interval": 200,
"use_wandb": true,
"wandb_entity": "best_sae",
"wandb_project": "best_sae",
"wandb_name": "sparsity_coefficient_0.0002_20240306162655275975",
"wandb_group": "L1(Sqrt)"
}