sae-replication / dg6ryo96.json
ArthurConmy's picture
Upload 15 files
8ea403c
{"lr": "0.0012", "d_in": "2048", "seed": "1", "beta1": "0.9", "beta2": "0.99", "d_sae": "2048", "dtype": "torch.bfloat16", "device": "cuda", "dataset": "c4", "seq_len": "128", "testing": "False", "act_name": "blocks.0.mlp.hook_post", "l1_lambda": "0.0007999999797903001", "batch_size": "32", "num_tokens": "2000000000000", "sched_type": "cosine_annealing", "test_every": "100", "buffer_size": "524288", "wandb_group": "None", "dataset_args": "['en']", "delete_cache": "False", "l1_loss_form": "l1", "l2_loss_form": "l2", "sched_epochs": "1000", "sched_finish": "True", "buffer_device": "cuda:0", "resample_mode": "anthropic", "dataset_kwargs": "{'split': 'train', 'streaming': True}", "log_everything": "False", "resample_factor": "0.01", "sched_lr_factor": "0.1", "sched_warmup_epochs": "1000", "test_set_batch_size": "100", "save_state_dict_every": "sae.utils.get_cfg.<locals>.<lambda>", "anthropic_resample_last": "7500", "resample_sae_neurons_at": "[10000, 20000, 50000, 75000, 100000]", "activation_training_order": "shuffled", "anthropic_resample_batches": "200000", "resample_sae_neurons_every": "2.050427598475984e+32", "wandb_mode_online_override": "False", "resample_sae_neurons_cutoff": "1e-06"}