---
# Training config for sparse autoencoders (SAEs) on a TransformerLens gpt2-small
# model. NOTE(review): reconstructed from a whitespace-collapsed file — key
# nesting inferred from the key sequence; confirm against the consumer's schema.

act_frequency_n_tokens: 500000
batch_size: 4
collect_act_frequency_every_n_samples: 40000
collect_output_metrics_every_n_samples: 0
cooldown_samples: 0
effective_batch_size: 16

# Evaluation dataset (pre-tokenized OpenWebText, gpt2 tokenizer).
eval_data:
  column_name: input_ids
  dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
  is_tokenized: true
  n_ctx: 1024
  seed: null
  split: train
  streaming: true
  tokenizer_name: gpt2
eval_every_n_samples: 20000
eval_n_samples: 200
log_every_n_grad_steps: 20

# Loss terms and their coefficients.
loss:
  in_to_orig:
    # Residual-stream positions where reconstruction is compared to the
    # original (unmodified) model's activations.
    hook_positions:
      - blocks.7.hook_resid_pre
      - blocks.8.hook_resid_pre
      - blocks.9.hook_resid_pre
      - blocks.10.hook_resid_pre
      - blocks.11.hook_resid_pre
    total_coeff: 2.5
  logits_kl:
    coeff: 0.5
  out_to_in:
    coeff: 0.0
  out_to_orig: null
  sparsity:
    coeff: 50.0
    p_norm: 1.0

# Optimizer / schedule.
lr: 0.0005
lr_schedule: cosine
max_grad_norm: 10.0
min_lr_factor: 0.1
n_samples: 400000

# SAE architecture and placement.
saes:
  dict_size_to_input_ratio: 60.0
  k: null
  pretrained_sae_paths: null
  retrain_saes: false
  sae_positions:
    - blocks.6.hook_resid_pre
  type_of_sparsifier: sae

save_dir: /data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
save_every_n_samples: null
seed: 0
tlens_model_name: gpt2-small
tlens_model_path: null

# Training dataset (same source as eval_data).
train_data:
  column_name: input_ids
  dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
  is_tokenized: true
  n_ctx: 1024
  seed: null
  split: train
  streaming: true
  tokenizer_name: gpt2

# Weights & Biases logging.
wandb_project: gpt2-e2e
wandb_run_name: null
wandb_run_name_prefix: recon_
warmup_samples: 20000