act_frequency_n_tokens: 500000
batch_size: 4
collect_act_frequency_every_n_samples: 40000
collect_output_metrics_every_n_samples: 0
cooldown_samples: 0
effective_batch_size: 16
eval_data:
  column_name: input_ids
  dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
  is_tokenized: true
  n_ctx: 1024
  seed: null
  split: train
  streaming: true
  tokenizer_name: gpt2
eval_every_n_samples: 20000
eval_n_samples: 200
log_every_n_grad_steps: 20
loss:
  in_to_orig:
    hook_positions:
      - blocks.7.hook_resid_pre
      - blocks.8.hook_resid_pre
      - blocks.9.hook_resid_pre
      - blocks.10.hook_resid_pre
      - blocks.11.hook_resid_pre
    total_coeff: 2.5
  logits_kl:
    coeff: 0.5
  out_to_in:
    coeff: 0.0
  out_to_orig: null
  sparsity:
    coeff: 50.0
    p_norm: 1.0
lr: 0.0005
lr_schedule: cosine
max_grad_norm: 10.0
min_lr_factor: 0.1
n_samples: 400000
saes:
  dict_size_to_input_ratio: 60.0
  k: null
  pretrained_sae_paths: null
  retrain_saes: false
  sae_positions:
    - blocks.6.hook_resid_pre
  type_of_sparsifier: sae
save_dir: /data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
save_every_n_samples: null
seed: 0
tlens_model_name: gpt2-small
tlens_model_path: null
train_data:
  column_name: input_ids
  dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
  is_tokenized: true
  n_ctx: 1024
  seed: null
  split: train
  streaming: true
  tokenizer_name: gpt2
wandb_project: gpt2-e2e
wandb_run_name: null
wandb_run_name_prefix: recon_
warmup_samples: 20000
|
|
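For orientation, here is a minimal sketch of consuming this config, assuming it is saved as `config.yaml` (a hypothetical filename) and that PyYAML is installed; this is not the repo's own loader. The derived quantities follow from the values above, except the residual-stream width, which is an assumption about gpt2-small rather than part of the config.

```python
# Minimal sketch (not the repo's own loader): read the config above with PyYAML
# and derive a few quantities it implies. The filename "config.yaml" and the
# residual-stream width constant are assumptions for illustration.
import yaml

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

# 16 / 4 = 4 micro-batches accumulated per optimizer step.
grad_accum_steps = cfg["effective_batch_size"] // cfg["batch_size"]

# 400000 / 16 = 25000 optimizer steps over the whole run.
total_grad_steps = cfg["n_samples"] // cfg["effective_batch_size"]

# Dictionary size for the SAE at blocks.6.hook_resid_pre: with GPT-2 small's
# residual width of 768 (assumption), the ratio of 60.0 gives 46080 elements.
d_resid = 768
dict_size = int(cfg["saes"]["dict_size_to_input_ratio"] * d_resid)

print(grad_accum_steps, total_grad_steps, dict_size)
```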