3v324v23 commited on
Commit
70891a6
1 Parent(s): ba6e3be

add hook_z concat

Browse files
gelu-2l_1_16384_z_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:677c9a0635099fdf57b7830a0cfd0f6ffa4a84c5a33d79e7b8a9508f7daf5749
3
+ size 67178608
gelu-2l_1_16384_z_0_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 200000000, "l1_coeff": 0.215, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": false, "concat_heads": true, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
gelu-2l_1_16384_z_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65617e080f60906d312f8972ec46ae5ef0854d791502ea7b006179f9d03f6637
3
+ size 67178608
gelu-2l_1_16384_z_10.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88b4c4cee17ae2405f181360b9839abfaa333ca9e72d81a3a77a4cfc73305427
3
+ size 67178616
gelu-2l_1_16384_z_10_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 200000000, "l1_coeff": 0.1, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": false, "concat_heads": true, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
gelu-2l_1_16384_z_1_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 200000000, "l1_coeff": 0.215, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": false, "concat_heads": true, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
gelu-2l_1_16384_z_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d51e412b5038aab8f42a9e4fae35768eddf838b1ee3b0f5027ef4d862de8c1f8
3
+ size 67178608
gelu-2l_1_16384_z_2_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 200000000, "l1_coeff": 0.215, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": false, "concat_heads": true, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
gelu-2l_1_16384_z_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59ed72a74b6ed8c3fcf48fafb31663b0505f9a7e20f0941d651c7ba87f76cfa5
3
+ size 67178608
gelu-2l_1_16384_z_3_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 200000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": false, "concat_heads": true, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
gelu-2l_1_16384_z_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cd6f637840d641998568c5d3c002aa9d4b6b67db1a51b339af31fde4e9ea2da
3
+ size 67178608
gelu-2l_1_16384_z_4_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 200000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": false, "concat_heads": true, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
gelu-2l_1_16384_z_5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13af3b6673620200ab679399a819dc5c94d1c6fd316a9919dcc6a6cfb830b02b
3
+ size 67178608
gelu-2l_1_16384_z_5_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 200000000, "l1_coeff": 0.001, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": false, "concat_heads": true, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
gelu-2l_1_16384_z_6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e6aa0f167b2b3895f0cc5d70dd930bb248bc3f1fb406ea860660df1e83431fe
3
+ size 67178608
gelu-2l_1_16384_z_6_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 200000000, "l1_coeff": 0.001, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": false, "concat_heads": true, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
gelu-2l_1_16384_z_7.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38b7ff7eaff0598707873ab4d255cde172751521d0dc7282ae92569682f31110
3
+ size 67178608
gelu-2l_1_16384_z_7_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 200000000, "l1_coeff": 0.01, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": false, "concat_heads": true, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
gelu-2l_1_16384_z_8.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05016a8a1a81365722770926b3fc73a4aa3a07c1ff60f8527365adadb17d29e5
3
+ size 67178608
gelu-2l_1_16384_z_8_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 200000000, "l1_coeff": 0.1, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": false, "concat_heads": true, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
gelu-2l_1_16384_z_9.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b155eedfc1d6186d054e062082d1e40d946673a5c7de1d98a5ddd167ad19a2ae
3
+ size 67178608
gelu-2l_1_16384_z_9_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 200000000, "l1_coeff": 0.01, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": false, "concat_heads": true, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}