YX-S-Z committed on
Commit
19ece2e
1 Parent(s): a36bded
Files changed (32) hide show
  1. 0.pt +3 -0
  2. 0_cfg.json +1 -0
  3. 1.pt +3 -0
  4. 10.pt +3 -0
  5. 10_cfg.json +1 -0
  6. 11.pt +3 -0
  7. 11_cfg.json +1 -0
  8. 12.pt +3 -0
  9. 12_cfg.json +1 -0
  10. 13.pt +3 -0
  11. 13_cfg.json +1 -0
  12. 14.pt +3 -0
  13. 14_cfg.json +1 -0
  14. 15.pt +3 -0
  15. 15_cfg.json +1 -0
  16. 1_cfg.json +1 -0
  17. 2.pt +3 -0
  18. 2_cfg.json +1 -0
  19. 3.pt +3 -0
  20. 3_cfg.json +1 -0
  21. 4.pt +3 -0
  22. 4_cfg.json +1 -0
  23. 5.pt +3 -0
  24. 5_cfg.json +1 -0
  25. 6.pt +3 -0
  26. 6_cfg.json +1 -0
  27. 7.pt +3 -0
  28. 7_cfg.json +1 -0
  29. 8.pt +3 -0
  30. 8_cfg.json +1 -0
  31. 9.pt +3 -0
  32. 9_cfg.json +1 -0
0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:747852bf4f8f8f925d60d55572260d09b102542588659f9de695d97bd180eed0
3
+ size 67177787
0_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}
1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5a17d52eb3b2a49d97a1503d350160bf1f25026ca812e74a7563776978ca91b
3
+ size 67177787
10.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:832566f6200a64f9d0fcffe2b09b1b7541eec9a326c126cd6f9d90f1bb2e9785
3
+ size 67177793
10_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}
11.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64ef24137d6502bd75ed9a350bd1ea60f9f9b86e52f5fa7d233d98090f05d92e
3
+ size 67177793
11_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}
12.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bd53234b0a06b92317f00e81ba2f6b16dd5f6f06b074e36447a9b6d8fe0a08f
3
+ size 67177793
12_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}
13.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75298b4cb754a445be3fa1d6daffc579ad18ca065b87fee19304e5aa6b84116d
3
+ size 67177793
13_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}
14.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:764a4af3e1b03a2c56840888ab415274055bfc439b86a208e4e4e366f2a0aea1
3
+ size 67177793
14_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}
15.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0384170e410d95a39129e1d1a13e34203714a56c58c78c46518cf9a6628a1bdc
3
+ size 67177793
15_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}
1_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}
2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee4ace15612ee4ff2c186ce782413198f79d560e27c7584a176f517361137654
3
+ size 67177787
2_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}
3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:301f89755c5183a45e4c032916a0827f9b7b12d060b11a1bc1394fe3a791c20e
3
+ size 67177787
3_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}
4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb4ad283c4140254f561b5d3a8c9fc72e26c02d8fe6f4c74945a3f0012dea3b
3
+ size 67177787
4_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}
5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2559b820cebb1f312c70784f762aac4eb997eee927d0e353f4e5c87570f0667f
3
+ size 67177787
5_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}
6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95ba30b46ed4514058012bdc912894707e8ba7dc0a6c7622d0f1f23b8a1f3bef
3
+ size 67177787
6_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}
7.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2a19131e55a8215e9a75b9468b568b59aa26510ce1971ecb3773c18f7ce754a
3
+ size 67177787
7_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}
8.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86b2048689c634a349c0aef544ef0f872ff3b7e2fb60e5c95d5b02a7628fce21
3
+ size 67177787
8_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}
9.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1799e2554541917b06a1c163f15bb0611f5824ede7b6d93b54540422d1584720
3
+ size 67177787
9_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-3l_0_16384_post"}