YX-S-Z
committed on
Commit
·
72bba40
1
Parent(s):
9e71563
new ckpts
Browse files
- 10.pt +3 -0
- 10_cfg.json +1 -0
- 11.pt +3 -0
- 11_cfg.json +1 -0
- 12.pt +3 -0
- 12_cfg.json +1 -0
- 13.pt +3 -0
- 13_cfg.json +1 -0
- 14.pt +3 -0
- 14_cfg.json +1 -0
10.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3df7b5b674bd79077d3486348fe78e6d12d212a3ef823afa9581dbc0a07ec578
|
3 |
+
size 67177793
|
10_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-1l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-1l_0_16384_post"}
|
11.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25d93b82fe9ba6a87f647ecc68ae5bdef9f1587042e10727adc4cbdb5a69de66
|
3 |
+
size 67177793
|
11_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-1l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-1l_0_16384_post"}
|
12.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d21e26db9dd7becfcd97f135b2f916c1ceda422c3119c42c5dbf7851c9f4e25e
|
3 |
+
size 67177793
|
12_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-1l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-1l_0_16384_post"}
|
13.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:105448196f74ef3d74a137f12c61f1ca6d1538f6d5f09b0ec6af6263a78afbc9
|
3 |
+
size 67177793
|
13_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-1l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-1l_0_16384_post"}
|
14.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b28785f7bac89f79296f440ed45988edecd370931fd2a555a94c860d5657f3a
|
3 |
+
size 67177793
|
14_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-1l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-1l_0_16384_post"}
|