checkpoint-134000 contents
Browse files- config.json +6 -2
- global_step134002/mp_rank_00_model_states.pt +3 -0
- global_step134002/zero_pp_rank_0_mp_rank_00optim_states.pt +3 -0
- global_step134002/zero_pp_rank_10_mp_rank_00optim_states.pt +3 -0
- global_step134002/zero_pp_rank_11_mp_rank_00optim_states.pt +3 -0
- global_step134002/zero_pp_rank_12_mp_rank_00optim_states.pt +3 -0
- global_step134002/zero_pp_rank_13_mp_rank_00optim_states.pt +3 -0
- global_step134002/zero_pp_rank_14_mp_rank_00optim_states.pt +3 -0
- global_step134002/zero_pp_rank_15_mp_rank_00optim_states.pt +3 -0
- global_step134002/zero_pp_rank_1_mp_rank_00optim_states.pt +3 -0
- global_step134002/zero_pp_rank_2_mp_rank_00optim_states.pt +3 -0
- global_step134002/zero_pp_rank_3_mp_rank_00optim_states.pt +3 -0
- global_step134002/zero_pp_rank_4_mp_rank_00optim_states.pt +3 -0
- global_step134002/zero_pp_rank_5_mp_rank_00optim_states.pt +3 -0
- global_step134002/zero_pp_rank_6_mp_rank_00optim_states.pt +3 -0
- global_step134002/zero_pp_rank_7_mp_rank_00optim_states.pt +3 -0
- global_step134002/zero_pp_rank_8_mp_rank_00optim_states.pt +3 -0
- global_step134002/zero_pp_rank_9_mp_rank_00optim_states.pt +3 -0
- latest +1 -1
- pytorch_model.bin +2 -2
- trainer_state.json +2 -2
config.json
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
{
|
|
|
2 |
"activation_function": "gelu_new",
|
3 |
"architectures": [
|
4 |
-
"
|
5 |
],
|
6 |
"attn_pdrop": 0.1,
|
7 |
"bos_token_id": 50256,
|
@@ -19,7 +20,10 @@
|
|
19 |
"n_positions": 1024,
|
20 |
"n_special": 0,
|
21 |
"predict_special_tokens": true,
|
|
|
22 |
"resid_pdrop": 0.1,
|
|
|
|
|
23 |
"summary_activation": null,
|
24 |
"summary_first_dropout": 0.1,
|
25 |
"summary_proj_to_labels": true,
|
@@ -31,7 +35,7 @@
|
|
31 |
"max_length": 50
|
32 |
}
|
33 |
},
|
34 |
-
"transformers_version": "4.5
|
35 |
"use_cache": false,
|
36 |
"vocab_size": 50257
|
37 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "stanford-crfm/beren-gpt2-medium-x49",
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
],
|
7 |
"attn_pdrop": 0.1,
|
8 |
"bos_token_id": 50256,
|
|
|
20 |
"n_positions": 1024,
|
21 |
"n_special": 0,
|
22 |
"predict_special_tokens": true,
|
23 |
+
"reorder_and_upcast_attn": true,
|
24 |
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": true,
|
26 |
+
"scale_attn_weights": true,
|
27 |
"summary_activation": null,
|
28 |
"summary_first_dropout": 0.1,
|
29 |
"summary_proj_to_labels": true,
|
|
|
35 |
"max_length": 50
|
36 |
}
|
37 |
},
|
38 |
+
"transformers_version": "4.12.5",
|
39 |
"use_cache": false,
|
40 |
"vocab_size": 50257
|
41 |
}
|
global_step134002/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad38830e67ec25987fda03d24ec4555bf7c7873d86fdd333b92ba5ff643c3e03
|
3 |
+
size 734884708
|
global_step134002/zero_pp_rank_0_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e792a70c4f0163cfa475767455c11d81bcc8a5bc48126457e23596decce43a6
|
3 |
+
size 266119230
|
global_step134002/zero_pp_rank_10_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2a1a392d6d11c2732da21a90da7ed86668ff5b513cd711a5713c3fcb5cbdda9
|
3 |
+
size 266119230
|
global_step134002/zero_pp_rank_11_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f22436b5a129bd494ef9f7d2e4b1486faf719ac2e2cf1eee288376a4536b6204
|
3 |
+
size 266119230
|
global_step134002/zero_pp_rank_12_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76a8dbfed7e3c96f71578865890d6edbfc4a81ca0b34879ff3efdb32f7c714d3
|
3 |
+
size 266119230
|
global_step134002/zero_pp_rank_13_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71485daa15beb34d98b61b470d2bb26a3e54ea6a3f94f19808002433908865af
|
3 |
+
size 266119230
|
global_step134002/zero_pp_rank_14_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffede73946d49177b055f819e4a9ebf2f84ef4922233a092ed0f5d2f9a659b45
|
3 |
+
size 266119230
|
global_step134002/zero_pp_rank_15_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fad252bbde1844e3a1c3d919654faceac5bc268ea49dd11490aefc40a883c570
|
3 |
+
size 266119230
|
global_step134002/zero_pp_rank_1_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12a8df09b13ac54bdf67cc4c3f1f58b1d870fc15673f49ef582945fd64badd11
|
3 |
+
size 266119230
|
global_step134002/zero_pp_rank_2_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b21d333e6c8d8a8b7300d75db7c7c99e8e3fd9c4325286a400d651c25db3b6d9
|
3 |
+
size 266119230
|
global_step134002/zero_pp_rank_3_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0137bda59c3e1762d083b5f771bad13e500955aafba3e8576e0fe07320cbc1b4
|
3 |
+
size 266119230
|
global_step134002/zero_pp_rank_4_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a4e801925c9896c80815dc7a2904a70fde570480877e2f0c48dace7f3ac6066
|
3 |
+
size 266119230
|
global_step134002/zero_pp_rank_5_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd76d2d911c0e4badb9cbc1e7c47fa9be75a364d29b2a41ca1217edbfbedc7de
|
3 |
+
size 266119230
|
global_step134002/zero_pp_rank_6_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df1763ed3e9b54a098ce5bf51304e1f0804cf1dc1e2e32084badf8465147c011
|
3 |
+
size 266119230
|
global_step134002/zero_pp_rank_7_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57cfac681c2c794fcf74abea99f85c894af611396a5bdf2d3994b48f28af6c47
|
3 |
+
size 266119230
|
global_step134002/zero_pp_rank_8_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bcfeb106b11f3c0043a24028cc0498633106c7315ed491da0c972e088d2ccc9
|
3 |
+
size 266119230
|
global_step134002/zero_pp_rank_9_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc00ca2e7e097e17744d949a68c14753048d71bd158928becc74cb366c2dcb93
|
3 |
+
size 266119230
|
latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step134002
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f5b739b03d29ce988229216fbffd3b5eabe4216a8cf56d25800ef7b93811a12
|
3 |
+
size 734884388
|
trainer_state.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:120315ab7795345090636c1196460db4a68b16e526dec383b033f09eb9088abb
|
3 |
+
size 9374094
|