redo-checkpoint-224000
Browse files- config.json +2 -6
- global_step224011/mp_rank_00_model_states.pt +3 -0
- global_step224011/zero_pp_rank_0_mp_rank_00optim_states.pt +3 -0
- global_step224011/zero_pp_rank_1_mp_rank_00optim_states.pt +3 -0
- global_step224011/zero_pp_rank_2_mp_rank_00optim_states.pt +3 -0
- global_step224011/zero_pp_rank_3_mp_rank_00optim_states.pt +3 -0
- global_step224011/zero_pp_rank_4_mp_rank_00optim_states.pt +3 -0
- global_step224011/zero_pp_rank_5_mp_rank_00optim_states.pt +3 -0
- global_step224011/zero_pp_rank_6_mp_rank_00optim_states.pt +3 -0
- global_step224011/zero_pp_rank_7_mp_rank_00optim_states.pt +3 -0
- latest +1 -1
- pytorch_model.bin +1 -1
- trainer_state.json +2 -2
config.json
CHANGED
@@ -1,8 +1,7 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "stanford-crfm/celebrimbor-gpt2-medium-x81",
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
-
"
|
6 |
],
|
7 |
"attn_pdrop": 0.1,
|
8 |
"bos_token_id": 50256,
|
@@ -20,10 +19,7 @@
|
|
20 |
"n_positions": 1024,
|
21 |
"n_special": 0,
|
22 |
"predict_special_tokens": true,
|
23 |
-
"reorder_and_upcast_attn": true,
|
24 |
"resid_pdrop": 0.1,
|
25 |
-
"scale_attn_by_inverse_layer_idx": true,
|
26 |
-
"scale_attn_weights": true,
|
27 |
"summary_activation": null,
|
28 |
"summary_first_dropout": 0.1,
|
29 |
"summary_proj_to_labels": true,
|
@@ -35,7 +31,7 @@
|
|
35 |
"max_length": 50
|
36 |
}
|
37 |
},
|
38 |
-
"transformers_version": "4.
|
39 |
"use_cache": false,
|
40 |
"vocab_size": 50257
|
41 |
}
|
|
|
1 |
{
|
|
|
2 |
"activation_function": "gelu_new",
|
3 |
"architectures": [
|
4 |
+
"MistralGPT2LMHeadModel"
|
5 |
],
|
6 |
"attn_pdrop": 0.1,
|
7 |
"bos_token_id": 50256,
|
|
|
19 |
"n_positions": 1024,
|
20 |
"n_special": 0,
|
21 |
"predict_special_tokens": true,
|
|
|
22 |
"resid_pdrop": 0.1,
|
|
|
|
|
23 |
"summary_activation": null,
|
24 |
"summary_first_dropout": 0.1,
|
25 |
"summary_proj_to_labels": true,
|
|
|
31 |
"max_length": 50
|
32 |
}
|
33 |
},
|
34 |
+
"transformers_version": "4.5.0",
|
35 |
"use_cache": false,
|
36 |
"vocab_size": 50257
|
37 |
}
|
global_step224011/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8611392c1049fdee0b542a4d29ca144bb1334dbf899a492476b85fce595c686d
|
3 |
+
size 734886248
|
global_step224011/zero_pp_rank_0_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af2e21f391c6c933293b8cbe894d7e309e237483c4181568be0812da21d9f5ff
|
3 |
+
size 266119242
|
global_step224011/zero_pp_rank_1_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6987ab58104ccd96f9a91d4aa0bdee6b000cc30cceaf61679ff6783a3974db06
|
3 |
+
size 266119242
|
global_step224011/zero_pp_rank_2_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9431fb909beb02d5b6584ed7a03d2fd9e261a492d66acda68416a91629932776
|
3 |
+
size 266119242
|
global_step224011/zero_pp_rank_3_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c750457f453fed4d6630433b463f16221a5825ba5c91c749b9e5fb0eed7da0a
|
3 |
+
size 266119242
|
global_step224011/zero_pp_rank_4_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9ccbbe6b268c400946299a37e17140472d52d8738cd5bd082b780cf09fa9196
|
3 |
+
size 266119242
|
global_step224011/zero_pp_rank_5_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4faeee1db3554f70d5e443bd5070ad51e1a5a279066a3728b325abaad9b43bc7
|
3 |
+
size 266119242
|
global_step224011/zero_pp_rank_6_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6181d85ae79147c88c4a80045edd7dbda650d7b47be192ac2dcc03c8780bfa6
|
3 |
+
size 266119242
|
global_step224011/zero_pp_rank_7_mp_rank_00optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba264912db52c13d29e971efd258b5edbb9fe5fdd1cb0cd5cb85c0b4d4fed220
|
3 |
+
size 266119242
|
latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step224011
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 734885928
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb1387226a204beb240f551b88f183d99903f7fca6a85e1a0fb95b7fdaf5a242
|
3 |
size 734885928
|
trainer_state.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab15f7b14dd53416bc7ad71fa88e82e50a944cde9da707484f962e51dba0c31d
|
3 |
+
size 15668929
|