J38 committed on
Commit
c507866
1 Parent(s): 363c37e

redo-checkpoint-224000

Browse files
config.json CHANGED
@@ -1,8 +1,7 @@
1
  {
2
- "_name_or_path": "stanford-crfm/celebrimbor-gpt2-medium-x81",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
- "GPT2LMHeadModel"
6
  ],
7
  "attn_pdrop": 0.1,
8
  "bos_token_id": 50256,
@@ -20,10 +19,7 @@
20
  "n_positions": 1024,
21
  "n_special": 0,
22
  "predict_special_tokens": true,
23
- "reorder_and_upcast_attn": true,
24
  "resid_pdrop": 0.1,
25
- "scale_attn_by_inverse_layer_idx": true,
26
- "scale_attn_weights": true,
27
  "summary_activation": null,
28
  "summary_first_dropout": 0.1,
29
  "summary_proj_to_labels": true,
@@ -35,7 +31,7 @@
35
  "max_length": 50
36
  }
37
  },
38
- "transformers_version": "4.12.5",
39
  "use_cache": false,
40
  "vocab_size": 50257
41
  }
 
1
  {
 
2
  "activation_function": "gelu_new",
3
  "architectures": [
4
+ "MistralGPT2LMHeadModel"
5
  ],
6
  "attn_pdrop": 0.1,
7
  "bos_token_id": 50256,
 
19
  "n_positions": 1024,
20
  "n_special": 0,
21
  "predict_special_tokens": true,
 
22
  "resid_pdrop": 0.1,
 
 
23
  "summary_activation": null,
24
  "summary_first_dropout": 0.1,
25
  "summary_proj_to_labels": true,
 
31
  "max_length": 50
32
  }
33
  },
34
+ "transformers_version": "4.5.0",
35
  "use_cache": false,
36
  "vocab_size": 50257
37
  }
global_step224011/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8611392c1049fdee0b542a4d29ca144bb1334dbf899a492476b85fce595c686d
3
+ size 734886248
global_step224011/zero_pp_rank_0_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af2e21f391c6c933293b8cbe894d7e309e237483c4181568be0812da21d9f5ff
3
+ size 266119242
global_step224011/zero_pp_rank_1_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6987ab58104ccd96f9a91d4aa0bdee6b000cc30cceaf61679ff6783a3974db06
3
+ size 266119242
global_step224011/zero_pp_rank_2_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9431fb909beb02d5b6584ed7a03d2fd9e261a492d66acda68416a91629932776
3
+ size 266119242
global_step224011/zero_pp_rank_3_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c750457f453fed4d6630433b463f16221a5825ba5c91c749b9e5fb0eed7da0a
3
+ size 266119242
global_step224011/zero_pp_rank_4_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9ccbbe6b268c400946299a37e17140472d52d8738cd5bd082b780cf09fa9196
3
+ size 266119242
global_step224011/zero_pp_rank_5_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4faeee1db3554f70d5e443bd5070ad51e1a5a279066a3728b325abaad9b43bc7
3
+ size 266119242
global_step224011/zero_pp_rank_6_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6181d85ae79147c88c4a80045edd7dbda650d7b47be192ac2dcc03c8780bfa6
3
+ size 266119242
global_step224011/zero_pp_rank_7_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba264912db52c13d29e971efd258b5edbb9fe5fdd1cb0cd5cb85c0b4d4fed220
3
+ size 266119242
latest CHANGED
@@ -1 +1 @@
1
- global_step99004
 
1
+ global_step224011
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc2666775d30b92af2628777c6e0847c5f526182ba8a111a1978760e6198aedb
3
  size 734885928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb1387226a204beb240f551b88f183d99903f7fca6a85e1a0fb95b7fdaf5a242
3
  size 734885928
trainer_state.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:090ef154fad084210cc130292d9f3fa9b4d4bbe24a29f527df898b2fbf1756ba
3
- size 6922082
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab15f7b14dd53416bc7ad71fa88e82e50a944cde9da707484f962e51dba0c31d
3
+ size 15668929