kevinwang676 commited on
Commit
0f6092c
1 Parent(s): b8d8149

Delete checkpoint-400

Browse files
checkpoint-400/config.json DELETED
@@ -1,47 +0,0 @@
1
- {
2
- "_name_or_path": "chatglm2-6b",
3
- "add_bias_linear": false,
4
- "add_qkv_bias": true,
5
- "apply_query_key_layer_scaling": true,
6
- "apply_residual_connection_post_layernorm": false,
7
- "architectures": [
8
- "ChatGLMForConditionalGeneration"
9
- ],
10
- "attention_dropout": 0.0,
11
- "attention_softmax_in_fp32": true,
12
- "auto_map": {
13
- "AutoConfig": "configuration_chatglm.ChatGLMConfig",
14
- "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
15
- "AutoModelForCausalLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
16
- "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
17
- "AutoModelForSequenceClassification": "modeling_chatglm.ChatGLMForSequenceClassification"
18
- },
19
- "bias_dropout_fusion": true,
20
- "classifier_dropout": null,
21
- "eos_token_id": 2,
22
- "ffn_hidden_size": 13696,
23
- "fp32_residual_connection": false,
24
- "hidden_dropout": 0.0,
25
- "hidden_size": 4096,
26
- "kv_channels": 128,
27
- "layernorm_epsilon": 1e-05,
28
- "model_type": "chatglm",
29
- "multi_query_attention": true,
30
- "multi_query_group_num": 2,
31
- "num_attention_heads": 32,
32
- "num_layers": 28,
33
- "original_rope": true,
34
- "pad_token_id": 0,
35
- "padded_vocab_size": 65024,
36
- "post_layer_norm": true,
37
- "pre_seq_len": 128,
38
- "prefix_projection": false,
39
- "quantization_bit": 0,
40
- "rmsnorm": true,
41
- "seq_length": 32768,
42
- "tie_word_embeddings": false,
43
- "torch_dtype": "float16",
44
- "transformers_version": "4.30.2",
45
- "use_cache": true,
46
- "vocab_size": 65024
47
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-400/generation_config.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "eos_token_id": 2,
4
- "pad_token_id": 0,
5
- "transformers_version": "4.30.2"
6
- }
 
 
 
 
 
 
 
checkpoint-400/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ddda63cbe968668b459a73f0a54c34fc36c007f9f202063794ded2a8814a37a
3
- size 14681892
 
 
 
 
checkpoint-400/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b954c8f23337c53ad1c86bafb2969338878db3b96c2bc2459aa04e1198a2141
3
- size 7341306
 
 
 
 
checkpoint-400/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:11204a688e287bc0c7409fba921f7fd490e9471d91d738932d045851e4742a4e
3
- size 14244
 
 
 
 
checkpoint-400/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c32c17fb8a573adc159285286f456bfb53c7e2d80664d0c2cce541b6013ed8d7
3
- size 1064
 
 
 
 
checkpoint-400/special_tokens_map.json DELETED
@@ -1 +0,0 @@
1
- {}
 
 
checkpoint-400/tokenizer.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7dc4c393423b76e4373e5157ddc34803a0189ba96b21ddbb40269d31468a6f2
3
- size 1018370
 
 
 
 
checkpoint-400/tokenizer_config.json DELETED
@@ -1,14 +0,0 @@
1
- {
2
- "auto_map": {
3
- "AutoTokenizer": [
4
- "tokenization_chatglm.ChatGLMTokenizer",
5
- null
6
- ]
7
- },
8
- "clean_up_tokenization_spaces": false,
9
- "do_lower_case": false,
10
- "model_max_length": 1000000000000000019884624838656,
11
- "padding_side": "left",
12
- "remove_space": false,
13
- "tokenizer_class": "ChatGLMTokenizer"
14
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-400/trainer_state.json DELETED
@@ -1,256 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 65.3061224489796,
5
- "global_step": 400,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 1.63,
12
- "learning_rate": 0.009833333333333333,
13
- "loss": 2.53,
14
- "step": 10
15
- },
16
- {
17
- "epoch": 3.27,
18
- "learning_rate": 0.009666666666666667,
19
- "loss": 2.0016,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 4.9,
24
- "learning_rate": 0.0095,
25
- "loss": 1.7775,
26
- "step": 30
27
- },
28
- {
29
- "epoch": 6.53,
30
- "learning_rate": 0.009333333333333334,
31
- "loss": 1.6576,
32
- "step": 40
33
- },
34
- {
35
- "epoch": 8.16,
36
- "learning_rate": 0.009166666666666667,
37
- "loss": 1.5048,
38
- "step": 50
39
- },
40
- {
41
- "epoch": 9.8,
42
- "learning_rate": 0.009000000000000001,
43
- "loss": 1.3572,
44
- "step": 60
45
- },
46
- {
47
- "epoch": 11.43,
48
- "learning_rate": 0.008833333333333334,
49
- "loss": 1.2067,
50
- "step": 70
51
- },
52
- {
53
- "epoch": 13.06,
54
- "learning_rate": 0.008666666666666668,
55
- "loss": 1.0777,
56
- "step": 80
57
- },
58
- {
59
- "epoch": 14.69,
60
- "learning_rate": 0.0085,
61
- "loss": 0.9188,
62
- "step": 90
63
- },
64
- {
65
- "epoch": 16.33,
66
- "learning_rate": 0.008333333333333333,
67
- "loss": 0.7241,
68
- "step": 100
69
- },
70
- {
71
- "epoch": 17.96,
72
- "learning_rate": 0.008166666666666666,
73
- "loss": 0.5775,
74
- "step": 110
75
- },
76
- {
77
- "epoch": 19.59,
78
- "learning_rate": 0.008,
79
- "loss": 0.4235,
80
- "step": 120
81
- },
82
- {
83
- "epoch": 21.22,
84
- "learning_rate": 0.007833333333333333,
85
- "loss": 0.3182,
86
- "step": 130
87
- },
88
- {
89
- "epoch": 22.86,
90
- "learning_rate": 0.007666666666666667,
91
- "loss": 0.2155,
92
- "step": 140
93
- },
94
- {
95
- "epoch": 24.49,
96
- "learning_rate": 0.0075,
97
- "loss": 0.1633,
98
- "step": 150
99
- },
100
- {
101
- "epoch": 26.12,
102
- "learning_rate": 0.007333333333333333,
103
- "loss": 0.1234,
104
- "step": 160
105
- },
106
- {
107
- "epoch": 27.76,
108
- "learning_rate": 0.007166666666666667,
109
- "loss": 0.0911,
110
- "step": 170
111
- },
112
- {
113
- "epoch": 29.39,
114
- "learning_rate": 0.006999999999999999,
115
- "loss": 0.0738,
116
- "step": 180
117
- },
118
- {
119
- "epoch": 31.02,
120
- "learning_rate": 0.006833333333333334,
121
- "loss": 0.0673,
122
- "step": 190
123
- },
124
- {
125
- "epoch": 32.65,
126
- "learning_rate": 0.006666666666666666,
127
- "loss": 0.0544,
128
- "step": 200
129
- },
130
- {
131
- "epoch": 34.29,
132
- "learning_rate": 0.006500000000000001,
133
- "loss": 0.0492,
134
- "step": 210
135
- },
136
- {
137
- "epoch": 35.92,
138
- "learning_rate": 0.006333333333333333,
139
- "loss": 0.0458,
140
- "step": 220
141
- },
142
- {
143
- "epoch": 37.55,
144
- "learning_rate": 0.0061666666666666675,
145
- "loss": 0.0434,
146
- "step": 230
147
- },
148
- {
149
- "epoch": 39.18,
150
- "learning_rate": 0.006,
151
- "loss": 0.0387,
152
- "step": 240
153
- },
154
- {
155
- "epoch": 40.82,
156
- "learning_rate": 0.005833333333333334,
157
- "loss": 0.0375,
158
- "step": 250
159
- },
160
- {
161
- "epoch": 42.45,
162
- "learning_rate": 0.005666666666666666,
163
- "loss": 0.0363,
164
- "step": 260
165
- },
166
- {
167
- "epoch": 44.08,
168
- "learning_rate": 0.0055000000000000005,
169
- "loss": 0.0347,
170
- "step": 270
171
- },
172
- {
173
- "epoch": 45.71,
174
- "learning_rate": 0.005333333333333333,
175
- "loss": 0.0341,
176
- "step": 280
177
- },
178
- {
179
- "epoch": 47.35,
180
- "learning_rate": 0.0051666666666666675,
181
- "loss": 0.0327,
182
- "step": 290
183
- },
184
- {
185
- "epoch": 48.98,
186
- "learning_rate": 0.005,
187
- "loss": 0.0307,
188
- "step": 300
189
- },
190
- {
191
- "epoch": 50.61,
192
- "learning_rate": 0.004833333333333334,
193
- "loss": 0.031,
194
- "step": 310
195
- },
196
- {
197
- "epoch": 52.24,
198
- "learning_rate": 0.004666666666666667,
199
- "loss": 0.0312,
200
- "step": 320
201
- },
202
- {
203
- "epoch": 53.88,
204
- "learning_rate": 0.0045000000000000005,
205
- "loss": 0.033,
206
- "step": 330
207
- },
208
- {
209
- "epoch": 55.51,
210
- "learning_rate": 0.004333333333333334,
211
- "loss": 0.0294,
212
- "step": 340
213
- },
214
- {
215
- "epoch": 57.14,
216
- "learning_rate": 0.004166666666666667,
217
- "loss": 0.0308,
218
- "step": 350
219
- },
220
- {
221
- "epoch": 58.78,
222
- "learning_rate": 0.004,
223
- "loss": 0.0301,
224
- "step": 360
225
- },
226
- {
227
- "epoch": 60.41,
228
- "learning_rate": 0.0038333333333333336,
229
- "loss": 0.0292,
230
- "step": 370
231
- },
232
- {
233
- "epoch": 62.04,
234
- "learning_rate": 0.0036666666666666666,
235
- "loss": 0.0316,
236
- "step": 380
237
- },
238
- {
239
- "epoch": 63.67,
240
- "learning_rate": 0.0034999999999999996,
241
- "loss": 0.0302,
242
- "step": 390
243
- },
244
- {
245
- "epoch": 65.31,
246
- "learning_rate": 0.003333333333333333,
247
- "loss": 0.0295,
248
- "step": 400
249
- }
250
- ],
251
- "max_steps": 600,
252
- "num_train_epochs": 100,
253
- "total_flos": 4.702992625093837e+17,
254
- "trial_name": null,
255
- "trial_params": null
256
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-400/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:df0a343e1f2ccb38a19082ba999546089030c0e15418471a24d346cbb68fa7af
3
- size 4472