Bingsu committed on
Commit
8e68d53
1 Parent(s): 9f873f8

Training in progress, step 10000

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ---
2
+ mask_token: "[MASK]"
3
+ widget:
4
+ - text: 대한민국의 수도는 [MASK]입니다.
5
+ ---
6
+ ![image](https://i.imgur.com/brETMw6.jpg)
config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Bingsu/my_mobilebert_untrained",
3
+ "architectures": [
4
+ "MobileBertForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_activation": false,
8
+ "classifier_dropout": null,
9
+ "embedding_size": 128,
10
+ "hidden_act": "relu",
11
+ "hidden_dropout_prob": 0.0,
12
+ "hidden_size": 512,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 512,
15
+ "intra_bottleneck_size": 128,
16
+ "key_query_shared_bottleneck": true,
17
+ "layer_norm_eps": 1e-12,
18
+ "max_position_embeddings": 512,
19
+ "model_type": "mobilebert",
20
+ "normalization_type": "no_norm",
21
+ "num_attention_heads": 4,
22
+ "num_feedforward_networks": 4,
23
+ "num_hidden_layers": 24,
24
+ "pad_token_id": 1,
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.21.2",
27
+ "trigram_input": true,
28
+ "true_hidden_size": 128,
29
+ "type_vocab_size": 2,
30
+ "use_bottleneck": true,
31
+ "use_bottleneck_attention": false,
32
+ "vocab_size": 30522
33
+ }
last-checkpoint/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Bingsu/my_mobilebert_untrained",
3
+ "architectures": [
4
+ "MobileBertForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_activation": false,
8
+ "classifier_dropout": null,
9
+ "embedding_size": 128,
10
+ "hidden_act": "relu",
11
+ "hidden_dropout_prob": 0.0,
12
+ "hidden_size": 512,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 512,
15
+ "intra_bottleneck_size": 128,
16
+ "key_query_shared_bottleneck": true,
17
+ "layer_norm_eps": 1e-12,
18
+ "max_position_embeddings": 512,
19
+ "model_type": "mobilebert",
20
+ "normalization_type": "no_norm",
21
+ "num_attention_heads": 4,
22
+ "num_feedforward_networks": 4,
23
+ "num_hidden_layers": 24,
24
+ "pad_token_id": 1,
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.21.2",
27
+ "trigram_input": true,
28
+ "true_hidden_size": 128,
29
+ "type_vocab_size": 2,
30
+ "use_bottleneck": true,
31
+ "use_bottleneck_attention": false,
32
+ "vocab_size": 30522
33
+ }
last-checkpoint/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53f86b7c1030ebca2a6c99c8f28aa18cf67208a00e08fce91eea733b0bec240d
3
+ size 586826597
last-checkpoint/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf968dcd05b6ea7291b9aeb7e5ef41b98d5f49af58bdfb3e0c4d109ecd867493
3
+ size 146774203
last-checkpoint/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61fc86e3bcf87c025896dabb3d26ce5ca4103ec874557dd3084c738eb61ad888
3
+ size 14503
last-checkpoint/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e22a3cb16dddcf5c937c1bec07637df13115b6ed917ca6d1cf2116057954b0ce
3
+ size 559
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3821f79e5d56fda48d85057de6b8b28f1ffc595c942da877ef01bd25d14ff514
3
+ size 733553608
last-checkpoint/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
last-checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_lower_case": true,
4
+ "mask_token": "[MASK]",
5
+ "model_max_length": 512,
6
+ "name_or_path": "Bingsu/my_mobilebert_untrained",
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "special_tokens_map_file": null,
10
+ "strip_accents": null,
11
+ "tokenize_chinese_chars": true,
12
+ "tokenizer_class": "MobileBertTokenizer",
13
+ "unk_token": "[UNK]"
14
+ }
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.04297249330703417,
5
+ "global_step": 10000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.0,
12
+ "learning_rate": 0.0004000105276443632,
13
+ "loss": 10.1199,
14
+ "step": 200
15
+ },
16
+ {
17
+ "epoch": 0.0,
18
+ "learning_rate": 0.00040004211053127486,
19
+ "loss": 9.997,
20
+ "step": 400
21
+ },
22
+ {
23
+ "epoch": 0.0,
24
+ "learning_rate": 0.000400094748522194,
25
+ "loss": 9.9386,
26
+ "step": 600
27
+ },
28
+ {
29
+ "epoch": 0.0,
30
+ "learning_rate": 0.00040016844138622554,
31
+ "loss": 9.8988,
32
+ "step": 800
33
+ },
34
+ {
35
+ "epoch": 0.0,
36
+ "learning_rate": 0.0004002631888001141,
37
+ "loss": 9.8579,
38
+ "step": 1000
39
+ },
40
+ {
41
+ "epoch": 0.01,
42
+ "learning_rate": 0.0004003789903482477,
43
+ "loss": 9.8159,
44
+ "step": 1200
45
+ },
46
+ {
47
+ "epoch": 0.01,
48
+ "learning_rate": 0.0004005158455226594,
49
+ "loss": 9.7867,
50
+ "step": 1400
51
+ },
52
+ {
53
+ "epoch": 0.01,
54
+ "learning_rate": 0.0004006737537230326,
55
+ "loss": 9.7605,
56
+ "step": 1600
57
+ },
58
+ {
59
+ "epoch": 0.01,
60
+ "learning_rate": 0.0004008527142566991,
61
+ "loss": 9.7357,
62
+ "step": 1800
63
+ },
64
+ {
65
+ "epoch": 0.01,
66
+ "learning_rate": 0.0004010527263386479,
67
+ "loss": 9.7138,
68
+ "step": 2000
69
+ },
70
+ {
71
+ "epoch": 0.01,
72
+ "learning_rate": 0.00040127378909152016,
73
+ "loss": 9.6894,
74
+ "step": 2200
75
+ },
76
+ {
77
+ "epoch": 0.01,
78
+ "learning_rate": 0.000401515901545621,
79
+ "loss": 9.6634,
80
+ "step": 2400
81
+ },
82
+ {
83
+ "epoch": 0.01,
84
+ "learning_rate": 0.00040177906263891804,
85
+ "loss": 9.6451,
86
+ "step": 2600
87
+ },
88
+ {
89
+ "epoch": 0.01,
90
+ "learning_rate": 0.00040206327121705167,
91
+ "loss": 9.6279,
92
+ "step": 2800
93
+ },
94
+ {
95
+ "epoch": 0.01,
96
+ "learning_rate": 0.00040236852603333685,
97
+ "loss": 9.6038,
98
+ "step": 3000
99
+ },
100
+ {
101
+ "epoch": 0.01,
102
+ "learning_rate": 0.0004026948257487631,
103
+ "loss": 9.5874,
104
+ "step": 3200
105
+ },
106
+ {
107
+ "epoch": 0.01,
108
+ "learning_rate": 0.00040304216893201697,
109
+ "loss": 9.5729,
110
+ "step": 3400
111
+ },
112
+ {
113
+ "epoch": 0.02,
114
+ "learning_rate": 0.0004034105540594666,
115
+ "loss": 9.547,
116
+ "step": 3600
117
+ },
118
+ {
119
+ "epoch": 0.02,
120
+ "learning_rate": 0.0004037999795151858,
121
+ "loss": 9.5348,
122
+ "step": 3800
123
+ },
124
+ {
125
+ "epoch": 0.02,
126
+ "learning_rate": 0.0004042104435909525,
127
+ "loss": 9.5207,
128
+ "step": 4000
129
+ },
130
+ {
131
+ "epoch": 0.02,
132
+ "learning_rate": 0.0004046419444862573,
133
+ "loss": 9.5061,
134
+ "step": 4200
135
+ },
136
+ {
137
+ "epoch": 0.02,
138
+ "learning_rate": 0.0004050944803083139,
139
+ "loss": 9.493,
140
+ "step": 4400
141
+ },
142
+ {
143
+ "epoch": 0.02,
144
+ "learning_rate": 0.0004055680490720661,
145
+ "loss": 9.4782,
146
+ "step": 4600
147
+ },
148
+ {
149
+ "epoch": 0.02,
150
+ "learning_rate": 0.0004060626487001964,
151
+ "loss": 9.4636,
152
+ "step": 4800
153
+ },
154
+ {
155
+ "epoch": 0.02,
156
+ "learning_rate": 0.0004065782770231313,
157
+ "loss": 9.4546,
158
+ "step": 5000
159
+ },
160
+ {
161
+ "epoch": 0.02,
162
+ "learning_rate": 0.000407114931779062,
163
+ "loss": 9.4453,
164
+ "step": 5200
165
+ },
166
+ {
167
+ "epoch": 0.02,
168
+ "learning_rate": 0.00040767261061393917,
169
+ "loss": 9.4174,
170
+ "step": 5400
171
+ },
172
+ {
173
+ "epoch": 0.02,
174
+ "learning_rate": 0.00040825131108149573,
175
+ "loss": 9.4159,
176
+ "step": 5600
177
+ },
178
+ {
179
+ "epoch": 0.02,
180
+ "learning_rate": 0.00040885103064325357,
181
+ "loss": 9.3993,
182
+ "step": 5800
183
+ },
184
+ {
185
+ "epoch": 0.03,
186
+ "learning_rate": 0.00040947176666852707,
187
+ "loss": 9.3953,
188
+ "step": 6000
189
+ },
190
+ {
191
+ "epoch": 0.03,
192
+ "learning_rate": 0.00041011351643444917,
193
+ "loss": 9.3854,
194
+ "step": 6200
195
+ },
196
+ {
197
+ "epoch": 0.03,
198
+ "learning_rate": 0.0004107762771259713,
199
+ "loss": 9.3679,
200
+ "step": 6400
201
+ },
202
+ {
203
+ "epoch": 0.03,
204
+ "learning_rate": 0.0004114600458358809,
205
+ "loss": 9.3595,
206
+ "step": 6600
207
+ },
208
+ {
209
+ "epoch": 0.03,
210
+ "learning_rate": 0.00041216481956481664,
211
+ "loss": 9.3504,
212
+ "step": 6800
213
+ },
214
+ {
215
+ "epoch": 0.03,
216
+ "learning_rate": 0.00041289059522127414,
217
+ "loss": 9.3417,
218
+ "step": 7000
219
+ },
220
+ {
221
+ "epoch": 0.03,
222
+ "learning_rate": 0.0004136373696216229,
223
+ "loss": 9.3275,
224
+ "step": 7200
225
+ },
226
+ {
227
+ "epoch": 0.03,
228
+ "learning_rate": 0.0004144051394901274,
229
+ "loss": 9.3201,
230
+ "step": 7400
231
+ },
232
+ {
233
+ "epoch": 0.03,
234
+ "learning_rate": 0.0004151939014589469,
235
+ "loss": 9.3123,
236
+ "step": 7600
237
+ },
238
+ {
239
+ "epoch": 0.03,
240
+ "learning_rate": 0.0004160036520681667,
241
+ "loss": 9.3084,
242
+ "step": 7800
243
+ },
244
+ {
245
+ "epoch": 0.03,
246
+ "learning_rate": 0.0004168343877657965,
247
+ "loss": 9.2954,
248
+ "step": 8000
249
+ },
250
+ {
251
+ "epoch": 0.04,
252
+ "learning_rate": 0.00041768179413688954,
253
+ "loss": 9.2862,
254
+ "step": 8200
255
+ },
256
+ {
257
+ "epoch": 0.04,
258
+ "learning_rate": 0.00041855438410810103,
259
+ "loss": 9.283,
260
+ "step": 8400
261
+ },
262
+ {
263
+ "epoch": 0.04,
264
+ "learning_rate": 0.00041944794797888797,
265
+ "loss": 9.2711,
266
+ "step": 8600
267
+ },
268
+ {
269
+ "epoch": 0.04,
270
+ "learning_rate": 0.00042036248182962185,
271
+ "loss": 9.2726,
272
+ "step": 8800
273
+ },
274
+ {
275
+ "epoch": 0.04,
276
+ "learning_rate": 0.0004212979816486783,
277
+ "loss": 9.2621,
278
+ "step": 9000
279
+ },
280
+ {
281
+ "epoch": 0.04,
282
+ "learning_rate": 0.00042225444333247354,
283
+ "loss": 9.2527,
284
+ "step": 9200
285
+ },
286
+ {
287
+ "epoch": 0.04,
288
+ "learning_rate": 0.0004232318626854678,
289
+ "loss": 9.2453,
290
+ "step": 9400
291
+ },
292
+ {
293
+ "epoch": 0.04,
294
+ "learning_rate": 0.0004242302354201949,
295
+ "loss": 9.2314,
296
+ "step": 9600
297
+ },
298
+ {
299
+ "epoch": 0.04,
300
+ "learning_rate": 0.000425249557157276,
301
+ "loss": 9.2337,
302
+ "step": 9800
303
+ },
304
+ {
305
+ "epoch": 0.04,
306
+ "learning_rate": 0.00042628982342543184,
307
+ "loss": 9.2276,
308
+ "step": 10000
309
+ }
310
+ ],
311
+ "max_steps": 1000000,
312
+ "num_train_epochs": 5,
313
+ "total_flos": 1.593829982208e+16,
314
+ "trial_name": null,
315
+ "trial_params": null
316
+ }
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c33b030231937c49711cbb55890f65b86f81a68638a6a5c30e4f67ed0b41b6f
3
+ size 3375
last-checkpoint/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf968dcd05b6ea7291b9aeb7e5ef41b98d5f49af58bdfb3e0c4d109ecd867493
3
+ size 146774203
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_lower_case": true,
4
+ "mask_token": "[MASK]",
5
+ "model_max_length": 512,
6
+ "name_or_path": "Bingsu/my_mobilebert_untrained",
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "special_tokens_map_file": null,
10
+ "strip_accents": null,
11
+ "tokenize_chinese_chars": true,
12
+ "tokenizer_class": "MobileBertTokenizer",
13
+ "unk_token": "[UNK]"
14
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c33b030231937c49711cbb55890f65b86f81a68638a6a5c30e4f67ed0b41b6f
3
+ size 3375
vocab.json ADDED
The diff for this file is too large to render. See raw diff