Wikidepia committed on
Commit
d586a13
1 Parent(s): 669357e

Initial Commits

Browse files
README.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: id
3
+ inference: false
4
+ ---
5
+
6
+ # IndoBERT-Lite base fine-tuned on Translated SQuAD v2
7
+
8
+ [IndoBERT-Lite](https://huggingface.co/indobenchmark/indobert-lite-base-p2) was created by [Indo Benchmark](https://www.indobenchmark.com/) and fine-tuned on [Translated SQuAD 2.0](https://github.com/Wikidepia/indonesia_dataset/tree/master/question-answering/SQuAD) for the **Q&A** downstream task.
9
+
10
+ ## Model in action
11
+
12
+ Fast usage with **pipelines**:
13
+
14
+ ```python
15
+ from transformers import BertTokenizerFast, pipeline
16
+
17
+ tokenizer = BertTokenizerFast.from_pretrained(
18
+ 'Wikidepia/indobert-lite-squad'
19
+ )
20
+ qa_pipeline = pipeline(
21
+ "question-answering",
22
+ model="Wikidepia/indobert-lite-squad",
23
+ tokenizer=tokenizer
24
+ )
25
+
26
+ qa_pipeline({
27
+ 'context': "Setelah menghabiskan waktu satu tahun di Praha, Einstein tinggal di Swiss antara tahun 1895 dan 1914, melepas kewarganegaraan Jermannya pada tahun 1896, dan lulus sarjana dari sekolah politeknik federal Swiss (kelak Eidgenössische Technische Hochschule, ETH) di Zürich pada tahun 1900.",
28
+ 'question': "Kapan Einstein melepas kewarganegaraan Jerman?"
29
+ })
30
+ ```
31
+
32
+ # Output:
33
+
34
+ ```json
35
+ {
36
+ "score":0.9799205660820007,
37
+ "start":147,
38
+ "end":151,
39
+ "answer":"1896"
40
+ }
41
+ ```
42
+
43
+ README copied from [mrm8488's repository](https://huggingface.co/mrm8488/bert-tiny-finetuned-squadv2)
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "init_mem_cpu_alloc_delta": 63851,
4
+ "init_mem_cpu_peaked_delta": 18306,
5
+ "init_mem_gpu_alloc_delta": 44382720,
6
+ "init_mem_gpu_peaked_delta": 0,
7
+ "train_mem_cpu_alloc_delta": 507130,
8
+ "train_mem_cpu_peaked_delta": 19299673,
9
+ "train_mem_gpu_alloc_delta": 181302784,
10
+ "train_mem_gpu_peaked_delta": 4986389504,
11
+ "train_runtime": 8610.7771,
12
+ "train_samples_per_second": 3.347
13
+ }
config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "indobert-lite-base-p2",
3
+ "architectures": [
4
+ "AlbertForQuestionAnswering"
5
+ ],
6
+ "attention_probs_dropout_prob": 0,
7
+ "bos_token_id": 2,
8
+ "classifier_dropout_prob": 0.1,
9
+ "down_scale_factor": 1,
10
+ "embedding_size": 128,
11
+ "eos_token_id": 3,
12
+ "gap_size": 0,
13
+ "hidden_act": "gelu",
14
+ "hidden_dropout_prob": 0,
15
+ "hidden_size": 768,
16
+ "initializer_range": 0.02,
17
+ "inner_group_num": 1,
18
+ "intermediate_size": 3072,
19
+ "layer_norm_eps": 1e-12,
20
+ "max_position_embeddings": 512,
21
+ "model_type": "albert",
22
+ "net_structure_type": 0,
23
+ "num_attention_heads": 12,
24
+ "num_hidden_groups": 1,
25
+ "num_hidden_layers": 12,
26
+ "num_memory_blocks": 0,
27
+ "output_past": true,
28
+ "pad_token_id": 0,
29
+ "position_embedding_type": "absolute",
30
+ "transformers_version": "4.4.0.dev0",
31
+ "type_vocab_size": 2,
32
+ "vocab_size": 30000
33
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2052493fa4334cfdb87fd93057c5a1e4b4630db5a6965d18a96d31da2e41e93c
3
+ size 44393497
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "max_len": 512, "use_fast": true, "special_tokens_map_file": "indobert-lite-base-p2/special_tokens_map.json", "name_or_path": "indobert-lite-base-p2", "do_basic_tokenize": true, "never_split": null}
train_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "init_mem_cpu_alloc_delta": 63851,
4
+ "init_mem_cpu_peaked_delta": 18306,
5
+ "init_mem_gpu_alloc_delta": 44382720,
6
+ "init_mem_gpu_peaked_delta": 0,
7
+ "train_mem_cpu_alloc_delta": 507130,
8
+ "train_mem_cpu_peaked_delta": 19299673,
9
+ "train_mem_gpu_alloc_delta": 181302784,
10
+ "train_mem_gpu_peaked_delta": 4986389504,
11
+ "train_runtime": 8610.7771,
12
+ "train_samples_per_second": 3.347
13
+ }
trainer_state.json ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
+ "global_step": 28824,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.07,
12
+ "learning_rate": 2.947960033305579e-05,
13
+ "loss": 2.1557,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.14,
18
+ "learning_rate": 2.8959200666111577e-05,
19
+ "loss": 1.6641,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.21,
24
+ "learning_rate": 2.843880099916736e-05,
25
+ "loss": 1.6261,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.28,
30
+ "learning_rate": 2.7918401332223147e-05,
31
+ "loss": 1.5307,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.35,
36
+ "learning_rate": 2.7398001665278935e-05,
37
+ "loss": 1.4856,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.42,
42
+ "learning_rate": 2.6877601998334723e-05,
43
+ "loss": 1.4515,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.49,
48
+ "learning_rate": 2.635720233139051e-05,
49
+ "loss": 1.4413,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.56,
54
+ "learning_rate": 2.5836802664446297e-05,
55
+ "loss": 1.3974,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.62,
60
+ "learning_rate": 2.531640299750208e-05,
61
+ "loss": 1.3848,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.69,
66
+ "learning_rate": 2.479600333055787e-05,
67
+ "loss": 1.3741,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.76,
72
+ "learning_rate": 2.4275603663613655e-05,
73
+ "loss": 1.3435,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.83,
78
+ "learning_rate": 2.3755203996669443e-05,
79
+ "loss": 1.2997,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.9,
84
+ "learning_rate": 2.323480432972523e-05,
85
+ "loss": 1.3027,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.97,
90
+ "learning_rate": 2.2714404662781016e-05,
91
+ "loss": 1.3309,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 1.04,
96
+ "learning_rate": 2.21940049958368e-05,
97
+ "loss": 1.1215,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 1.11,
102
+ "learning_rate": 2.167360532889259e-05,
103
+ "loss": 1.0274,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 1.18,
108
+ "learning_rate": 2.1153205661948377e-05,
109
+ "loss": 1.0156,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 1.25,
114
+ "learning_rate": 2.0632805995004166e-05,
115
+ "loss": 0.9939,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 1.32,
120
+ "learning_rate": 2.011240632805995e-05,
121
+ "loss": 1.0544,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 1.39,
126
+ "learning_rate": 1.9592006661115735e-05,
127
+ "loss": 1.0023,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 1.46,
132
+ "learning_rate": 1.9071606994171524e-05,
133
+ "loss": 1.0157,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 1.53,
138
+ "learning_rate": 1.8551207327227312e-05,
139
+ "loss": 0.9919,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 1.6,
144
+ "learning_rate": 1.8030807660283097e-05,
145
+ "loss": 1.0093,
146
+ "step": 11500
147
+ },
148
+ {
149
+ "epoch": 1.67,
150
+ "learning_rate": 1.7510407993338885e-05,
151
+ "loss": 0.9634,
152
+ "step": 12000
153
+ },
154
+ {
155
+ "epoch": 1.73,
156
+ "learning_rate": 1.6990008326394673e-05,
157
+ "loss": 1.0209,
158
+ "step": 12500
159
+ },
160
+ {
161
+ "epoch": 1.8,
162
+ "learning_rate": 1.6469608659450458e-05,
163
+ "loss": 0.9783,
164
+ "step": 13000
165
+ },
166
+ {
167
+ "epoch": 1.87,
168
+ "learning_rate": 1.5949208992506246e-05,
169
+ "loss": 0.9689,
170
+ "step": 13500
171
+ },
172
+ {
173
+ "epoch": 1.94,
174
+ "learning_rate": 1.542880932556203e-05,
175
+ "loss": 0.9864,
176
+ "step": 14000
177
+ },
178
+ {
179
+ "epoch": 2.01,
180
+ "learning_rate": 1.490840965861782e-05,
181
+ "loss": 0.9052,
182
+ "step": 14500
183
+ },
184
+ {
185
+ "epoch": 2.08,
186
+ "learning_rate": 1.4388009991673606e-05,
187
+ "loss": 0.673,
188
+ "step": 15000
189
+ },
190
+ {
191
+ "epoch": 2.15,
192
+ "learning_rate": 1.3867610324729393e-05,
193
+ "loss": 0.6972,
194
+ "step": 15500
195
+ },
196
+ {
197
+ "epoch": 2.22,
198
+ "learning_rate": 1.3347210657785179e-05,
199
+ "loss": 0.6885,
200
+ "step": 16000
201
+ },
202
+ {
203
+ "epoch": 2.29,
204
+ "learning_rate": 1.2826810990840966e-05,
205
+ "loss": 0.6846,
206
+ "step": 16500
207
+ },
208
+ {
209
+ "epoch": 2.36,
210
+ "learning_rate": 1.2306411323896754e-05,
211
+ "loss": 0.674,
212
+ "step": 17000
213
+ },
214
+ {
215
+ "epoch": 2.43,
216
+ "learning_rate": 1.1786011656952539e-05,
217
+ "loss": 0.6818,
218
+ "step": 17500
219
+ },
220
+ {
221
+ "epoch": 2.5,
222
+ "learning_rate": 1.1265611990008327e-05,
223
+ "loss": 0.6804,
224
+ "step": 18000
225
+ },
226
+ {
227
+ "epoch": 2.57,
228
+ "learning_rate": 1.0745212323064114e-05,
229
+ "loss": 0.6701,
230
+ "step": 18500
231
+ },
232
+ {
233
+ "epoch": 2.64,
234
+ "learning_rate": 1.02248126561199e-05,
235
+ "loss": 0.6729,
236
+ "step": 19000
237
+ },
238
+ {
239
+ "epoch": 2.71,
240
+ "learning_rate": 9.704412989175687e-06,
241
+ "loss": 0.6802,
242
+ "step": 19500
243
+ },
244
+ {
245
+ "epoch": 2.78,
246
+ "learning_rate": 9.184013322231475e-06,
247
+ "loss": 0.6547,
248
+ "step": 20000
249
+ },
250
+ {
251
+ "epoch": 2.84,
252
+ "learning_rate": 8.66361365528726e-06,
253
+ "loss": 0.6525,
254
+ "step": 20500
255
+ },
256
+ {
257
+ "epoch": 2.91,
258
+ "learning_rate": 8.143213988343048e-06,
259
+ "loss": 0.6627,
260
+ "step": 21000
261
+ },
262
+ {
263
+ "epoch": 2.98,
264
+ "learning_rate": 7.622814321398834e-06,
265
+ "loss": 0.6273,
266
+ "step": 21500
267
+ },
268
+ {
269
+ "epoch": 3.05,
270
+ "learning_rate": 7.102414654454621e-06,
271
+ "loss": 0.48,
272
+ "step": 22000
273
+ },
274
+ {
275
+ "epoch": 3.12,
276
+ "learning_rate": 6.582014987510409e-06,
277
+ "loss": 0.4171,
278
+ "step": 22500
279
+ },
280
+ {
281
+ "epoch": 3.19,
282
+ "learning_rate": 6.061615320566195e-06,
283
+ "loss": 0.4214,
284
+ "step": 23000
285
+ },
286
+ {
287
+ "epoch": 3.26,
288
+ "learning_rate": 5.541215653621982e-06,
289
+ "loss": 0.4104,
290
+ "step": 23500
291
+ },
292
+ {
293
+ "epoch": 3.33,
294
+ "learning_rate": 5.020815986677768e-06,
295
+ "loss": 0.403,
296
+ "step": 24000
297
+ },
298
+ {
299
+ "epoch": 3.4,
300
+ "learning_rate": 4.500416319733556e-06,
301
+ "loss": 0.4146,
302
+ "step": 24500
303
+ },
304
+ {
305
+ "epoch": 3.47,
306
+ "learning_rate": 3.980016652789342e-06,
307
+ "loss": 0.3808,
308
+ "step": 25000
309
+ },
310
+ {
311
+ "epoch": 3.54,
312
+ "learning_rate": 3.4596169858451292e-06,
313
+ "loss": 0.4089,
314
+ "step": 25500
315
+ },
316
+ {
317
+ "epoch": 3.61,
318
+ "learning_rate": 2.9392173189009158e-06,
319
+ "loss": 0.4005,
320
+ "step": 26000
321
+ },
322
+ {
323
+ "epoch": 3.68,
324
+ "learning_rate": 2.4188176519567028e-06,
325
+ "loss": 0.4189,
326
+ "step": 26500
327
+ },
328
+ {
329
+ "epoch": 3.75,
330
+ "learning_rate": 1.8984179850124897e-06,
331
+ "loss": 0.3943,
332
+ "step": 27000
333
+ },
334
+ {
335
+ "epoch": 3.82,
336
+ "learning_rate": 1.3780183180682765e-06,
337
+ "loss": 0.3913,
338
+ "step": 27500
339
+ },
340
+ {
341
+ "epoch": 3.89,
342
+ "learning_rate": 8.576186511240633e-07,
343
+ "loss": 0.3854,
344
+ "step": 28000
345
+ },
346
+ {
347
+ "epoch": 3.96,
348
+ "learning_rate": 3.372189841798501e-07,
349
+ "loss": 0.4227,
350
+ "step": 28500
351
+ },
352
+ {
353
+ "epoch": 4.0,
354
+ "step": 28824,
355
+ "total_flos": 8840905399111680.0,
356
+ "train_runtime": 8610.7771,
357
+ "train_samples_per_second": 3.347
358
+ }
359
+ ],
360
+ "max_steps": 28824,
361
+ "num_train_epochs": 4,
362
+ "total_flos": 8840905399111680.0,
363
+ "trial_name": null,
364
+ "trial_params": null
365
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:647305bc3d53a0d588fdead14853d7c9888ac3241658930b55a31588dc4c42f3
3
+ size 2287
vocab.txt ADDED
The diff for this file is too large to render. See raw diff