w11wo commited on
Commit
5ad8060
1 Parent(s): ae7dcc1

Training in progress, epoch 1

Browse files
added_tokens.json ADDED
@@ -0,0 +1,1513 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "##0e": 30913,
3
+ "##abab": 30658,
4
+ "##abaya": 30830,
5
+ "##abit": 31782,
6
+ "##abo": 32013,
7
+ "##abungkan": 30614,
8
+ "##acan": 31103,
9
+ "##adb": 30661,
10
+ "##ados": 30819,
11
+ "##aduhan": 31609,
12
+ "##agaan": 31822,
13
+ "##agalan": 30826,
14
+ "##agas": 31941,
15
+ "##ahag": 30808,
16
+ "##ahagian": 31516,
17
+ "##ahagikan": 31204,
18
+ "##ahar": 30757,
19
+ "##ahr": 31165,
20
+ "##ahw": 30759,
21
+ "##aid": 30981,
22
+ "##aik": 30784,
23
+ "##aire": 31819,
24
+ "##ajeng": 31456,
25
+ "##akake": 31722,
26
+ "##akamkan": 31025,
27
+ "##akeuh": 32010,
28
+ "##akeun": 30853,
29
+ "##akeup": 32025,
30
+ "##akia": 31857,
31
+ "##akl": 31098,
32
+ "##aklumkan": 30986,
33
+ "##akon": 30738,
34
+ "##akukeun": 31454,
35
+ "##aland": 31163,
36
+ "##aliti": 30703,
37
+ "##ally": 30786,
38
+ "##aluk": 30632,
39
+ "##amane": 30547,
40
+ "##amonyo": 30814,
41
+ "##ampiran": 31561,
42
+ "##ams": 30665,
43
+ "##ancara": 31756,
44
+ "##andh": 31001,
45
+ "##angge": 30939,
46
+ "##anggeus": 31164,
47
+ "##anggo": 31713,
48
+ "##angkaian": 31035,
49
+ "##angkeun": 31828,
50
+ "##aning": 31496,
51
+ "##anjaya": 31088,
52
+ "##antes": 31854,
53
+ "##antos": 30664,
54
+ "##antuak": 31564,
55
+ "##anyo": 31280,
56
+ "##apo": 31741,
57
+ "##aptar": 31352,
58
+ "##arag": 31205,
59
+ "##araha": 30750,
60
+ "##arahkan": 31478,
61
+ "##arane": 31537,
62
+ "##ares": 30878,
63
+ "##aritu": 31267,
64
+ "##arya": 31180,
65
+ "##asane": 31040,
66
+ "##asemen": 31716,
67
+ "##asifikasi": 31347,
68
+ "##astikan": 31935,
69
+ "##asup": 30692,
70
+ "##atangani": 30933,
71
+ "##ateng": 30638,
72
+ "##atet": 31078,
73
+ "##atip": 30571,
74
+ "##autan": 30977,
75
+ "##auty": 31678,
76
+ "##awis": 31967,
77
+ "##awot": 30696,
78
+ "##awu": 30535,
79
+ "##ayaran": 30767,
80
+ "##beauty": 31362,
81
+ "##bitkan": 31540,
82
+ "##cayaan": 31839,
83
+ "##ceae": 30822,
84
+ "##cidae": 30708,
85
+ "##deon": 31311,
86
+ "##ebabkan": 31094,
87
+ "##ebaskan": 31023,
88
+ "##ebel": 31543,
89
+ "##ebor": 31627,
90
+ "##echt": 31351,
91
+ "##ecio": 31788,
92
+ "##edd": 30736,
93
+ "##edding": 31208,
94
+ "##eden": 31402,
95
+ "##edh": 31562,
96
+ "##edhi": 31123,
97
+ "##edhiy": 31400,
98
+ "##edonia": 30834,
99
+ "##efinis": 31911,
100
+ "##egori": 30616,
101
+ "##egoro": 31975,
102
+ "##ehanana": 31479,
103
+ "##ehna": 30720,
104
+ "##ekake": 31558,
105
+ "##ekalkan": 30688,
106
+ "##ekeu": 30818,
107
+ "##ekl": 31404,
108
+ "##ektur": 30870,
109
+ "##ekut": 30996,
110
+ "##ekutuan": 31032,
111
+ "##elesaikan": 31885,
112
+ "##eleu": 31968,
113
+ "##elu": 31458,
114
+ "##eluar": 31053,
115
+ "##eluaran": 31573,
116
+ "##eluarkan": 31658,
117
+ "##embagaan": 31396,
118
+ "##empor": 31585,
119
+ "##ench": 30727,
120
+ "##endalikan": 31110,
121
+ "##enengan": 31398,
122
+ "##engen": 31052,
123
+ "##enggan": 31139,
124
+ "##engganu": 30776,
125
+ "##engi": 31576,
126
+ "##engkon": 31790,
127
+ "##enne": 31541,
128
+ "##ensina": 31709,
129
+ "##entah": 31253,
130
+ "##enten": 31523,
131
+ "##eop": 31005,
132
+ "##eor": 30594,
133
+ "##epe": 30645,
134
+ "##epeu": 31682,
135
+ "##epl": 31874,
136
+ "##erak": 30606,
137
+ "##eres": 31922,
138
+ "##erikan": 30653,
139
+ "##eris": 31924,
140
+ "##eritakan": 31403,
141
+ "##erj": 30629,
142
+ "##erja": 31089,
143
+ "##erjaan": 31913,
144
+ "##erjakan": 30879,
145
+ "##erl": 31886,
146
+ "##erland": 31081,
147
+ "##eron": 30576,
148
+ "##eroning": 31156,
149
+ "##ersi": 31468,
150
+ "##ersial": 31239,
151
+ "##ertai": 31119,
152
+ "##erti": 31132,
153
+ "##ertos": 31464,
154
+ "##esarkan": 31939,
155
+ "##esen": 30944,
156
+ "##esimal": 31305,
157
+ "##etasi": 31282,
158
+ "##etisi": 31444,
159
+ "##etoph": 30647,
160
+ "##etuj": 30546,
161
+ "##etujuan": 31666,
162
+ "##eud": 30662,
163
+ "##eue": 31829,
164
+ "##euen": 30553,
165
+ "##eueng": 31701,
166
+ "##eug": 30921,
167
+ "##euh": 31266,
168
+ "##euk": 31093,
169
+ "##eul": 32027,
170
+ "##euna": 31316,
171
+ "##eung": 31210,
172
+ "##eunna": 30584,
173
+ "##eup": 30752,
174
+ "##euu": 31137,
175
+ "##eux": 32001,
176
+ "##eye": 31850,
177
+ "##eza": 30793,
178
+ "##faatkan": 32008,
179
+ "##fera": 31247,
180
+ "##fis": 30960,
181
+ "##flik": 30993,
182
+ "##ftar": 31144,
183
+ "##gaan": 30922,
184
+ "##gay": 30668,
185
+ "##geon": 31349,
186
+ "##geot": 31699,
187
+ "##gk": 31101,
188
+ "##gro": 31438,
189
+ "##gunakake": 30588,
190
+ "##hadiran": 31335,
191
+ "##hair": 31178,
192
+ "##hang": 30982,
193
+ "##hee": 31950,
194
+ "##hidm": 31060,
195
+ "##hidmat": 31310,
196
+ "##hidmatan": 31766,
197
+ "##hidupan": 31249,
198
+ "##hik": 31016,
199
+ "##hong": 31961,
200
+ "##iae": 30880,
201
+ "##iang": 30677,
202
+ "##ians": 31092,
203
+ "##iao": 31833,
204
+ "##iau": 30554,
205
+ "##ibut": 31340,
206
+ "##iel": 31876,
207
+ "##iet": 30542,
208
+ "##igan": 31601,
209
+ "##igi": 31545,
210
+ "##iidae": 30628,
211
+ "##ikeun": 31520,
212
+ "##ikt": 31591,
213
+ "##iktira": 30863,
214
+ "##iland": 30962,
215
+ "##ilidae": 31652,
216
+ "##ilion": 30585,
217
+ "##imbulkan": 31474,
218
+ "##impen": 30526,
219
+ "##impik": 30603,
220
+ "##indakake": 31642,
221
+ "##ingen": 30732,
222
+ "##ingkeun": 30604,
223
+ "##inidae": 31297,
224
+ "##inten": 31104,
225
+ "##inyo": 31925,
226
+ "##ions": 32026,
227
+ "##ipp": 31470,
228
+ "##iriman": 31511,
229
+ "##isok": 31462,
230
+ "##isyen": 30844,
231
+ "##itud": 31252,
232
+ "##itude": 31106,
233
+ "##itusi": 30876,
234
+ "##ivia": 31220,
235
+ "##iw": 30621,
236
+ "##jata": 31640,
237
+ "##kamer": 31451,
238
+ "##keun": 31050,
239
+ "##klist": 31142,
240
+ "##klusif": 30917,
241
+ "##kod": 32031,
242
+ "##kopan": 31161,
243
+ "##kritik": 31070,
244
+ "##kt48": 31447,
245
+ "##kuasa": 30997,
246
+ "##ld": 31390,
247
+ "##lec": 31613,
248
+ "##leu": 31830,
249
+ "##limen": 30899,
250
+ "##linawan": 31584,
251
+ "##lopedia": 31509,
252
+ "##makeup": 31467,
253
+ "##malaya": 31290,
254
+ "##manapun": 30802,
255
+ "##maniyah": 30539,
256
+ "##masi": 31549,
257
+ "##meub": 31879,
258
+ "##meubel": 31287,
259
+ "##miah": 31082,
260
+ "##namen": 30959,
261
+ "##neun": 31207,
262
+ "##nick": 31895,
263
+ "##nyane": 31831,
264
+ "##nyo": 31763,
265
+ "##ocera": 31033,
266
+ "##odidae": 30672,
267
+ "##oge": 31203,
268
+ "##ogikeun": 30924,
269
+ "##ogon": 30765,
270
+ "##ogra": 30940,
271
+ "##ogy": 31495,
272
+ "##ogyakarta": 31718,
273
+ "##ohanan": 31043,
274
+ "##oides": 31937,
275
+ "##oja": 31951,
276
+ "##okake": 31500,
277
+ "##okh": 30763,
278
+ "##oknyo": 30714,
279
+ "##okteran": 31964,
280
+ "##ols": 30831,
281
+ "##omat": 31754,
282
+ "##omatik": 30929,
283
+ "##ombor": 31690,
284
+ "##omena": 31482,
285
+ "##omyia": 31198,
286
+ "##omyiidae": 31326,
287
+ "##omyza": 31847,
288
+ "##oniidae": 31866,
289
+ "##onne": 30697,
290
+ "##ontalo": 32009,
291
+ "##onten": 30530,
292
+ "##onyo": 30544,
293
+ "##ophagus": 30918,
294
+ "##ophila": 31517,
295
+ "##ophyl": 30625,
296
+ "##ophyllum": 31710,
297
+ "##ople": 31375,
298
+ "##opogon": 31095,
299
+ "##orat": 31412,
300
+ "##orbit": 30912,
301
+ "##ordial": 30652,
302
+ "##orian": 31905,
303
+ "##orro": 30755,
304
+ "##orus": 31206,
305
+ "##oser": 31315,
306
+ "##oslav": 31250,
307
+ "##ospong": 31041,
308
+ "##otr": 30741,
309
+ "##otria": 31634,
310
+ "##oyeur": 31889,
311
+ "##pekeu": 31536,
312
+ "##peu": 31048,
313
+ "##poe": 31202,
314
+ "##raft": 31410,
315
+ "##rard": 31217,
316
+ "##rentahan": 31483,
317
+ "##rilus": 30643,
318
+ "##romos": 31071,
319
+ "##romosikan": 31177,
320
+ "##rys": 31990,
321
+ "##semb": 31051,
322
+ "##sic": 30975,
323
+ "##tau": 31933,
324
+ "##teu": 31087,
325
+ "##ther": 30775,
326
+ "##thi": 31036,
327
+ "##tig": 30735,
328
+ "##tiwa": 31899,
329
+ "##tob": 31461,
330
+ "##tos": 31631,
331
+ "##trol": 30642,
332
+ "##tun": 30579,
333
+ "##tung": 30538,
334
+ "##tunus": 31118,
335
+ "##tural": 30541,
336
+ "##uad": 31657,
337
+ "##uaikan": 31654,
338
+ "##uak": 31547,
339
+ "##uakan": 30812,
340
+ "##uam": 31603,
341
+ "##uatan": 31366,
342
+ "##ubarkan": 30891,
343
+ "##ube": 31074,
344
+ "##ubeul": 30937,
345
+ "##ubuhan": 31374,
346
+ "##ubuhkan": 31566,
347
+ "##uduh": 30649,
348
+ "##uduhake": 31019,
349
+ "##ugas": 30578,
350
+ "##ugi": 31668,
351
+ "##uhake": 31985,
352
+ "##uhun": 30923,
353
+ "##uik": 30932,
354
+ "##ukak": 31981,
355
+ "##ukeun": 31928,
356
+ "##ukeut": 30882,
357
+ "##uks": 31146,
358
+ "##uksesan": 30558,
359
+ "##ukuik": 31187,
360
+ "##ulitan": 31918,
361
+ "##ulo": 31769,
362
+ "##uluy": 31258,
363
+ "##umbuhan": 30711,
364
+ "##unakeun": 30733,
365
+ "##unani": 31359,
366
+ "##ungs": 31130,
367
+ "##ungsa": 31821,
368
+ "##unu": 31556,
369
+ "##unung": 31639,
370
+ "##unyo": 31837,
371
+ "##uo": 31745,
372
+ "##upe": 30744,
373
+ "##upeu": 31169,
374
+ "##upiter": 30783,
375
+ "##upo": 30756,
376
+ "##urit": 30837,
377
+ "##uruahan": 31724,
378
+ "##urunan": 30805,
379
+ "##uskupan": 30728,
380
+ "##usn": 31660,
381
+ "##uten": 31548,
382
+ "##utuhkan": 30867,
383
+ "##utup": 30685,
384
+ "##utur": 31873,
385
+ "##uv": 30974,
386
+ "##uwan": 31893,
387
+ "##uwe": 30679,
388
+ "##uweni": 31717,
389
+ "##uwun": 31952,
390
+ "##uwur": 31608,
391
+ "##watan": 31428,
392
+ "##wegen": 31000,
393
+ "##wor": 30854,
394
+ "##yane": 31691,
395
+ "##ych": 31735,
396
+ "##yeo": 31684,
397
+ "##yeon": 31459,
398
+ "##yeong": 31271,
399
+ "##yle": 31560,
400
+ "##yli": 30810,
401
+ "##yogikeun": 31806,
402
+ "##yphi": 31694,
403
+ "##yt": 30824,
404
+ "##ytih": 31715,
405
+ "##yuar": 31345,
406
+ "##yuarat": 31832,
407
+ "##zidae": 31296,
408
+ "##°": 31212,
409
+ "##²": 30895,
410
+ "##ı": 30988,
411
+ "##ł": 30564,
412
+ "##α": 31381,
413
+ "##ε": 31887,
414
+ "##ι": 30781,
415
+ "##ο": 30989,
416
+ "##а": 31494,
417
+ "##е": 30671,
418
+ "##и": 31991,
419
+ "##н": 31263,
420
+ "##о": 31075,
421
+ "##р": 31481,
422
+ "##с": 31330,
423
+ "##ا": 30872,
424
+ "##ر": 31047,
425
+ "##ل": 31730,
426
+ "##م": 30678,
427
+ "##ن": 31992,
428
+ "##و": 31288,
429
+ "##ي": 31122,
430
+ "##ᄋ": 31189,
431
+ "##ᅡ": 30723,
432
+ "##ᅵ": 31793,
433
+ "##ᆫ": 31058,
434
+ "191": 30587,
435
+ "1913": 31641,
436
+ "1915": 31662,
437
+ "1916": 31890,
438
+ "1931": 31068,
439
+ "2021": 30712,
440
+ "2022": 30675,
441
+ "2023": 31411,
442
+ "2024": 31006,
443
+ "3d": 31712,
444
+ "978": 31407,
445
+ "ado": 31292,
446
+ "adol": 31188,
447
+ "adolah": 31936,
448
+ "advent": 31687,
449
+ "afr": 31878,
450
+ "agrilus": 31460,
451
+ "agromy": 31259,
452
+ "agromyzidae": 30799,
453
+ "aia": 31770,
454
+ "aircraft": 31096,
455
+ "akeh": 31862,
456
+ "akhbar": 31059,
457
+ "aktip": 30862,
458
+ "aktiviti": 31931,
459
+ "aleks": 31443,
460
+ "align": 30873,
461
+ "alit": 30925,
462
+ "amarga": 31170,
463
+ "amaz": 31302,
464
+ "anggo": 30884,
465
+ "anjeun": 31743,
466
+ "anjeunna": 31471,
467
+ "antarab": 30806,
468
+ "antarabangsa": 30883,
469
+ "antarane": 31661,
470
+ "antuk": 31629,
471
+ "apal": 31764,
472
+ "apost": 31507,
473
+ "apostolik": 31140,
474
+ "arane": 31883,
475
+ "araneae": 31329,
476
+ "araneidae": 31544,
477
+ "aranj": 31384,
478
+ "aranjeunna": 31196,
479
+ "aras": 31946,
480
+ "arep": 31308,
481
+ "argent": 31869,
482
+ "armenia": 31725,
483
+ "arsit": 31325,
484
+ "arth": 32016,
485
+ "artic": 31373,
486
+ "asilidae": 31853,
487
+ "asring": 31945,
488
+ "aster": 31435,
489
+ "astrag": 31091,
490
+ "astragalus": 31649,
491
+ "astro": 31864,
492
+ "astronomical": 31762,
493
+ "atan": 31720,
494
+ "atanapi": 30552,
495
+ "atawa": 31693,
496
+ "ayeuna": 30976,
497
+ "azerb": 30754,
498
+ "azerbai": 31765,
499
+ "azerbaijan": 31295,
500
+ "babagan": 31622,
501
+ "babar": 31681,
502
+ "babarapo": 31587,
503
+ "badag": 30930,
504
+ "bade": 31863,
505
+ "bagean": 31486,
506
+ "baharu": 30600,
507
+ "baillar": 31619,
508
+ "bain": 31807,
509
+ "banc": 31797,
510
+ "bancian": 31714,
511
+ "bandaraya": 31814,
512
+ "banglad": 31856,
513
+ "banjur": 30871,
514
+ "barubah": 30761,
515
+ "beau": 30695,
516
+ "bebentukan": 31289,
517
+ "berbeza": 31927,
518
+ "berhad": 30719,
519
+ "berhampiran": 30684,
520
+ "berhar": 31473,
521
+ "berikutan": 31618,
522
+ "berkahwin": 31265,
523
+ "berket": 31860,
524
+ "berketurunan": 31121,
525
+ "berkhidmat": 30581,
526
+ "berkomp": 30524,
527
+ "bersa": 30591,
528
+ "bersemp": 31567,
529
+ "bersempadan": 31361,
530
+ "bersi": 30743,
531
+ "berta": 30961,
532
+ "bertanggungjawab": 31638,
533
+ "berter": 31175,
534
+ "bertur": 31803,
535
+ "beubeul": 30586,
536
+ "beubeulahan": 30597,
537
+ "beur": 31273,
538
+ "bhar": 31061,
539
+ "bhd": 31501,
540
+ "biasana": 30928,
541
+ "biasane": 31394,
542
+ "biaso": 31786,
543
+ "biasonyo": 32004,
544
+ "bibli": 30570,
545
+ "bilion": 30852,
546
+ "biography": 31233,
547
+ "ble": 31757,
548
+ "blo": 31835,
549
+ "boga": 31559,
550
+ "bohemia": 31066,
551
+ "bois": 31758,
552
+ "bombyli": 31965,
553
+ "bombyliidae": 31838,
554
+ "born": 30865,
555
+ "boten": 31901,
556
+ "broyeur": 30942,
557
+ "bulb": 30902,
558
+ "bulbophyllum": 31881,
559
+ "bulgaria": 31973,
560
+ "bungo": 31750,
561
+ "bup": 31015,
562
+ "cai": 31527,
563
+ "calif": 30577,
564
+ "californ": 32011,
565
+ "carex": 30829,
566
+ "carian": 30650,
567
+ "carita": 31371,
568
+ "cath": 30648,
569
+ "cathetan": 30627,
570
+ "catholic": 31213,
571
+ "cawangan": 30654,
572
+ "cecid": 31593,
573
+ "cecidomyiidae": 30724,
574
+ "centr": 31143,
575
+ "ceramb": 31606,
576
+ "cerambyc": 31423,
577
+ "cerat": 30817,
578
+ "ceratopogon": 31490,
579
+ "ceratopogonidae": 31904,
580
+ "ceuk": 30995,
581
+ "chann": 30702,
582
+ "chec": 31497,
583
+ "chir": 31744,
584
+ "chrys": 30710,
585
+ "cla": 31773,
586
+ "clas": 30908,
587
+ "cocog": 30905,
588
+ "coleop": 32018,
589
+ "collec": 31232,
590
+ "cols": 30701,
591
+ "colspan": 31960,
592
+ "concass": 31069,
593
+ "concasseur": 31341,
594
+ "const": 31602,
595
+ "conv": 31111,
596
+ "cov": 31932,
597
+ "covid": 30869,
598
+ "cuk": 31128,
599
+ "cukuik": 31680,
600
+ "culic": 31431,
601
+ "czech": 31172,
602
+ "dados": 31018,
603
+ "dakek": 31590,
604
+ "dalan": 30694,
605
+ "dapek": 31236,
606
+ "daptar": 31987,
607
+ "darip": 30674,
608
+ "darjah": 31317,
609
+ "dato": 30839,
610
+ "dav": 31632,
611
+ "delengen": 31538,
612
+ "demokratik": 31849,
613
+ "demospong": 31235,
614
+ "demospongiae": 30617,
615
+ "dendrob": 31811,
616
+ "dendrobium": 30823,
617
+ "dene": 31115,
618
+ "dening": 31031,
619
+ "deparet": 30713,
620
+ "deparetema": 31953,
621
+ "deu": 30657,
622
+ "deui": 31272,
623
+ "deukeut": 30855,
624
+ "deut": 30794,
625
+ "dhe": 31184,
626
+ "dhew": 31747,
627
+ "dhewe": 31283,
628
+ "dheweke": 31986,
629
+ "dhuwur": 31360,
630
+ "dianggo": 31268,
631
+ "dianugerah": 31616,
632
+ "diarani": 30718,
633
+ "dibahagikan": 30999,
634
+ "digawe": 31944,
635
+ "digunakake": 31328,
636
+ "dijad": 30795,
637
+ "diji": 30562,
638
+ "dikl": 30868,
639
+ "dikon": 31109,
640
+ "dilancarkan": 30777,
641
+ "dima": 31892,
642
+ "dimasuk": 30954,
643
+ "dinten": 31653,
644
+ "dinyat": 31107,
645
+ "dipun": 30556,
646
+ "diraja": 31772,
647
+ "disab": 31650,
648
+ "disabuik": 30820,
649
+ "disember": 31884,
650
+ "diskografi": 31484,
651
+ "ditabang": 31337,
652
+ "ditabik": 31906,
653
+ "ditam": 31298,
654
+ "ditamui": 31293,
655
+ "ditamukan": 31737,
656
+ "ditand": 30596,
657
+ "ditem": 31378,
658
+ "ditub": 31871,
659
+ "ditubuhkan": 31334,
660
+ "ditug": 30660,
661
+ "diturun": 31583,
662
+ "dolich": 31633,
663
+ "dolichop": 32017,
664
+ "dolichopodidae": 30825,
665
+ "domest": 31651,
666
+ "donya": 31877,
667
+ "doub": 30666,
668
+ "dros": 30801,
669
+ "drosophila": 31365,
670
+ "dunya": 30707,
671
+ "durung": 31211,
672
+ "dut": 31393,
673
+ "duwe": 30935,
674
+ "dytis": 31465,
675
+ "dytiscidae": 31238,
676
+ "eksekut": 31571,
677
+ "eksentr": 30994,
678
+ "eksped": 31672,
679
+ "eksper": 31186,
680
+ "elec": 31445,
681
+ "eli": 31966,
682
+ "elitra": 31085,
683
+ "empayar": 30881,
684
+ "encyc": 31129,
685
+ "encyclopedia": 31554,
686
+ "ener": 31336,
687
+ "entuk": 30860,
688
+ "episod": 30951,
689
+ "eropah": 31176,
690
+ "eta": 31116,
691
+ "eu": 30926,
692
+ "eul": 30599,
693
+ "eup": 31751,
694
+ "f1": 31086,
695
+ "fakult": 31949,
696
+ "filem": 31569,
697
+ "filmografi": 31425,
698
+ "fiz": 31453,
699
+ "fn": 31324,
700
+ "france": 31406,
701
+ "gadhah": 31251,
702
+ "gaduh": 31891,
703
+ "gampong": 30626,
704
+ "gancang": 31231,
705
+ "gawe": 30739,
706
+ "gedena": 31014,
707
+ "gedhe": 30866,
708
+ "geor": 31331,
709
+ "german": 30963,
710
+ "geu": 30898,
711
+ "geud": 31277,
712
+ "geuda": 30893,
713
+ "geudae": 31659,
714
+ "geun": 31278,
715
+ "geur": 30595,
716
+ "geure": 31805,
717
+ "geus": 31167,
718
+ "gov": 30624,
719
+ "gra": 31174,
720
+ "gubern": 30529,
721
+ "guinea": 31222,
722
+ "gunong": 31147,
723
+ "habitatnyo": 30919,
724
+ "hade": 31801,
725
+ "hae": 31989,
726
+ "haiwan": 30740,
727
+ "handap": 30602,
728
+ "happ": 30680,
729
+ "hass": 30758,
730
+ "haut": 31343,
731
+ "haute": 31448,
732
+ "hayang": 32014,
733
+ "hea": 31774,
734
+ "hek": 31759,
735
+ "helik": 31900,
736
+ "henteu": 31387,
737
+ "hiduik": 31449,
738
+ "hiji": 31630,
739
+ "hirup": 31369,
740
+ "histeridae": 31727,
741
+ "htm": 30749,
742
+ "hungaria": 31600,
743
+ "hungary": 30910,
744
+ "huru": 31020,
745
+ "iaitu": 30731,
746
+ "ideas": 30792,
747
+ "ieu": 31577,
748
+ "iha": 31433,
749
+ "ija": 31938,
750
+ "ikea": 31182,
751
+ "iko": 31667,
752
+ "iku": 31728,
753
+ "imah": 31565,
754
+ "imej": 31491,
755
+ "indak": 31152,
756
+ "indun": 31956,
757
+ "indunisia": 30785,
758
+ "infan": 30780,
759
+ "ingg": 31480,
760
+ "inggeris": 30583,
761
+ "inggih": 31348,
762
+ "ingkang": 31669,
763
+ "insee": 31055,
764
+ "instit": 31979,
765
+ "inyo": 32030,
766
+ "ireland": 31391,
767
+ "isih": 31221,
768
+ "iss": 30936,
769
+ "iyan": 31921,
770
+ "iyanaritu": 31908,
771
+ "jaba": 31413,
772
+ "jalma": 31843,
773
+ "janten": 31131,
774
+ "jawatan": 31332,
775
+ "jawatankuasa": 31380,
776
+ "jawi": 31304,
777
+ "jema": 30548,
778
+ "jenayah": 31469,
779
+ "jeneng": 30790,
780
+ "jepun": 30835,
781
+ "jero": 30663,
782
+ "jeu": 30991,
783
+ "jeun": 31225,
784
+ "jeung": 31007,
785
+ "jeux": 31815,
786
+ "jewel": 30965,
787
+ "jih": 32002,
788
+ "jinis": 31224,
789
+ "johann": 30885,
790
+ "jpl": 30734,
791
+ "julai": 31930,
792
+ "juo": 30676,
793
+ "kaasup": 31388,
794
+ "kaayaan": 30687,
795
+ "kabanyakan": 30630,
796
+ "kabeh": 31422,
797
+ "kabentuk": 30651,
798
+ "kabis": 31787,
799
+ "kabiskopan": 30644,
800
+ "kabup": 31134,
801
+ "kacamatan": 31057,
802
+ "kacatet": 31595,
803
+ "kada": 31113,
804
+ "kados": 31923,
805
+ "kaedah": 30590,
806
+ "kajadian": 32015,
807
+ "kakit": 30877,
808
+ "kakitangan": 31489,
809
+ "kalas": 31943,
810
+ "kalawan": 31370,
811
+ "kalayan": 31160,
812
+ "kalebu": 31599,
813
+ "kaler": 30782,
814
+ "kalih": 31553,
815
+ "kaliyan": 31522,
816
+ "kaluar": 31934,
817
+ "kalurahan": 30858,
818
+ "kamb": 31488,
819
+ "kanak": 30833,
820
+ "kangge": 31201,
821
+ "kanggo": 30716,
822
+ "kanthi": 31190,
823
+ "kapalo": 30534,
824
+ "kaping": 30540,
825
+ "karak": 31820,
826
+ "kardinal": 31320,
827
+ "kareh": 31312,
828
+ "kareta": 30641,
829
+ "karusakan": 31947,
830
+ "kary": 31084,
831
+ "kasal": 31257,
832
+ "kasaluruahan": 31909,
833
+ "kasebut": 31193,
834
+ "katon": 31685,
835
+ "kaul": 31903,
836
+ "kaulinan": 31285,
837
+ "kaun": 31851,
838
+ "kaunti": 31179,
839
+ "kawa": 31721,
840
+ "kawalan": 31199,
841
+ "kawas": 30953,
842
+ "kayata": 31097,
843
+ "kebi": 32003,
844
+ "kedah": 31321,
845
+ "kedi": 30686,
846
+ "kejohanan": 30725,
847
+ "kelab": 31607,
848
+ "kelantan": 31322,
849
+ "kelu": 31733,
850
+ "kemend": 31644,
851
+ "kemp": 30598,
852
+ "kemudiannya": 31719,
853
+ "kemunc": 31656,
854
+ "kender": 31240,
855
+ "kenderaan": 30768,
856
+ "kene": 31073,
857
+ "keneh": 31533,
858
+ "keng": 31539,
859
+ "kerjaya": 31151,
860
+ "kerusi": 30949,
861
+ "kesihatan": 31524,
862
+ "ketuj": 31318,
863
+ "keu": 31825,
864
+ "keur": 31389,
865
+ "kewangan": 31485,
866
+ "khalif": 31044,
867
+ "kitu": 30864,
868
+ "kiw": 30525,
869
+ "km2": 30859,
870
+ "km²": 31417,
871
+ "kolej": 31003,
872
+ "komand": 31610,
873
+ "komuniti": 31150,
874
+ "kontrov": 30914,
875
+ "kowe": 31145,
876
+ "kristian": 31702,
877
+ "kula": 31792,
878
+ "kulawar": 31983,
879
+ "kulawarga": 30764,
880
+ "kumaha": 31915,
881
+ "kunj": 31197,
882
+ "kura": 31319,
883
+ "kuring": 31300,
884
+ "kutha": 31242,
885
+ "kuwe": 30700,
886
+ "kuwi": 30816,
887
+ "labiah": 30770,
888
+ "laborat": 31706,
889
+ "lair": 30946,
890
+ "lajeng": 31916,
891
+ "lalek": 31477,
892
+ "laluan": 31840,
893
+ "lambok": 30704,
894
+ "lamela": 30746,
895
+ "lamun": 30956,
896
+ "lanang": 31997,
897
+ "langau": 31655,
898
+ "langk": 31112,
899
+ "langkung": 31784,
900
+ "lapan": 31215,
901
+ "larv": 31125,
902
+ "larvanyo": 30698,
903
+ "lawah": 31429,
904
+ "laweh": 31813,
905
+ "left": 31344,
906
+ "lengkep": 31748,
907
+ "ler": 32007,
908
+ "leres": 31275,
909
+ "leu": 31674,
910
+ "leung": 31988,
911
+ "leutik": 31008,
912
+ "leuwih": 30766,
913
+ "lian": 31230,
914
+ "limoniidae": 30640,
915
+ "linyphi": 31379,
916
+ "linyphiidae": 31090,
917
+ "liwat": 31550,
918
+ "liya": 31042,
919
+ "liyane": 31704,
920
+ "loba": 31977,
921
+ "loro": 31260,
922
+ "lua": 30592,
923
+ "lumb": 31824,
924
+ "luwih": 30927,
925
+ "maca": 30567,
926
+ "madras": 31476,
927
+ "magnit": 31382,
928
+ "maharaja": 30573,
929
+ "mahisok": 30791,
930
+ "majoriti": 30888,
931
+ "makeupmakeup": 30634,
932
+ "makh": 30957,
933
+ "maklumat": 31917,
934
+ "malalui": 31795,
935
+ "malaya": 31817,
936
+ "malih": 31046,
937
+ "mamakan": 31248,
938
+ "mamb": 30673,
939
+ "mamiliki": 30574,
940
+ "mampunyo": 30760,
941
+ "mampunyoi": 32024,
942
+ "manawa": 31368,
943
+ "maneh": 31802,
944
+ "manehna": 31426,
945
+ "manggerek": 31605,
946
+ "mangr": 31157,
947
+ "mangrup": 31354,
948
+ "mangrupa": 31836,
949
+ "mangrupikeun": 30637,
950
+ "manj": 31356,
951
+ "manjadi": 31114,
952
+ "mano": 31083,
953
+ "manungsa": 31149,
954
+ "manusa": 31636,
955
+ "manyebabkan": 31270,
956
+ "maran": 31826,
957
+ "maranehanana": 30549,
958
+ "marang": 30947,
959
+ "marcapada": 30916,
960
+ "marie": 30909,
961
+ "marin": 31705,
962
+ "marit": 31307,
963
+ "marupo": 31896,
964
+ "marupokan": 31487,
965
+ "masar": 31776,
966
+ "masarakat": 31223,
967
+ "maso": 31314,
968
+ "matemat": 31872,
969
+ "mato": 31392,
970
+ "matoari": 30774,
971
+ "mats": 30689,
972
+ "mawa": 31138,
973
+ "mbukak": 31783,
974
+ "medal": 30797,
975
+ "meks": 30721,
976
+ "melaka": 30601,
977
+ "melint": 31697,
978
+ "melu": 31284,
979
+ "memberit": 30560,
980
+ "mempel": 30875,
981
+ "memulakan": 30836,
982
+ "menand": 30639,
983
+ "menehi": 30635,
984
+ "menerusi": 31209,
985
+ "mengandungi": 31612,
986
+ "mengekalkan": 31816,
987
+ "mengkl": 31753,
988
+ "mengut": 30771,
989
+ "menika": 30618,
990
+ "menyang": 30593,
991
+ "menyokong": 31998,
992
+ "mere": 30990,
993
+ "mesyuarat": 31574,
994
+ "meu": 32022,
995
+ "meub": 31912,
996
+ "meubel": 31395,
997
+ "meubels": 31781,
998
+ "meubl": 31907,
999
+ "meuble": 31457,
1000
+ "meubles": 31355,
1001
+ "meul": 31580,
1002
+ "meulage": 30889,
1003
+ "meun": 31030,
1004
+ "meunang": 31980,
1005
+ "mib": 30715,
1006
+ "mich": 30633,
1007
+ "militar": 31108,
1008
+ "military": 31778,
1009
+ "mimiti": 31880,
1010
+ "minangk": 31643,
1011
+ "minangka": 30966,
1012
+ "minangkab": 31542,
1013
+ "minit": 31377,
1014
+ "misuwur": 31463,
1015
+ "miturut": 30955,
1016
+ "miw": 31971,
1017
+ "miwah": 31531,
1018
+ "moal": 31551,
1019
+ "moden": 31200,
1020
+ "mohamad": 30769,
1021
+ "mohd": 31589,
1022
+ "mp3": 30745,
1023
+ "mpo": 31823,
1024
+ "mudo": 31227,
1025
+ "mukim": 31519,
1026
+ "mungg": 30843,
1027
+ "munisip": 31077,
1028
+ "munisipalitas": 31518,
1029
+ "muscidae": 30730,
1030
+ "mutlakna": 30938,
1031
+ "mutlaknyo": 31568,
1032
+ "muzik": 31427,
1033
+ "myc": 31740,
1034
+ "mycetoph": 30964,
1035
+ "mycetophilidae": 30619,
1036
+ "nae": 31647,
1037
+ "nagara": 31775,
1038
+ "naha": 31183,
1039
+ "naib": 31530,
1040
+ "najib": 30842,
1041
+ "nakeuh": 31929,
1042
+ "nalika": 31746,
1043
+ "namo": 31062,
1044
+ "namp": 31581,
1045
+ "nangg": 31262,
1046
+ "nanging": 31663,
1047
+ "naon": 30536,
1048
+ "ndh": 30827,
1049
+ "nduweni": 31976,
1050
+ "nektar": 30838,
1051
+ "nemb": 30550,
1052
+ "nenten": 30726,
1053
+ "neol": 31353,
1054
+ "nepi": 31578,
1055
+ "ngad": 30706,
1056
+ "ngag": 30659,
1057
+ "ngagaduhan": 30543,
1058
+ "ngagunakeun": 30615,
1059
+ "ngah": 31063,
1060
+ "ngal": 31808,
1061
+ "ngali": 30646,
1062
+ "ngam": 30622,
1063
+ "ngand": 31009,
1064
+ "nganggo": 31346,
1065
+ "nganj": 31450,
1066
+ "nganjrek": 31475,
1067
+ "nganti": 31810,
1068
+ "ngar": 31424,
1069
+ "ngaran": 31646,
1070
+ "ngarep": 30729,
1071
+ "ngatur": 30911,
1072
+ "ngen": 31323,
1073
+ "ngeun": 30809,
1074
+ "ngeunaan": 31615,
1075
+ "nggawe": 31957,
1076
+ "nggunakake": 30840,
1077
+ "ngirim": 31437,
1078
+ "ngiring": 30887,
1079
+ "ngis": 31281,
1080
+ "ngisor": 31673,
1081
+ "ngo": 30709,
1082
+ "ngom": 30821,
1083
+ "ngon": 31013,
1084
+ "ngora": 32000,
1085
+ "ngr": 31017,
1086
+ "ngu": 32029,
1087
+ "nib": 31127,
1088
+ "ningali": 31861,
1089
+ "ningkat": 31357,
1090
+ "njaba": 30572,
1091
+ "njupuk": 30903,
1092
+ "nombor": 31711,
1093
+ "nord": 31692,
1094
+ "norway": 30848,
1095
+ "nps": 30847,
1096
+ "npsn": 31439,
1097
+ "nuduhake": 30985,
1098
+ "nurwegen": 30958,
1099
+ "nyaeta": 31779,
1100
+ "nyan": 31279,
1101
+ "nyang": 31333,
1102
+ "nye": 30705,
1103
+ "nyieun": 31910,
1104
+ "nyo": 31309,
1105
+ "nyoe": 31800,
1106
+ "oge": 30972,
1107
+ "ogos": 31409,
1108
+ "oksi": 30561,
1109
+ "olahr": 31269,
1110
+ "olimpik": 31099,
1111
+ "omah": 30667,
1112
+ "ono": 31034,
1113
+ "onth": 31158,
1114
+ "onthophagus": 31214,
1115
+ "ortod": 31286,
1116
+ "ouv": 31780,
1117
+ "owl": 31024,
1118
+ "pacahan": 31535,
1119
+ "padha": 31926,
1120
+ "pado": 31430,
1121
+ "pahang": 31022,
1122
+ "pahl": 31234,
1123
+ "pama": 31635,
1124
+ "pamaen": 31645,
1125
+ "pamake": 31440,
1126
+ "pamar": 31397,
1127
+ "pamarentah": 31628,
1128
+ "pamb": 30575,
1129
+ "pambantu": 30568,
1130
+ "pambantuakan": 31563,
1131
+ "pamrentahan": 31408,
1132
+ "pandemi": 31446,
1133
+ "panganan": 30971,
1134
+ "panggerek": 31029,
1135
+ "pangguna": 30832,
1136
+ "pangl": 31383,
1137
+ "panjangnyo": 30742,
1138
+ "panjenengan": 31011,
1139
+ "panon": 31623,
1140
+ "panonpoe": 31752,
1141
+ "pany": 31870,
1142
+ "parai": 31512,
1143
+ "paraiaran": 30807,
1144
+ "parairan": 31141,
1145
+ "parantos": 31555,
1146
+ "parl": 31504,
1147
+ "parlimen": 30907,
1148
+ "pautan": 31166,
1149
+ "pelab": 31848,
1150
+ "pelakon": 30608,
1151
+ "pelanc": 31614,
1152
+ "pembahagian": 31648,
1153
+ "pemberont": 31162,
1154
+ "pembun": 31229,
1155
+ "pengamb": 30636,
1156
+ "pengangk": 31897,
1157
+ "pengarah": 31120,
1158
+ "pengerusi": 32023,
1159
+ "penghar": 30943,
1160
+ "penil": 31809,
1161
+ "pentadb": 30565,
1162
+ "pentadbiran": 30748,
1163
+ "penubuhan": 30607,
1164
+ "perad": 31846,
1165
+ "perangan": 30527,
1166
+ "peratus": 31416,
1167
+ "perband": 31421,
1168
+ "perbandaran": 31105,
1169
+ "perger": 31841,
1170
+ "peribadi": 31942,
1171
+ "pering": 31195,
1172
+ "perkahw": 30979,
1173
+ "perkahwinan": 30945,
1174
+ "perkhidmatan": 30537,
1175
+ "perlembagaan": 30813,
1176
+ "perlis": 30569,
1177
+ "perma": 31159,
1178
+ "perni": 30845,
1179
+ "perniagaan": 30811,
1180
+ "persa": 31261,
1181
+ "persek": 31670,
1182
+ "pertamo": 30656,
1183
+ "pertub": 30856,
1184
+ "pertubuhan": 31844,
1185
+ "perub": 31299,
1186
+ "perubatan": 31683,
1187
+ "petempatan": 31254,
1188
+ "peu": 31102,
1189
+ "peugeot": 30533,
1190
+ "peun": 30751,
1191
+ "peus": 31192,
1192
+ "philipp": 30531,
1193
+ "phor": 31842,
1194
+ "phoridae": 30747,
1195
+ "pierre": 31056,
1196
+ "pikeun": 30753,
1197
+ "pingat": 31620,
1198
+ "pirang": 31510,
1199
+ "pisan": 31799,
1200
+ "pisanan": 31514,
1201
+ "piy": 31845,
1202
+ "piyamb": 31244,
1203
+ "piyambak": 31579,
1204
+ "piyambakipun": 31552,
1205
+ "pkr": 31503,
1206
+ "planetesimal": 30992,
1207
+ "planetis": 31898,
1208
+ "platnick": 31789,
1209
+ "plek": 31827,
1210
+ "poe": 31818,
1211
+ "poh": 30737,
1212
+ "poland": 31671,
1213
+ "polen": 30987,
1214
+ "pont": 30788,
1215
+ "popul": 30849,
1216
+ "porifera": 30920,
1217
+ "poskod": 31700,
1218
+ "pour": 30779,
1219
+ "praj": 31372,
1220
+ "pratelan": 31027,
1221
+ "prefiks": 31264,
1222
+ "presid": 31154,
1223
+ "prix": 31611,
1224
+ "produs": 31996,
1225
+ "projek": 31664,
1226
+ "prote": 31100,
1227
+ "protel": 30948,
1228
+ "protelean": 31665,
1229
+ "psychodidae": 31418,
1230
+ "psychotria": 31902,
1231
+ "publishing": 31749,
1232
+ "pungkasan": 31026,
1233
+ "punika": 31338,
1234
+ "puniki": 31597,
1235
+ "punyo": 31148,
1236
+ "purata": 31515,
1237
+ "pusingan": 31738,
1238
+ "railway": 31729,
1239
+ "rakan": 31621,
1240
+ "rasmi": 30846,
1241
+ "rawatan": 31376,
1242
+ "razak": 31054,
1243
+ "rd1": 31959,
1244
+ "reference": 31970,
1245
+ "referensina": 31804,
1246
+ "reka": 31493,
1247
+ "rekod": 31972,
1248
+ "remp": 30690,
1249
+ "rese": 31958,
1250
+ "ringkasnya": 31962,
1251
+ "rings": 31686,
1252
+ "ringtones": 31405,
1253
+ "riw": 31505,
1254
+ "robah": 31676,
1255
+ "romania": 31798,
1256
+ "rows": 32005,
1257
+ "rowspan": 30973,
1258
+ "rujuakan": 31582,
1259
+ "rump": 31734,
1260
+ "runt": 31181,
1261
+ "s1": 31010,
1262
+ "sabab": 30931,
1263
+ "sababaraha": 30978,
1264
+ "sabagai": 30669,
1265
+ "sabah": 31350,
1266
+ "saben": 31313,
1267
+ "sabo": 30952,
1268
+ "saboh": 30545,
1269
+ "sabuah": 31993,
1270
+ "sabuak": 31012,
1271
+ "sacara": 31791,
1272
+ "sacaro": 30950,
1273
+ "sada": 30623,
1274
+ "sadaya": 31731,
1275
+ "sadur": 31506,
1276
+ "sadurunge": 31812,
1277
+ "sae": 31859,
1278
+ "sagadang": 30566,
1279
+ "sagala": 31696,
1280
+ "saged": 31529,
1281
+ "saha": 30851,
1282
+ "sahiji": 31767,
1283
+ "saik": 31135,
1284
+ "saiki": 31079,
1285
+ "saikua": 31126,
1286
+ "sajarah": 31755,
1287
+ "sajeroning": 30967,
1288
+ "sakab": 30620,
1289
+ "sakabeh": 31768,
1290
+ "sakola": 30523,
1291
+ "sakum": 31982,
1292
+ "sakumna": 31452,
1293
+ "salain": 31955,
1294
+ "salaku": 31472,
1295
+ "salle": 31124,
1296
+ "saltic": 30934,
1297
+ "salticidae": 31339,
1298
+ "samant": 30970,
1299
+ "samantaro": 31256,
1300
+ "samem": 31049,
1301
+ "sami": 30998,
1302
+ "sampun": 31420,
1303
+ "sanajan": 31796,
1304
+ "sane": 31528,
1305
+ "sanes": 31419,
1306
+ "sanggeus": 31072,
1307
+ "sangkan": 31133,
1308
+ "saperti": 30894,
1309
+ "sapertos": 31243,
1310
+ "sarawak": 30532,
1311
+ "sarekat": 31598,
1312
+ "sareng": 31994,
1313
+ "saroman": 30557,
1314
+ "sarta": 30699,
1315
+ "sarua": 31173,
1316
+ "sarupo": 30796,
1317
+ "sasuatu": 30778,
1318
+ "satungg": 31760,
1319
+ "sawet": 31219,
1320
+ "sawetara": 31021,
1321
+ "sawij": 30563,
1322
+ "sawijining": 31625,
1323
+ "sawise": 31466,
1324
+ "sayoknyo": 31723,
1325
+ "scarab": 31191,
1326
+ "scarabae": 30522,
1327
+ "scarabaeidae": 31080,
1328
+ "scope": 31028,
1329
+ "sebag": 31171,
1330
+ "sebal": 30789,
1331
+ "sebarang": 30717,
1332
+ "sedeng": 31508,
1333
+ "sedengkeun": 31301,
1334
+ "seek": 31914,
1335
+ "sejen": 31679,
1336
+ "seksyen": 30610,
1337
+ "selanjut": 31064,
1338
+ "sempadan": 31688,
1339
+ "senarai": 31888,
1340
+ "senecio": 31291,
1341
+ "sepanyol": 31414,
1342
+ "sepur": 31434,
1343
+ "seramai": 31919,
1344
+ "serb": 31984,
1345
+ "sese": 31732,
1346
+ "seset": 31882,
1347
+ "sesetengah": 31246,
1348
+ "setiausaha": 31739,
1349
+ "setidak": 30580,
1350
+ "seue": 31327,
1351
+ "seueur": 31617,
1352
+ "seul": 31575,
1353
+ "seuwa": 31855,
1354
+ "sij": 31978,
1355
+ "siji": 31546,
1356
+ "singh": 30815,
1357
+ "sira": 30691,
1358
+ "sisih": 30762,
1359
+ "sken": 31185,
1360
+ "slov": 31592,
1361
+ "slovakia": 31364,
1362
+ "societ": 30693,
1363
+ "socorro": 31761,
1364
+ "soest": 31168,
1365
+ "sokongan": 30681,
1366
+ "sora": 31974,
1367
+ "sorangan": 30670,
1368
+ "sous": 31624,
1369
+ "sov": 31940,
1370
+ "squar": 31039,
1371
+ "staphy": 30874,
1372
+ "staphylin": 31703,
1373
+ "staphylinidae": 31502,
1374
+ "stasion": 30612,
1375
+ "stesen": 31604,
1376
+ "sukan": 31532,
1377
+ "sumat": 30559,
1378
+ "swara": 31194,
1379
+ "syarikat": 31386,
1380
+ "taban": 31695,
1381
+ "tabanidae": 30722,
1382
+ "tabantuak": 31834,
1383
+ "tacatat": 31596,
1384
+ "tach": 31736,
1385
+ "tachinidae": 31038,
1386
+ "tagolong": 31498,
1387
+ "talatak": 31002,
1388
+ "talian": 31858,
1389
+ "tamil": 31513,
1390
+ "tampek": 32012,
1391
+ "tanduak": 31742,
1392
+ "tarikh": 31995,
1393
+ "tegese": 31216,
1394
+ "teka": 31436,
1395
+ "telef": 31771,
1396
+ "telefon": 31136,
1397
+ "telev": 31999,
1398
+ "television": 30904,
1399
+ "televisyen": 30800,
1400
+ "tembung": 30969,
1401
+ "tempatan": 30803,
1402
+ "tempoh": 31237,
1403
+ "tengku": 31358,
1404
+ "tentera": 31948,
1405
+ "teph": 31441,
1406
+ "tephrit": 31303,
1407
+ "tephritidae": 31526,
1408
+ "terengganu": 31067,
1409
+ "terhad": 31954,
1410
+ "terut": 31274,
1411
+ "terutamanya": 31276,
1412
+ "teu": 30861,
1413
+ "teub": 31521,
1414
+ "teuing": 30901,
1415
+ "tha": 30611,
1416
+ "thom": 31367,
1417
+ "tiasa": 30655,
1418
+ "tig": 31572,
1419
+ "tilu": 31777,
1420
+ "timbalan": 30787,
1421
+ "tingga": 30897,
1422
+ "tinimbang": 31226,
1423
+ "tipul": 32006,
1424
+ "tipula": 32019,
1425
+ "tipulidae": 32021,
1426
+ "tiyang": 31875,
1427
+ "tre": 31415,
1428
+ "troya": 30900,
1429
+ "tug": 30772,
1430
+ "tuluy": 31677,
1431
+ "tumb": 30798,
1432
+ "tumbu": 30850,
1433
+ "tunggil": 31385,
1434
+ "turk": 31228,
1435
+ "tutumbu": 30683,
1436
+ "tuw": 30984,
1437
+ "tuwawu": 31969,
1438
+ "ubat": 30892,
1439
+ "uel": 30589,
1440
+ "uga": 30841,
1441
+ "ugi": 31255,
1442
+ "ulekl": 31401,
1443
+ "umno": 30983,
1444
+ "umumnyo": 31689,
1445
+ "undi": 31455,
1446
+ "unggal": 31726,
1447
+ "universiti": 31594,
1448
+ "untuak": 32028,
1449
+ "upami": 31867,
1450
+ "ureueng": 31037,
1451
+ "utamo": 30941,
1452
+ "utawa": 31852,
1453
+ "utawi": 31004,
1454
+ "vasque": 30605,
1455
+ "vcard": 32020,
1456
+ "veg": 31492,
1457
+ "vl": 31637,
1458
+ "waktos": 31342,
1459
+ "walanda": 31432,
1460
+ "wangsa": 31708,
1461
+ "wangun": 31306,
1462
+ "warsa": 30682,
1463
+ "wates": 31707,
1464
+ "wawengkon": 31868,
1465
+ "wayah": 31294,
1466
+ "wek": 31570,
1467
+ "wektu": 30804,
1468
+ "wenten": 30555,
1469
+ "wer": 30773,
1470
+ "wewengkon": 30609,
1471
+ "wikip": 30890,
1472
+ "wiw": 31218,
1473
+ "wiwit": 31894,
1474
+ "woh": 30886,
1475
+ "wonten": 31920,
1476
+ "worms": 30521,
1477
+ "yaiku": 31794,
1478
+ "yening": 30906,
1479
+ "yito": 31963,
1480
+ "yout": 30582,
1481
+ "zealand": 31117,
1482
+ "°": 31045,
1483
+ "°c": 30857,
1484
+ "²": 31698,
1485
+ "ı": 31675,
1486
+ "ł": 30915,
1487
+ "α": 30980,
1488
+ "ε": 31442,
1489
+ "ι": 30828,
1490
+ "ο": 31245,
1491
+ "а": 31626,
1492
+ "е": 30613,
1493
+ "и": 31588,
1494
+ "н": 31076,
1495
+ "о": 31155,
1496
+ "р": 31153,
1497
+ "с": 31865,
1498
+ "ا": 30631,
1499
+ "ال": 30551,
1500
+ "ر": 31557,
1501
+ "ل": 31499,
1502
+ "م": 31065,
1503
+ "ن": 30528,
1504
+ "و": 30968,
1505
+ "ي": 31241,
1506
+ "ᄋ": 31525,
1507
+ "ᅡ": 31399,
1508
+ "ᅵ": 31534,
1509
+ "ᆫ": 31363,
1510
+ "—": 31586,
1511
+ "•": 30896,
1512
+ "…": 31785
1513
+ }
config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "LazarusNLP/NusaBERT-base",
3
+ "_num_labels": 5,
4
+ "architectures": [
5
+ "BertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "classifier_dropout": null,
9
+ "directionality": "bidi",
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "positive",
15
+ "1": "neutral",
16
+ "2": "negative"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "label2id": {
21
+ "negative": 2,
22
+ "neutral": 1,
23
+ "positive": 0
24
+ },
25
+ "layer_norm_eps": 1e-12,
26
+ "max_position_embeddings": 512,
27
+ "model_type": "bert",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "output_past": true,
31
+ "pad_token_id": 0,
32
+ "pooler_fc_size": 768,
33
+ "pooler_num_attention_heads": 12,
34
+ "pooler_num_fc_layers": 3,
35
+ "pooler_size_per_head": 128,
36
+ "pooler_type": "first_token_transform",
37
+ "position_embedding_type": "absolute",
38
+ "problem_type": "single_label_classification",
39
+ "torch_dtype": "float32",
40
+ "transformers_version": "4.37.2",
41
+ "type_vocab_size": 2,
42
+ "use_cache": true,
43
+ "vocab_size": 32032
44
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3634f4fcc7d65556d415b8d68f49eb2e3c49e8cb6c45b2427511bce38cf3d6e8
3
+ size 442600444
runs/Feb22_11-56-21_bookbot-h100/events.out.tfevents.1708602982.bookbot-h100.137366.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94228e55803ff395f55a8487978a097ad2e3ecfed903b525a85af4c92c76c2cc
3
+ size 5112
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aa972d1890b9a54801db6727317a612faa8dadd5f462bc6f7e3de22509e7651
3
+ size 4792
vocab.txt ADDED
The diff for this file is too large to render. See raw diff