hf-transformers-bot commited on
Commit
bda4316
1 Parent(s): d1f90f2

Upload tiny models for OwlViTForObjectDetection

Browse files
config.json ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": null,
3
+ "architectures": [
4
+ "OwlViTForObjectDetection"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "eos_token_id": 2,
8
+ "initializer_factor": 1.0,
9
+ "logit_scale_init_value": 2.6592,
10
+ "model_type": "owlvit",
11
+ "pad_token_id": 0,
12
+ "projection_dim": 64,
13
+ "text_config": {
14
+ "_name_or_path": "",
15
+ "add_cross_attention": false,
16
+ "architectures": null,
17
+ "attention_dropout": 0.1,
18
+ "bad_words_ids": null,
19
+ "begin_suppress_tokens": null,
20
+ "bos_token_id": 49406,
21
+ "chunk_size_feed_forward": 0,
22
+ "cross_attention_hidden_size": null,
23
+ "decoder_start_token_id": null,
24
+ "diversity_penalty": 0.0,
25
+ "do_sample": false,
26
+ "dropout": 0.1,
27
+ "early_stopping": false,
28
+ "encoder_no_repeat_ngram_size": 0,
29
+ "eos_token_id": 49407,
30
+ "exponential_decay_length_penalty": null,
31
+ "finetuning_task": null,
32
+ "forced_bos_token_id": null,
33
+ "forced_eos_token_id": null,
34
+ "hidden_act": "quick_gelu",
35
+ "hidden_size": 64,
36
+ "id2label": {
37
+ "0": "LABEL_0",
38
+ "1": "LABEL_1"
39
+ },
40
+ "initializer_factor": 1.0,
41
+ "initializer_range": 0.02,
42
+ "intermediate_size": 37,
43
+ "is_decoder": false,
44
+ "is_encoder_decoder": false,
45
+ "label2id": {
46
+ "LABEL_0": 0,
47
+ "LABEL_1": 1
48
+ },
49
+ "layer_norm_eps": 1e-05,
50
+ "length_penalty": 1.0,
51
+ "max_length": 20,
52
+ "max_position_embeddings": 16,
53
+ "min_length": 0,
54
+ "model_type": "owlvit_text_model",
55
+ "no_repeat_ngram_size": 0,
56
+ "num_attention_heads": 4,
57
+ "num_beam_groups": 1,
58
+ "num_beams": 1,
59
+ "num_hidden_layers": 12,
60
+ "num_return_sequences": 1,
61
+ "output_attentions": false,
62
+ "output_hidden_states": false,
63
+ "output_scores": false,
64
+ "pad_token_id": 0,
65
+ "prefix": null,
66
+ "problem_type": null,
67
+ "pruned_heads": {},
68
+ "remove_invalid_values": false,
69
+ "repetition_penalty": 1.0,
70
+ "return_dict": true,
71
+ "return_dict_in_generate": false,
72
+ "sep_token_id": null,
73
+ "suppress_tokens": null,
74
+ "task_specific_params": null,
75
+ "temperature": 1.0,
76
+ "tf_legacy_loss": false,
77
+ "tie_encoder_decoder": false,
78
+ "tie_word_embeddings": true,
79
+ "tokenizer_class": null,
80
+ "top_k": 50,
81
+ "top_p": 1.0,
82
+ "torch_dtype": null,
83
+ "torchscript": false,
84
+ "transformers_version": "4.28.0.dev0",
85
+ "typical_p": 1.0,
86
+ "use_bfloat16": false,
87
+ "vocab_size": 1024
88
+ },
89
+ "torch_dtype": "float32",
90
+ "transformers_version": null,
91
+ "vision_config": {
92
+ "_name_or_path": "",
93
+ "add_cross_attention": false,
94
+ "architectures": null,
95
+ "attention_dropout": 0.1,
96
+ "bad_words_ids": null,
97
+ "begin_suppress_tokens": null,
98
+ "bos_token_id": null,
99
+ "chunk_size_feed_forward": 0,
100
+ "cross_attention_hidden_size": null,
101
+ "decoder_start_token_id": null,
102
+ "diversity_penalty": 0.0,
103
+ "do_sample": false,
104
+ "dropout": 0.1,
105
+ "early_stopping": false,
106
+ "encoder_no_repeat_ngram_size": 0,
107
+ "eos_token_id": null,
108
+ "exponential_decay_length_penalty": null,
109
+ "finetuning_task": null,
110
+ "forced_bos_token_id": null,
111
+ "forced_eos_token_id": null,
112
+ "hidden_act": "quick_gelu",
113
+ "hidden_size": 32,
114
+ "id2label": {
115
+ "0": "LABEL_0",
116
+ "1": "LABEL_1"
117
+ },
118
+ "image_size": 32,
119
+ "initializer_factor": 1.0,
120
+ "initializer_range": 0.02,
121
+ "intermediate_size": 37,
122
+ "is_decoder": false,
123
+ "is_encoder_decoder": false,
124
+ "label2id": {
125
+ "LABEL_0": 0,
126
+ "LABEL_1": 1
127
+ },
128
+ "layer_norm_eps": 1e-05,
129
+ "length_penalty": 1.0,
130
+ "max_length": 20,
131
+ "min_length": 0,
132
+ "model_type": "owlvit_vision_model",
133
+ "no_repeat_ngram_size": 0,
134
+ "num_attention_heads": 4,
135
+ "num_beam_groups": 1,
136
+ "num_beams": 1,
137
+ "num_channels": 3,
138
+ "num_hidden_layers": 5,
139
+ "num_return_sequences": 1,
140
+ "output_attentions": false,
141
+ "output_hidden_states": false,
142
+ "output_scores": false,
143
+ "pad_token_id": null,
144
+ "patch_size": 2,
145
+ "prefix": null,
146
+ "problem_type": null,
147
+ "pruned_heads": {},
148
+ "remove_invalid_values": false,
149
+ "repetition_penalty": 1.0,
150
+ "return_dict": true,
151
+ "return_dict_in_generate": false,
152
+ "sep_token_id": null,
153
+ "suppress_tokens": null,
154
+ "task_specific_params": null,
155
+ "temperature": 1.0,
156
+ "tf_legacy_loss": false,
157
+ "tie_encoder_decoder": false,
158
+ "tie_word_embeddings": true,
159
+ "tokenizer_class": null,
160
+ "top_k": 50,
161
+ "top_p": 1.0,
162
+ "torch_dtype": null,
163
+ "torchscript": false,
164
+ "transformers_version": "4.28.0.dev0",
165
+ "typical_p": 1.0,
166
+ "use_bfloat16": false
167
+ }
168
+ }
merges.txt ADDED
@@ -0,0 +1,727 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #version: 0.2
2
+ t h
3
+ th e</w>
4
+ i n
5
+ a n
6
+ e d</w>
7
+ e r
8
+ r e
9
+ a r
10
+ t i
11
+ o n
12
+ e n
13
+ o f</w>
14
+ o r
15
+ an d</w>
16
+ e r</w>
17
+ o n</w>
18
+ i n</w>
19
+ in g</w>
20
+ s t
21
+ r o
22
+ a l
23
+ i t
24
+ t o</w>
25
+ a s</w>
26
+ a t
27
+ e s</w>
28
+ o u
29
+ h i
30
+ a c
31
+ s i
32
+ a t</w>
33
+ r i
34
+ a l</w>
35
+ e l
36
+ a n</w>
37
+ a m
38
+ o r</w>
39
+ s t</w>
40
+ l i
41
+ u r
42
+ e c
43
+ o m
44
+ d i
45
+ w as</w>
46
+ l y</w>
47
+ e n</w>
48
+ e a
49
+ c h
50
+ u n
51
+ ti on</w>
52
+ l a
53
+ i s</w>
54
+ f i
55
+ o l
56
+ d e
57
+ - @</w>
58
+ @ -@</w>
59
+ r a
60
+ v i
61
+ l e</w>
62
+ l o
63
+ s h
64
+ e m
65
+ b e
66
+ th at</w>
67
+ ' s</w>
68
+ c on
69
+ m a
70
+ f or</w>
71
+ h a
72
+ s u
73
+ b y</w>
74
+ it h</w>
75
+ v e</w>
76
+ w ith</w>
77
+ s e</w>
78
+ c h</w>
79
+ th e
80
+ en t
81
+ p o
82
+ c e</w>
83
+ i l
84
+ s e
85
+ en t</w>
86
+ l e
87
+ c om
88
+ s p
89
+ er e</w>
90
+ p ro
91
+ n o
92
+ b u
93
+ w h
94
+ i t</w>
95
+ t h</w>
96
+ v er
97
+ n e
98
+ c a
99
+ i s
100
+ f or
101
+ a g
102
+ er s</w>
103
+ m o
104
+ g h
105
+ f ro
106
+ t ed</w>
107
+ fro m</w>
108
+ ti on
109
+ o p
110
+ hi s</w>
111
+ a d
112
+ a b
113
+ i c
114
+ h e</w>
115
+ ou n
116
+ a s
117
+ t s</w>
118
+ s c
119
+ d e</w>
120
+ o w
121
+ e x
122
+ w hi
123
+ r u
124
+ t er</w>
125
+ a p
126
+ d s</w>
127
+ w ere</w>
128
+ p re
129
+ d u
130
+ g u
131
+ p ar
132
+ i r
133
+ b o
134
+ th er</w>
135
+ q u
136
+ l u
137
+ t er
138
+ t w
139
+ e s
140
+ re c
141
+ p er
142
+ t a
143
+ at e</w>
144
+ v er</w>
145
+ at ed</w>
146
+ d ing</w>
147
+ it y</w>
148
+ m an
149
+ e ar
150
+ s ed</w>
151
+ d ed</w>
152
+ a u
153
+ al l</w>
154
+ am e</w>
155
+ c i
156
+ on e</w>
157
+ in g
158
+ ar e</w>
159
+ a f
160
+ i r</w>
161
+ a tion</w>
162
+ â Ģ
163
+ ha d</w>
164
+ t r
165
+ u l
166
+ l d</w>
167
+ whi ch</w>
168
+ w a
169
+ i m
170
+ l ea
171
+ b e</w>
172
+ t o
173
+ ti m
174
+ fi r
175
+ w or
176
+ on g</w>
177
+ p or
178
+ m ar
179
+ m e
180
+ al ly</w>
181
+ s o</w>
182
+ ou t</w>
183
+ tion s</w>
184
+ it s</w>
185
+ g h</w>
186
+ g e</w>
187
+ b er</w>
188
+ f e
189
+ p u
190
+ s er
191
+ d er
192
+ p l
193
+ s s</w>
194
+ in e</w>
195
+ in c
196
+ m i
197
+ gh t</w>
198
+ g o
199
+ th is</w>
200
+ t ur
201
+ d a
202
+ ro u
203
+ bu t</w>
204
+ u m
205
+ s on</w>
206
+ w e
207
+ v ed</w>
208
+ si on</w>
209
+ k e</w>
210
+ p la
211
+ the ir</w>
212
+ i es</w>
213
+ fir st</w>
214
+ s a
215
+ o c
216
+ at t
217
+ o f
218
+ p e
219
+ no t</w>
220
+ g i
221
+ n a
222
+ ar y</w>
223
+ m u
224
+ l ed</w>
225
+ âĢ ĵ</w>
226
+ h er</w>
227
+ r an
228
+ c o
229
+ the y</w>
230
+ d er</w>
231
+ al i
232
+ al so</w>
233
+ or e</w>
234
+ e p
235
+ ou ld</w>
236
+ af ter</w>
237
+ s hi
238
+ u s</w>
239
+ e t</w>
240
+ ti c
241
+ st or
242
+ w i
243
+ e v
244
+ o ther</w>
245
+ s h</w>
246
+ t ing</w>
247
+ ar d</w>
248
+ t e
249
+ tw o</w>
250
+ n i
251
+ ha ve</w>
252
+ ou r
253
+ com m
254
+ t e</w>
255
+ ac k</w>
256
+ o o
257
+ f in
258
+ s ec
259
+ ent s</w>
260
+ h as</w>
261
+ com p
262
+ b ec
263
+ k s</w>
264
+ con t
265
+ l and</w>
266
+ be en</w>
267
+ en ce</w>
268
+ k ing</w>
269
+ e l</w>
270
+ ag e</w>
271
+ lo w
272
+ m in
273
+ . @</w>
274
+ @ .@</w>
275
+ om e</w>
276
+ m ent</w>
277
+ ch ar
278
+ g e
279
+ at er</w>
280
+ n or
281
+ h o
282
+ ou s</w>
283
+ wh o</w>
284
+ ea r</w>
285
+ sp ec
286
+ c ol
287
+ el y</w>
288
+ t y</w>
289
+ j o
290
+ ur ing</w>
291
+ du c
292
+ b ri
293
+ st r
294
+ c an
295
+ or i
296
+ t ra
297
+ p a
298
+ sh e</w>
299
+ d o
300
+ ti ve</w>
301
+ m on
302
+ ne w</w>
303
+ r it
304
+ tim e</w>
305
+ on s</w>
306
+ s o
307
+ m an</w>
308
+ d ec
309
+ c ent
310
+ l an
311
+ p i
312
+ ou r</w>
313
+ in ter
314
+ f er
315
+ g ra
316
+ g re
317
+ re s</w>
318
+ inc lu
319
+ m il
320
+ d uring</w>
321
+ ow n</w>
322
+ pre s
323
+ j u
324
+ n ed</w>
325
+ el l</w>
326
+ , @</w>
327
+ @ ,@</w>
328
+ it e</w>
329
+ g en
330
+ wh en</w>
331
+ si g
332
+ b i
333
+ re n
334
+ f a
335
+ g a
336
+ pla y
337
+ en g
338
+ tion al</w>
339
+ oun d</w>
340
+ th ou
341
+ m ore</w>
342
+ re e</w>
343
+ em ber</w>
344
+ e i
345
+ s ou
346
+ s ur
347
+ s ti
348
+ c ar
349
+ for m
350
+ l ar
351
+ s es</w>
352
+ t en
353
+ in to</w>
354
+ t u
355
+ c es</w>
356
+ mo st</w>
357
+ k ed</w>
358
+ wa y</w>
359
+ c re
360
+ c oun
361
+ u p</w>
362
+ l es</w>
363
+ ac e</w>
364
+ al s</w>
365
+ k e
366
+ w ould</w>
367
+ an t</w>
368
+ b er
369
+ f u
370
+ it ed</w>
371
+ p ri
372
+ whi le</w>
373
+ o ver</w>
374
+ ing s</w>
375
+ r e</w>
376
+ fi l
377
+ s y
378
+ e st
379
+ ab le</w>
380
+ w n</w>
381
+ s ea
382
+ ac h
383
+ s ing</w>
384
+ in s</w>
385
+ ti c</w>
386
+ i d</w>
387
+ on ly</w>
388
+ at es</w>
389
+ t ri
390
+ v ing</w>
391
+ b a
392
+ v el
393
+ an ce</w>
394
+ st a
395
+ er n</w>
396
+ f ol
397
+ e en</w>
398
+ in ed</w>
399
+ st ru
400
+ un i
401
+ g ame</w>
402
+ la r</w>
403
+ s el
404
+ b li
405
+ u sed</w>
406
+ n ing</w>
407
+ p s</w>
408
+ ti es</w>
409
+ k no
410
+ c or
411
+ f t</w>
412
+ rec or
413
+ b le</w>
414
+ vi e
415
+ y s</w>
416
+ w il
417
+ ic al</w>
418
+ ap p
419
+ t ro
420
+ th ree</w>
421
+ c la
422
+ ol d</w>
423
+ sh ed</w>
424
+ h ea
425
+ ab out</w>
426
+ w rit
427
+ th an</w>
428
+ st e
429
+ l ater</w>
430
+ ar i
431
+ d y</w>
432
+ pu bli
433
+ lo c
434
+ ag a
435
+ th rou
436
+ s si
437
+ en d</w>
438
+ ma y</w>
439
+ an g
440
+ ac h</w>
441
+ v es</w>
442
+ o g
443
+ hi m</w>
444
+ be tw
445
+ thou gh</w>
446
+ betw een</w>
447
+ u m</w>
448
+ st ar
449
+ sc ri
450
+ re a
451
+ on d</w>
452
+ shi p</w>
453
+ o k</w>
454
+ h el
455
+ s ong</w>
456
+ c hi
457
+ ca p
458
+ e ver</w>
459
+ da y</w>
460
+ c ri
461
+ s ome</w>
462
+ b ro
463
+ n o</w>
464
+ th ere</w>
465
+ an s</w>
466
+ al l
467
+ n um
468
+ r ed</w>
469
+ ear s</w>
470
+ st s</w>
471
+ an y</w>
472
+ w ar
473
+ p h
474
+ p p
475
+ g in
476
+ stru c
477
+ am er
478
+ pro duc
479
+ s ch
480
+ c es
481
+ ur e</w>
482
+ at ing</w>
483
+ em p
484
+ t or
485
+ sea son</w>
486
+ for e</w>
487
+ i c</w>
488
+ c ity</w>
489
+ g ro
490
+ fol low
491
+ su b
492
+ b el
493
+ y ear</w>
494
+ c an</w>
495
+ s in
496
+ wh ere</w>
497
+ an d
498
+ ma de</w>
499
+ re lea
500
+ s m
501
+ b l
502
+ t en</w>
503
+ wi th
504
+ s on
505
+ man y</w>
506
+ a re
507
+ e d
508
+ h ow
509
+ amer ic
510
+ ur y</w>
511
+ st u
512
+ mu si
513
+ c u
514
+ n am
515
+ em ent</w>
516
+ su ch</w>
517
+ al bu
518
+ bu il
519
+ be fore</w>
520
+ e f
521
+ ar m
522
+ t on</w>
523
+ the m</w>
524
+ c al
525
+ b ar
526
+ d es</w>
527
+ m at
528
+ gen er
529
+ o d</w>
530
+ ser ies</w>
531
+ c er
532
+ sh o
533
+ en ti
534
+ h er
535
+ o ver
536
+ an n
537
+ w ell</w>
538
+ wor ld</w>
539
+ g an</w>
540
+ e st</w>
541
+ sec ond</w>
542
+ t ers</w>
543
+ si de</w>
544
+ tr an
545
+ l ine</w>
546
+ tur e</w>
547
+ por t</w>
548
+ be ing</w>
549
+ y ears</w>
550
+ bo th</w>
551
+ in di
552
+ the se</w>
553
+ na tional</w>
554
+ hi stor
555
+ f e</w>
556
+ v o
557
+ st ed</w>
558
+ an i
559
+ b as
560
+ po in
561
+ s ing
562
+ fil m</w>
563
+ p en
564
+ su p
565
+ m is
566
+ c ro
567
+ st ri
568
+ l in
569
+ t re
570
+ wa r</w>
571
+ how ever</w>
572
+ y ing</w>
573
+ l ing</w>
574
+ y p
575
+ ec ted</w>
576
+ di rec
577
+ vi sion</w>
578
+ albu m</w>
579
+ th en</w>
580
+ l l</w>
581
+ se ver
582
+ throu gh</w>
583
+ kno wn</w>
584
+ b or
585
+ c ul
586
+ c lu
587
+ st er</w>
588
+ sou th</w>
589
+ r y</w>
590
+ ec t</w>
591
+ lo w</w>
592
+ p r
593
+ s k
594
+ is o
595
+ nor th</w>
596
+ par t</w>
597
+ f ac
598
+ t ly</w>
599
+ per i
600
+ e u
601
+ b att
602
+ st ate</w>
603
+ c ed</w>
604
+ con si
605
+ in f
606
+ po li
607
+ ol og
608
+ ear ly</w>
609
+ po si
610
+ am es</w>
611
+ w in
612
+ de vel
613
+ o b
614
+ v e
615
+ v en</w>
616
+ op er
617
+ g er
618
+ of fi
619
+ char ac
620
+ m s</w>
621
+ hi gh
622
+ a d</w>
623
+ th o
624
+ sever al</w>
625
+ d re
626
+ de scri
627
+ al e</w>
628
+ num ber</w>
629
+ a ir
630
+ inclu ding</w>
631
+ in st</w>
632
+ aga inst</w>
633
+ l s</w>
634
+ su l
635
+ ep iso
636
+ c am
637
+ di f
638
+ so ci
639
+ bec ame</w>
640
+ li ke</w>
641
+ t el
642
+ f our</w>
643
+ âĢ Ķ</w>
644
+ h ou
645
+ jo h
646
+ un ited</w>
647
+ in v
648
+ un der</w>
649
+ no v
650
+ ti v
651
+ su c
652
+ a tions</w>
653
+ ac k
654
+ t or</w>
655
+ r on
656
+ un d</w>
657
+ w s</w>
658
+ f o
659
+ g r
660
+ devel op
661
+ al though</w>
662
+ cont in
663
+ we st</w>
664
+ ori gin
665
+ musi c</w>
666
+ or s</w>
667
+ d on</w>
668
+ cent ury</w>
669
+ w ard</w>
670
+ wor k</w>
671
+ m e</w>
672
+ am i
673
+ ch a
674
+ ver y</w>
675
+ h ar
676
+ di s
677
+ z ed</w>
678
+ d o</w>
679
+ g s</w>
680
+ t ow
681
+ s ol
682
+ follow ing</w>
683
+ li on</w>
684
+ re ma
685
+ n s</w>
686
+ ti sh</w>
687
+ ch ur
688
+ s om
689
+ m p
690
+ t le</w>
691
+ go ver
692
+ d el
693
+ comp le
694
+ c ur
695
+ u se</w>
696
+ b ack</w>
697
+ h u
698
+ st ern</w>
699
+ be gan</w>
700
+ fi el
701
+ au se</w>
702
+ d ra
703
+ p as
704
+ b il
705
+ ca tion</w>
706
+ d ent</w>
707
+ b ed</w>
708
+ bec ause</w>
709
+ an t
710
+ ea m</w>
711
+ p hi
712
+ y o
713
+ contin u
714
+ ta in</w>
715
+ tr y</w>
716
+ f re
717
+ pe op
718
+ cal led</w>
719
+ f ound</w>
720
+ episo de</w>
721
+ de sig
722
+ m or
723
+ se t</w>
724
+ le y</w>
725
+ ea st</w>
726
+ tr ac
727
+ c ra
preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 32,
4
+ "width": 32
5
+ },
6
+ "do_center_crop": false,
7
+ "do_normalize": true,
8
+ "do_rescale": true,
9
+ "do_resize": true,
10
+ "image_mean": [
11
+ 0.48145466,
12
+ 0.4578275,
13
+ 0.40821073
14
+ ],
15
+ "image_processor_type": "OwlViTImageProcessor",
16
+ "image_std": [
17
+ 0.26862954,
18
+ 0.26130258,
19
+ 0.27577711
20
+ ],
21
+ "resample": 3,
22
+ "rescale_factor": 0.00392156862745098,
23
+ "size": {
24
+ "height": 32,
25
+ "width": 32
26
+ }
27
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f0b6f5353347fb28c5e0b4d1098c3c0df1d83813ba5b2747eaaa2afaf2ac68e
3
+ size 1625029
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "!",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
@@ -0,0 +1,1852 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "!",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "<|startoftext|>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": true,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "<|endoftext|>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": true,
31
+ "special": true
32
+ }
33
+ ],
34
+ "normalizer": {
35
+ "type": "Sequence",
36
+ "normalizers": [
37
+ {
38
+ "type": "NFC"
39
+ },
40
+ {
41
+ "type": "Replace",
42
+ "pattern": {
43
+ "Regex": "\\s+"
44
+ },
45
+ "content": " "
46
+ },
47
+ {
48
+ "type": "Lowercase"
49
+ }
50
+ ]
51
+ },
52
+ "pre_tokenizer": {
53
+ "type": "Sequence",
54
+ "pretokenizers": [
55
+ {
56
+ "type": "Split",
57
+ "pattern": {
58
+ "Regex": "'s|'t|'re|'ve|'m|'ll|'d|[\\p{L}]+|[\\p{N}]|[^\\s\\p{L}\\p{N}]+"
59
+ },
60
+ "behavior": "Removed",
61
+ "invert": true
62
+ },
63
+ {
64
+ "type": "ByteLevel",
65
+ "add_prefix_space": false,
66
+ "trim_offsets": true,
67
+ "use_regex": true
68
+ }
69
+ ]
70
+ },
71
+ "post_processor": {
72
+ "type": "RobertaProcessing",
73
+ "sep": [
74
+ "<|endoftext|>",
75
+ 2
76
+ ],
77
+ "cls": [
78
+ "<|startoftext|>",
79
+ 1
80
+ ],
81
+ "trim_offsets": false,
82
+ "add_prefix_space": false
83
+ },
84
+ "decoder": {
85
+ "type": "ByteLevel",
86
+ "add_prefix_space": true,
87
+ "trim_offsets": true,
88
+ "use_regex": true
89
+ },
90
+ "model": {
91
+ "type": "BPE",
92
+ "dropout": null,
93
+ "unk_token": "<|endoftext|>",
94
+ "continuing_subword_prefix": "",
95
+ "end_of_word_suffix": "</w>",
96
+ "fuse_unk": false,
97
+ "vocab": {
98
+ "!": 0,
99
+ "<|startoftext|>": 1,
100
+ "<|endoftext|>": 2,
101
+ "\"": 3,
102
+ "#": 4,
103
+ "$": 5,
104
+ "%": 6,
105
+ "&": 7,
106
+ "'": 8,
107
+ "(": 9,
108
+ ")": 10,
109
+ "*": 11,
110
+ "+": 12,
111
+ ",": 13,
112
+ "-": 14,
113
+ ".": 15,
114
+ "/": 16,
115
+ "0": 17,
116
+ "1": 18,
117
+ "2": 19,
118
+ "3": 20,
119
+ "4": 21,
120
+ "5": 22,
121
+ "6": 23,
122
+ "7": 24,
123
+ "8": 25,
124
+ "9": 26,
125
+ ":": 27,
126
+ ";": 28,
127
+ "<": 29,
128
+ "=": 30,
129
+ ">": 31,
130
+ "?": 32,
131
+ "@": 33,
132
+ "[": 34,
133
+ "\\": 35,
134
+ "]": 36,
135
+ "^": 37,
136
+ "_": 38,
137
+ "`": 39,
138
+ "a": 40,
139
+ "b": 41,
140
+ "c": 42,
141
+ "d": 43,
142
+ "e": 44,
143
+ "f": 45,
144
+ "g": 46,
145
+ "h": 47,
146
+ "i": 48,
147
+ "j": 49,
148
+ "k": 50,
149
+ "l": 51,
150
+ "m": 52,
151
+ "n": 53,
152
+ "o": 54,
153
+ "p": 55,
154
+ "q": 56,
155
+ "r": 57,
156
+ "s": 58,
157
+ "t": 59,
158
+ "u": 60,
159
+ "v": 61,
160
+ "w": 62,
161
+ "x": 63,
162
+ "y": 64,
163
+ "z": 65,
164
+ "|": 66,
165
+ "}": 67,
166
+ "~": 68,
167
+ "¡": 69,
168
+ "¢": 70,
169
+ "£": 71,
170
+ "¤": 72,
171
+ "¥": 73,
172
+ "¦": 74,
173
+ "§": 75,
174
+ "¨": 76,
175
+ "©": 77,
176
+ "ª": 78,
177
+ "«": 79,
178
+ "¬": 80,
179
+ "®": 81,
180
+ "¯": 82,
181
+ "°": 83,
182
+ "±": 84,
183
+ "²": 85,
184
+ "³": 86,
185
+ "´": 87,
186
+ "µ": 88,
187
+ "¶": 89,
188
+ "·": 90,
189
+ "¸": 91,
190
+ "¹": 92,
191
+ "º": 93,
192
+ "»": 94,
193
+ "¼": 95,
194
+ "½": 96,
195
+ "¾": 97,
196
+ "¿": 98,
197
+ "Â": 99,
198
+ "Ã": 100,
199
+ "Ä": 101,
200
+ "Å": 102,
201
+ "Æ": 103,
202
+ "Ç": 104,
203
+ "È": 105,
204
+ "É": 106,
205
+ "Ê": 107,
206
+ "Ë": 108,
207
+ "Ì": 109,
208
+ "Í": 110,
209
+ "Î": 111,
210
+ "Ï": 112,
211
+ "Ð": 113,
212
+ "Ñ": 114,
213
+ "Ö": 115,
214
+ "×": 116,
215
+ "Ø": 117,
216
+ "Ù": 118,
217
+ "Ü": 119,
218
+ "à": 120,
219
+ "á": 121,
220
+ "â": 122,
221
+ "ã": 123,
222
+ "ä": 124,
223
+ "å": 125,
224
+ "æ": 126,
225
+ "ç": 127,
226
+ "è": 128,
227
+ "é": 129,
228
+ "ë": 130,
229
+ "ì": 131,
230
+ "ï": 132,
231
+ "Ģ": 133,
232
+ "ģ": 134,
233
+ "Ĥ": 135,
234
+ "ĥ": 136,
235
+ "Ħ": 137,
236
+ "ħ": 138,
237
+ "Ĩ": 139,
238
+ "ĩ": 140,
239
+ "Ī": 141,
240
+ "ī": 142,
241
+ "Ĭ": 143,
242
+ "ĭ": 144,
243
+ "Į": 145,
244
+ "į": 146,
245
+ "İ": 147,
246
+ "ı": 148,
247
+ "IJ": 149,
248
+ "ij": 150,
249
+ "Ĵ": 151,
250
+ "ĵ": 152,
251
+ "Ķ": 153,
252
+ "ķ": 154,
253
+ "ĸ": 155,
254
+ "Ĺ": 156,
255
+ "ĺ": 157,
256
+ "Ļ": 158,
257
+ "ļ": 159,
258
+ "Ľ": 160,
259
+ "ľ": 161,
260
+ "Ŀ": 162,
261
+ "ŀ": 163,
262
+ "Ł": 164,
263
+ "ł": 165,
264
+ "Ń": 166,
265
+ "e</w>": 167,
266
+ "n</w>": 168,
267
+ "s</w>": 169,
268
+ "o</w>": 170,
269
+ "g</w>": 171,
270
+ "a</w>": 172,
271
+ "r</w>": 173,
272
+ "t</w>": 174,
273
+ "w</w>": 175,
274
+ "d</w>": 176,
275
+ "y</w>": 177,
276
+ "i</w>": 178,
277
+ "p</w>": 179,
278
+ "l</w>": 180,
279
+ "h</w>": 181,
280
+ "f</w>": 182,
281
+ "k</w>": 183,
282
+ "c</w>": 184,
283
+ "v</w>": 185,
284
+ "m</w>": 186,
285
+ "x</w>": 187,
286
+ "z</w>": 188,
287
+ "u</w>": 189,
288
+ "ľ</w>": 190,
289
+ "[</w>": 191,
290
+ "ģ</w>": 192,
291
+ "|</w>": 193,
292
+ "¼</w>": 194,
293
+ "j</w>": 195,
294
+ "į</w>": 196,
295
+ "²</w>": 197,
296
+ "b</w>": 198,
297
+ "ĩ</w>": 199,
298
+ "Ļ</w>": 200,
299
+ "¿</w>": 201,
300
+ "¡</w>": 202,
301
+ "'</w>": 203,
302
+ "></w>": 204,
303
+ "¢</w>": 205,
304
+ "±</w>": 206,
305
+ "·</w>": 207,
306
+ "¶</w>": 208,
307
+ "«</w>": 209,
308
+ "¤</w>": 210,
309
+ "©</w>": 211,
310
+ "½</w>": 212,
311
+ "³</w>": 213,
312
+ "Ń</w>": 214,
313
+ "\\</w>": 215,
314
+ "+</w>": 216,
315
+ "¸</w>": 217,
316
+ "¨</w>": 218,
317
+ "Ł</w>": 219,
318
+ "ĺ</w>": 220,
319
+ "Ľ</w>": 221,
320
+ "¹</w>": 222,
321
+ "Ĩ</w>": 223,
322
+ "Ģ</w>": 224,
323
+ "Ĺ</w>": 225,
324
+ "°</w>": 226,
325
+ "-</w>": 227,
326
+ "Ĥ</w>": 228,
327
+ "Į</w>": 229,
328
+ "ħ</w>": 230,
329
+ "Ĭ</w>": 231,
330
+ "§</w>": 232,
331
+ "IJ</w>": 233,
332
+ "`</w>": 234,
333
+ "q</w>": 235,
334
+ "ķ</w>": 236,
335
+ "µ</w>": 237,
336
+ "ī</w>": 238,
337
+ "º</w>": 239,
338
+ "!</w>": 240,
339
+ "¯</w>": 241,
340
+ "Ŀ</w>": 242,
341
+ "Ħ</w>": 243,
342
+ "ł</w>": 244,
343
+ "%</w>": 245,
344
+ "£</w>": 246,
345
+ "¦</w>": 247,
346
+ "ŀ</w>": 248,
347
+ "¾</w>": 249,
348
+ "´</w>": 250,
349
+ "ĵ</w>": 251,
350
+ "^</w>": 252,
351
+ "Ķ</w>": 253,
352
+ "ª</w>": 254,
353
+ "»</w>": 255,
354
+ "0</w>": 256,
355
+ "5</w>": 257,
356
+ "ĸ</w>": 258,
357
+ "ļ</w>": 259,
358
+ "İ</w>": 260,
359
+ "=</w>": 261,
360
+ "Ĵ</w>": 262,
361
+ "Ī</w>": 263,
362
+ "ĥ</w>": 264,
363
+ "¥</w>": 265,
364
+ "®</w>": 266,
365
+ "3</w>": 267,
366
+ "¬</w>": 268,
367
+ ".</w>": 269,
368
+ "1</w>": 270,
369
+ "ĭ</w>": 271,
370
+ "ij</w>": 272,
371
+ "@</w>": 273,
372
+ "&</w>": 274,
373
+ "ı</w>": 275,
374
+ "~</w>": 276,
375
+ "8</w>": 277,
376
+ "}</w>": 278,
377
+ "*</w>": 279,
378
+ ";</w>": 280,
379
+ "\"</w>": 281,
380
+ "2</w>": 282,
381
+ "(</w>": 283,
382
+ ")</w>": 284,
383
+ "4</w>": 285,
384
+ "9</w>": 286,
385
+ "/</w>": 287,
386
+ ":</w>": 288,
387
+ "#</w>": 289,
388
+ ",</w>": 290,
389
+ "$</w>": 291,
390
+ "]</w>": 292,
391
+ "<</w>": 293,
392
+ "_</w>": 294,
393
+ "?</w>": 295,
394
+ "6</w>": 296,
395
+ "7</w>": 297,
396
+ "th": 298,
397
+ "the</w>": 299,
398
+ "in": 300,
399
+ "an": 301,
400
+ "ed</w>": 302,
401
+ "er": 303,
402
+ "re": 304,
403
+ "ar": 305,
404
+ "ti": 306,
405
+ "on": 307,
406
+ "en": 308,
407
+ "of</w>": 309,
408
+ "or": 310,
409
+ "and</w>": 311,
410
+ "er</w>": 312,
411
+ "on</w>": 313,
412
+ "in</w>": 314,
413
+ "ing</w>": 315,
414
+ "st": 316,
415
+ "ro": 317,
416
+ "al": 318,
417
+ "it": 319,
418
+ "to</w>": 320,
419
+ "as</w>": 321,
420
+ "at": 322,
421
+ "es</w>": 323,
422
+ "ou": 324,
423
+ "hi": 325,
424
+ "ac": 326,
425
+ "si": 327,
426
+ "at</w>": 328,
427
+ "ri": 329,
428
+ "al</w>": 330,
429
+ "el": 331,
430
+ "an</w>": 332,
431
+ "am": 333,
432
+ "or</w>": 334,
433
+ "st</w>": 335,
434
+ "li": 336,
435
+ "ur": 337,
436
+ "ec": 338,
437
+ "om": 339,
438
+ "di": 340,
439
+ "was</w>": 341,
440
+ "ly</w>": 342,
441
+ "en</w>": 343,
442
+ "ea": 344,
443
+ "ch": 345,
444
+ "un": 346,
445
+ "tion</w>": 347,
446
+ "la": 348,
447
+ "is</w>": 349,
448
+ "fi": 350,
449
+ "ol": 351,
450
+ "de": 352,
451
+ "-@</w>": 353,
452
+ "@-@</w>": 354,
453
+ "ra": 355,
454
+ "vi": 356,
455
+ "le</w>": 357,
456
+ "lo": 358,
457
+ "sh": 359,
458
+ "em": 360,
459
+ "be": 361,
460
+ "that</w>": 362,
461
+ "'s</w>": 363,
462
+ "con": 364,
463
+ "ma": 365,
464
+ "for</w>": 366,
465
+ "ha": 367,
466
+ "su": 368,
467
+ "by</w>": 369,
468
+ "ith</w>": 370,
469
+ "ve</w>": 371,
470
+ "with</w>": 372,
471
+ "se</w>": 373,
472
+ "ch</w>": 374,
473
+ "the": 375,
474
+ "ent": 376,
475
+ "po": 377,
476
+ "ce</w>": 378,
477
+ "il": 379,
478
+ "se": 380,
479
+ "ent</w>": 381,
480
+ "le": 382,
481
+ "com": 383,
482
+ "sp": 384,
483
+ "ere</w>": 385,
484
+ "pro": 386,
485
+ "no": 387,
486
+ "bu": 388,
487
+ "wh": 389,
488
+ "it</w>": 390,
489
+ "th</w>": 391,
490
+ "ver": 392,
491
+ "ne": 393,
492
+ "ca": 394,
493
+ "is": 395,
494
+ "for": 396,
495
+ "ag": 397,
496
+ "ers</w>": 398,
497
+ "mo": 399,
498
+ "gh": 400,
499
+ "fro": 401,
500
+ "ted</w>": 402,
501
+ "from</w>": 403,
502
+ "tion": 404,
503
+ "op": 405,
504
+ "his</w>": 406,
505
+ "ad": 407,
506
+ "ab": 408,
507
+ "ic": 409,
508
+ "he</w>": 410,
509
+ "oun": 411,
510
+ "as": 412,
511
+ "ts</w>": 413,
512
+ "sc": 414,
513
+ "de</w>": 415,
514
+ "ow": 416,
515
+ "ex": 417,
516
+ "whi": 418,
517
+ "ru": 419,
518
+ "ter</w>": 420,
519
+ "ap": 421,
520
+ "ds</w>": 422,
521
+ "were</w>": 423,
522
+ "pre": 424,
523
+ "du": 425,
524
+ "gu": 426,
525
+ "par": 427,
526
+ "ir": 428,
527
+ "bo": 429,
528
+ "ther</w>": 430,
529
+ "qu": 431,
530
+ "lu": 432,
531
+ "ter": 433,
532
+ "tw": 434,
533
+ "es": 435,
534
+ "rec": 436,
535
+ "per": 437,
536
+ "ta": 438,
537
+ "ate</w>": 439,
538
+ "ver</w>": 440,
539
+ "ated</w>": 441,
540
+ "ding</w>": 442,
541
+ "ity</w>": 443,
542
+ "man": 444,
543
+ "ear": 445,
544
+ "sed</w>": 446,
545
+ "ded</w>": 447,
546
+ "au": 448,
547
+ "all</w>": 449,
548
+ "ame</w>": 450,
549
+ "ci": 451,
550
+ "one</w>": 452,
551
+ "ing": 453,
552
+ "are</w>": 454,
553
+ "af": 455,
554
+ "ir</w>": 456,
555
+ "ation</w>": 457,
556
+ "âĢ": 458,
557
+ "had</w>": 459,
558
+ "tr": 460,
559
+ "ul": 461,
560
+ "ld</w>": 462,
561
+ "which</w>": 463,
562
+ "wa": 464,
563
+ "im": 465,
564
+ "lea": 466,
565
+ "be</w>": 467,
566
+ "to": 468,
567
+ "tim": 469,
568
+ "fir": 470,
569
+ "wor": 471,
570
+ "ong</w>": 472,
571
+ "por": 473,
572
+ "mar": 474,
573
+ "me": 475,
574
+ "ally</w>": 476,
575
+ "so</w>": 477,
576
+ "out</w>": 478,
577
+ "tions</w>": 479,
578
+ "its</w>": 480,
579
+ "gh</w>": 481,
580
+ "ge</w>": 482,
581
+ "ber</w>": 483,
582
+ "fe": 484,
583
+ "pu": 485,
584
+ "ser": 486,
585
+ "der": 487,
586
+ "pl": 488,
587
+ "ss</w>": 489,
588
+ "ine</w>": 490,
589
+ "inc": 491,
590
+ "mi": 492,
591
+ "ght</w>": 493,
592
+ "go": 494,
593
+ "this</w>": 495,
594
+ "tur": 496,
595
+ "da": 497,
596
+ "rou": 498,
597
+ "but</w>": 499,
598
+ "um": 500,
599
+ "son</w>": 501,
600
+ "we": 502,
601
+ "ved</w>": 503,
602
+ "sion</w>": 504,
603
+ "ke</w>": 505,
604
+ "pla": 506,
605
+ "their</w>": 507,
606
+ "ies</w>": 508,
607
+ "first</w>": 509,
608
+ "sa": 510,
609
+ "oc": 511,
610
+ "att": 512,
611
+ "of": 513,
612
+ "pe": 514,
613
+ "not</w>": 515,
614
+ "gi": 516,
615
+ "na": 517,
616
+ "ary</w>": 518,
617
+ "mu": 519,
618
+ "led</w>": 520,
619
+ "âĢĵ</w>": 521,
620
+ "her</w>": 522,
621
+ "ran": 523,
622
+ "co": 524,
623
+ "they</w>": 525,
624
+ "der</w>": 526,
625
+ "ali": 527,
626
+ "also</w>": 528,
627
+ "ore</w>": 529,
628
+ "ep": 530,
629
+ "ould</w>": 531,
630
+ "after</w>": 532,
631
+ "shi": 533,
632
+ "us</w>": 534,
633
+ "et</w>": 535,
634
+ "tic": 536,
635
+ "stor": 537,
636
+ "wi": 538,
637
+ "ev": 539,
638
+ "other</w>": 540,
639
+ "sh</w>": 541,
640
+ "ting</w>": 542,
641
+ "ard</w>": 543,
642
+ "te": 544,
643
+ "two</w>": 545,
644
+ "ni": 546,
645
+ "have</w>": 547,
646
+ "our": 548,
647
+ "comm": 549,
648
+ "te</w>": 550,
649
+ "ack</w>": 551,
650
+ "oo": 552,
651
+ "fin": 553,
652
+ "sec": 554,
653
+ "ents</w>": 555,
654
+ "has</w>": 556,
655
+ "comp": 557,
656
+ "bec": 558,
657
+ "ks</w>": 559,
658
+ "cont": 560,
659
+ "land</w>": 561,
660
+ "been</w>": 562,
661
+ "ence</w>": 563,
662
+ "king</w>": 564,
663
+ "el</w>": 565,
664
+ "age</w>": 566,
665
+ "low": 567,
666
+ "min": 568,
667
+ ".@</w>": 569,
668
+ "@.@</w>": 570,
669
+ "ome</w>": 571,
670
+ "ment</w>": 572,
671
+ "char": 573,
672
+ "ge": 574,
673
+ "ater</w>": 575,
674
+ "nor": 576,
675
+ "ho": 577,
676
+ "ous</w>": 578,
677
+ "who</w>": 579,
678
+ "ear</w>": 580,
679
+ "spec": 581,
680
+ "col": 582,
681
+ "ely</w>": 583,
682
+ "ty</w>": 584,
683
+ "jo": 585,
684
+ "uring</w>": 586,
685
+ "duc": 587,
686
+ "bri": 588,
687
+ "str": 589,
688
+ "can": 590,
689
+ "ori": 591,
690
+ "tra": 592,
691
+ "pa": 593,
692
+ "she</w>": 594,
693
+ "do": 595,
694
+ "tive</w>": 596,
695
+ "mon": 597,
696
+ "new</w>": 598,
697
+ "rit": 599,
698
+ "time</w>": 600,
699
+ "ons</w>": 601,
700
+ "so": 602,
701
+ "man</w>": 603,
702
+ "dec": 604,
703
+ "cent": 605,
704
+ "lan": 606,
705
+ "pi": 607,
706
+ "our</w>": 608,
707
+ "inter": 609,
708
+ "fer": 610,
709
+ "gra": 611,
710
+ "gre": 612,
711
+ "res</w>": 613,
712
+ "inclu": 614,
713
+ "mil": 615,
714
+ "during</w>": 616,
715
+ "own</w>": 617,
716
+ "pres": 618,
717
+ "ju": 619,
718
+ "ned</w>": 620,
719
+ "ell</w>": 621,
720
+ ",@</w>": 622,
721
+ "@,@</w>": 623,
722
+ "ite</w>": 624,
723
+ "gen": 625,
724
+ "when</w>": 626,
725
+ "sig": 627,
726
+ "bi": 628,
727
+ "ren": 629,
728
+ "fa": 630,
729
+ "ga": 631,
730
+ "play": 632,
731
+ "eng": 633,
732
+ "tional</w>": 634,
733
+ "ound</w>": 635,
734
+ "thou": 636,
735
+ "more</w>": 637,
736
+ "ree</w>": 638,
737
+ "ember</w>": 639,
738
+ "ei": 640,
739
+ "sou": 641,
740
+ "sur": 642,
741
+ "sti": 643,
742
+ "car": 644,
743
+ "form": 645,
744
+ "lar": 646,
745
+ "ses</w>": 647,
746
+ "ten": 648,
747
+ "into</w>": 649,
748
+ "tu": 650,
749
+ "ces</w>": 651,
750
+ "most</w>": 652,
751
+ "ked</w>": 653,
752
+ "way</w>": 654,
753
+ "cre": 655,
754
+ "coun": 656,
755
+ "up</w>": 657,
756
+ "les</w>": 658,
757
+ "ace</w>": 659,
758
+ "als</w>": 660,
759
+ "ke": 661,
760
+ "would</w>": 662,
761
+ "ant</w>": 663,
762
+ "ber": 664,
763
+ "fu": 665,
764
+ "ited</w>": 666,
765
+ "pri": 667,
766
+ "while</w>": 668,
767
+ "over</w>": 669,
768
+ "ings</w>": 670,
769
+ "re</w>": 671,
770
+ "fil": 672,
771
+ "sy": 673,
772
+ "est": 674,
773
+ "able</w>": 675,
774
+ "wn</w>": 676,
775
+ "sea": 677,
776
+ "ach": 678,
777
+ "sing</w>": 679,
778
+ "ins</w>": 680,
779
+ "tic</w>": 681,
780
+ "id</w>": 682,
781
+ "only</w>": 683,
782
+ "ates</w>": 684,
783
+ "tri": 685,
784
+ "ving</w>": 686,
785
+ "ba": 687,
786
+ "vel": 688,
787
+ "ance</w>": 689,
788
+ "sta": 690,
789
+ "ern</w>": 691,
790
+ "fol": 692,
791
+ "een</w>": 693,
792
+ "ined</w>": 694,
793
+ "stru": 695,
794
+ "uni": 696,
795
+ "game</w>": 697,
796
+ "lar</w>": 698,
797
+ "sel": 699,
798
+ "bli": 700,
799
+ "used</w>": 701,
800
+ "ning</w>": 702,
801
+ "ps</w>": 703,
802
+ "ties</w>": 704,
803
+ "kno": 705,
804
+ "cor": 706,
805
+ "ft</w>": 707,
806
+ "recor": 708,
807
+ "ble</w>": 709,
808
+ "vie": 710,
809
+ "ys</w>": 711,
810
+ "wil": 712,
811
+ "ical</w>": 713,
812
+ "app": 714,
813
+ "tro": 715,
814
+ "three</w>": 716,
815
+ "cla": 717,
816
+ "old</w>": 718,
817
+ "shed</w>": 719,
818
+ "hea": 720,
819
+ "about</w>": 721,
820
+ "writ": 722,
821
+ "than</w>": 723,
822
+ "ste": 724,
823
+ "later</w>": 725,
824
+ "ari": 726,
825
+ "dy</w>": 727,
826
+ "publi": 728,
827
+ "loc": 729,
828
+ "aga": 730,
829
+ "throu": 731,
830
+ "ssi": 732,
831
+ "end</w>": 733,
832
+ "may</w>": 734,
833
+ "ang": 735,
834
+ "ach</w>": 736,
835
+ "ves</w>": 737,
836
+ "og": 738,
837
+ "him</w>": 739,
838
+ "betw": 740,
839
+ "though</w>": 741,
840
+ "between</w>": 742,
841
+ "um</w>": 743,
842
+ "star": 744,
843
+ "scri": 745,
844
+ "rea": 746,
845
+ "ond</w>": 747,
846
+ "ship</w>": 748,
847
+ "ok</w>": 749,
848
+ "hel": 750,
849
+ "song</w>": 751,
850
+ "chi": 752,
851
+ "cap": 753,
852
+ "ever</w>": 754,
853
+ "day</w>": 755,
854
+ "cri": 756,
855
+ "some</w>": 757,
856
+ "bro": 758,
857
+ "no</w>": 759,
858
+ "there</w>": 760,
859
+ "ans</w>": 761,
860
+ "all": 762,
861
+ "num": 763,
862
+ "red</w>": 764,
863
+ "ears</w>": 765,
864
+ "sts</w>": 766,
865
+ "any</w>": 767,
866
+ "war": 768,
867
+ "ph": 769,
868
+ "pp": 770,
869
+ "gin": 771,
870
+ "struc": 772,
871
+ "amer": 773,
872
+ "produc": 774,
873
+ "sch": 775,
874
+ "ces": 776,
875
+ "ure</w>": 777,
876
+ "ating</w>": 778,
877
+ "emp": 779,
878
+ "tor": 780,
879
+ "season</w>": 781,
880
+ "fore</w>": 782,
881
+ "ic</w>": 783,
882
+ "city</w>": 784,
883
+ "gro": 785,
884
+ "follow": 786,
885
+ "sub": 787,
886
+ "bel": 788,
887
+ "year</w>": 789,
888
+ "can</w>": 790,
889
+ "sin": 791,
890
+ "where</w>": 792,
891
+ "and": 793,
892
+ "made</w>": 794,
893
+ "relea": 795,
894
+ "sm": 796,
895
+ "bl": 797,
896
+ "ten</w>": 798,
897
+ "with": 799,
898
+ "son": 800,
899
+ "many</w>": 801,
900
+ "are": 802,
901
+ "ed": 803,
902
+ "how": 804,
903
+ "americ": 805,
904
+ "ury</w>": 806,
905
+ "stu": 807,
906
+ "musi": 808,
907
+ "cu": 809,
908
+ "nam": 810,
909
+ "ement</w>": 811,
910
+ "such</w>": 812,
911
+ "albu": 813,
912
+ "buil": 814,
913
+ "before</w>": 815,
914
+ "ef": 816,
915
+ "arm": 817,
916
+ "ton</w>": 818,
917
+ "them</w>": 819,
918
+ "cal": 820,
919
+ "bar": 821,
920
+ "des</w>": 822,
921
+ "mat": 823,
922
+ "gener": 824,
923
+ "od</w>": 825,
924
+ "series</w>": 826,
925
+ "cer": 827,
926
+ "sho": 828,
927
+ "enti": 829,
928
+ "her": 830,
929
+ "over": 831,
930
+ "ann": 832,
931
+ "well</w>": 833,
932
+ "world</w>": 834,
933
+ "gan</w>": 835,
934
+ "est</w>": 836,
935
+ "second</w>": 837,
936
+ "ters</w>": 838,
937
+ "side</w>": 839,
938
+ "tran": 840,
939
+ "line</w>": 841,
940
+ "ture</w>": 842,
941
+ "port</w>": 843,
942
+ "being</w>": 844,
943
+ "years</w>": 845,
944
+ "both</w>": 846,
945
+ "indi": 847,
946
+ "these</w>": 848,
947
+ "national</w>": 849,
948
+ "histor": 850,
949
+ "fe</w>": 851,
950
+ "vo": 852,
951
+ "sted</w>": 853,
952
+ "ani": 854,
953
+ "bas": 855,
954
+ "poin": 856,
955
+ "sing": 857,
956
+ "film</w>": 858,
957
+ "pen": 859,
958
+ "sup": 860,
959
+ "mis": 861,
960
+ "cro": 862,
961
+ "stri": 863,
962
+ "lin": 864,
963
+ "tre": 865,
964
+ "war</w>": 866,
965
+ "however</w>": 867,
966
+ "ying</w>": 868,
967
+ "ling</w>": 869,
968
+ "yp": 870,
969
+ "ected</w>": 871,
970
+ "direc": 872,
971
+ "vision</w>": 873,
972
+ "album</w>": 874,
973
+ "then</w>": 875,
974
+ "ll</w>": 876,
975
+ "sever": 877,
976
+ "through</w>": 878,
977
+ "known</w>": 879,
978
+ "bor": 880,
979
+ "cul": 881,
980
+ "clu": 882,
981
+ "ster</w>": 883,
982
+ "south</w>": 884,
983
+ "ry</w>": 885,
984
+ "ect</w>": 886,
985
+ "low</w>": 887,
986
+ "pr": 888,
987
+ "sk": 889,
988
+ "iso": 890,
989
+ "north</w>": 891,
990
+ "part</w>": 892,
991
+ "fac": 893,
992
+ "tly</w>": 894,
993
+ "peri": 895,
994
+ "eu": 896,
995
+ "batt": 897,
996
+ "state</w>": 898,
997
+ "ced</w>": 899,
998
+ "consi": 900,
999
+ "inf": 901,
1000
+ "poli": 902,
1001
+ "olog": 903,
1002
+ "early</w>": 904,
1003
+ "posi": 905,
1004
+ "ames</w>": 906,
1005
+ "win": 907,
1006
+ "devel": 908,
1007
+ "ob": 909,
1008
+ "ve": 910,
1009
+ "ven</w>": 911,
1010
+ "oper": 912,
1011
+ "ger": 913,
1012
+ "offi": 914,
1013
+ "charac": 915,
1014
+ "ms</w>": 916,
1015
+ "high": 917,
1016
+ "ad</w>": 918,
1017
+ "tho": 919,
1018
+ "several</w>": 920,
1019
+ "dre": 921,
1020
+ "descri": 922,
1021
+ "ale</w>": 923,
1022
+ "number</w>": 924,
1023
+ "air": 925,
1024
+ "including</w>": 926,
1025
+ "inst</w>": 927,
1026
+ "against</w>": 928,
1027
+ "ls</w>": 929,
1028
+ "sul": 930,
1029
+ "episo": 931,
1030
+ "cam": 932,
1031
+ "dif": 933,
1032
+ "soci": 934,
1033
+ "became</w>": 935,
1034
+ "like</w>": 936,
1035
+ "tel": 937,
1036
+ "four</w>": 938,
1037
+ "âĢĶ</w>": 939,
1038
+ "hou": 940,
1039
+ "joh": 941,
1040
+ "united</w>": 942,
1041
+ "inv": 943,
1042
+ "under</w>": 944,
1043
+ "nov": 945,
1044
+ "tiv": 946,
1045
+ "suc": 947,
1046
+ "ations</w>": 948,
1047
+ "ack": 949,
1048
+ "tor</w>": 950,
1049
+ "ron": 951,
1050
+ "und</w>": 952,
1051
+ "ws</w>": 953,
1052
+ "fo": 954,
1053
+ "gr": 955,
1054
+ "develop": 956,
1055
+ "although</w>": 957,
1056
+ "contin": 958,
1057
+ "west</w>": 959,
1058
+ "origin": 960,
1059
+ "music</w>": 961,
1060
+ "ors</w>": 962,
1061
+ "don</w>": 963,
1062
+ "century</w>": 964,
1063
+ "ward</w>": 965,
1064
+ "work</w>": 966,
1065
+ "me</w>": 967,
1066
+ "ami": 968,
1067
+ "cha": 969,
1068
+ "very</w>": 970,
1069
+ "har": 971,
1070
+ "dis": 972,
1071
+ "zed</w>": 973,
1072
+ "do</w>": 974,
1073
+ "gs</w>": 975,
1074
+ "tow": 976,
1075
+ "sol": 977,
1076
+ "following</w>": 978,
1077
+ "lion</w>": 979,
1078
+ "rema": 980,
1079
+ "ns</w>": 981,
1080
+ "tish</w>": 982,
1081
+ "chur": 983,
1082
+ "som": 984,
1083
+ "mp": 985,
1084
+ "tle</w>": 986,
1085
+ "gover": 987,
1086
+ "del": 988,
1087
+ "comple": 989,
1088
+ "cur": 990,
1089
+ "use</w>": 991,
1090
+ "back</w>": 992,
1091
+ "hu": 993,
1092
+ "stern</w>": 994,
1093
+ "began</w>": 995,
1094
+ "fiel": 996,
1095
+ "ause</w>": 997,
1096
+ "dra": 998,
1097
+ "pas": 999,
1098
+ "bil": 1000,
1099
+ "cation</w>": 1001,
1100
+ "dent</w>": 1002,
1101
+ "bed</w>": 1003,
1102
+ "because</w>": 1004,
1103
+ "ant": 1005,
1104
+ "eam</w>": 1006,
1105
+ "phi": 1007,
1106
+ "yo": 1008,
1107
+ "continu": 1009,
1108
+ "tain</w>": 1010,
1109
+ "try</w>": 1011,
1110
+ "fre": 1012,
1111
+ "peop": 1013,
1112
+ "called</w>": 1014,
1113
+ "found</w>": 1015,
1114
+ "episode</w>": 1016,
1115
+ "desig": 1017,
1116
+ "mor": 1018,
1117
+ "set</w>": 1019,
1118
+ "ley</w>": 1020,
1119
+ "east</w>": 1021,
1120
+ "trac": 1022,
1121
+ "cra": 1023
1122
+ },
1123
+ "merges": [
1124
+ "t h",
1125
+ "th e</w>",
1126
+ "i n",
1127
+ "a n",
1128
+ "e d</w>",
1129
+ "e r",
1130
+ "r e",
1131
+ "a r",
1132
+ "t i",
1133
+ "o n",
1134
+ "e n",
1135
+ "o f</w>",
1136
+ "o r",
1137
+ "an d</w>",
1138
+ "e r</w>",
1139
+ "o n</w>",
1140
+ "i n</w>",
1141
+ "in g</w>",
1142
+ "s t",
1143
+ "r o",
1144
+ "a l",
1145
+ "i t",
1146
+ "t o</w>",
1147
+ "a s</w>",
1148
+ "a t",
1149
+ "e s</w>",
1150
+ "o u",
1151
+ "h i",
1152
+ "a c",
1153
+ "s i",
1154
+ "a t</w>",
1155
+ "r i",
1156
+ "a l</w>",
1157
+ "e l",
1158
+ "a n</w>",
1159
+ "a m",
1160
+ "o r</w>",
1161
+ "s t</w>",
1162
+ "l i",
1163
+ "u r",
1164
+ "e c",
1165
+ "o m",
1166
+ "d i",
1167
+ "w as</w>",
1168
+ "l y</w>",
1169
+ "e n</w>",
1170
+ "e a",
1171
+ "c h",
1172
+ "u n",
1173
+ "ti on</w>",
1174
+ "l a",
1175
+ "i s</w>",
1176
+ "f i",
1177
+ "o l",
1178
+ "d e",
1179
+ "- @</w>",
1180
+ "@ -@</w>",
1181
+ "r a",
1182
+ "v i",
1183
+ "l e</w>",
1184
+ "l o",
1185
+ "s h",
1186
+ "e m",
1187
+ "b e",
1188
+ "th at</w>",
1189
+ "' s</w>",
1190
+ "c on",
1191
+ "m a",
1192
+ "f or</w>",
1193
+ "h a",
1194
+ "s u",
1195
+ "b y</w>",
1196
+ "it h</w>",
1197
+ "v e</w>",
1198
+ "w ith</w>",
1199
+ "s e</w>",
1200
+ "c h</w>",
1201
+ "th e",
1202
+ "en t",
1203
+ "p o",
1204
+ "c e</w>",
1205
+ "i l",
1206
+ "s e",
1207
+ "en t</w>",
1208
+ "l e",
1209
+ "c om",
1210
+ "s p",
1211
+ "er e</w>",
1212
+ "p ro",
1213
+ "n o",
1214
+ "b u",
1215
+ "w h",
1216
+ "i t</w>",
1217
+ "t h</w>",
1218
+ "v er",
1219
+ "n e",
1220
+ "c a",
1221
+ "i s",
1222
+ "f or",
1223
+ "a g",
1224
+ "er s</w>",
1225
+ "m o",
1226
+ "g h",
1227
+ "f ro",
1228
+ "t ed</w>",
1229
+ "fro m</w>",
1230
+ "ti on",
1231
+ "o p",
1232
+ "hi s</w>",
1233
+ "a d",
1234
+ "a b",
1235
+ "i c",
1236
+ "h e</w>",
1237
+ "ou n",
1238
+ "a s",
1239
+ "t s</w>",
1240
+ "s c",
1241
+ "d e</w>",
1242
+ "o w",
1243
+ "e x",
1244
+ "w hi",
1245
+ "r u",
1246
+ "t er</w>",
1247
+ "a p",
1248
+ "d s</w>",
1249
+ "w ere</w>",
1250
+ "p re",
1251
+ "d u",
1252
+ "g u",
1253
+ "p ar",
1254
+ "i r",
1255
+ "b o",
1256
+ "th er</w>",
1257
+ "q u",
1258
+ "l u",
1259
+ "t er",
1260
+ "t w",
1261
+ "e s",
1262
+ "re c",
1263
+ "p er",
1264
+ "t a",
1265
+ "at e</w>",
1266
+ "v er</w>",
1267
+ "at ed</w>",
1268
+ "d ing</w>",
1269
+ "it y</w>",
1270
+ "m an",
1271
+ "e ar",
1272
+ "s ed</w>",
1273
+ "d ed</w>",
1274
+ "a u",
1275
+ "al l</w>",
1276
+ "am e</w>",
1277
+ "c i",
1278
+ "on e</w>",
1279
+ "in g",
1280
+ "ar e</w>",
1281
+ "a f",
1282
+ "i r</w>",
1283
+ "a tion</w>",
1284
+ "â Ģ",
1285
+ "ha d</w>",
1286
+ "t r",
1287
+ "u l",
1288
+ "l d</w>",
1289
+ "whi ch</w>",
1290
+ "w a",
1291
+ "i m",
1292
+ "l ea",
1293
+ "b e</w>",
1294
+ "t o",
1295
+ "ti m",
1296
+ "fi r",
1297
+ "w or",
1298
+ "on g</w>",
1299
+ "p or",
1300
+ "m ar",
1301
+ "m e",
1302
+ "al ly</w>",
1303
+ "s o</w>",
1304
+ "ou t</w>",
1305
+ "tion s</w>",
1306
+ "it s</w>",
1307
+ "g h</w>",
1308
+ "g e</w>",
1309
+ "b er</w>",
1310
+ "f e",
1311
+ "p u",
1312
+ "s er",
1313
+ "d er",
1314
+ "p l",
1315
+ "s s</w>",
1316
+ "in e</w>",
1317
+ "in c",
1318
+ "m i",
1319
+ "gh t</w>",
1320
+ "g o",
1321
+ "th is</w>",
1322
+ "t ur",
1323
+ "d a",
1324
+ "ro u",
1325
+ "bu t</w>",
1326
+ "u m",
1327
+ "s on</w>",
1328
+ "w e",
1329
+ "v ed</w>",
1330
+ "si on</w>",
1331
+ "k e</w>",
1332
+ "p la",
1333
+ "the ir</w>",
1334
+ "i es</w>",
1335
+ "fir st</w>",
1336
+ "s a",
1337
+ "o c",
1338
+ "at t",
1339
+ "o f",
1340
+ "p e",
1341
+ "no t</w>",
1342
+ "g i",
1343
+ "n a",
1344
+ "ar y</w>",
1345
+ "m u",
1346
+ "l ed</w>",
1347
+ "âĢ ĵ</w>",
1348
+ "h er</w>",
1349
+ "r an",
1350
+ "c o",
1351
+ "the y</w>",
1352
+ "d er</w>",
1353
+ "al i",
1354
+ "al so</w>",
1355
+ "or e</w>",
1356
+ "e p",
1357
+ "ou ld</w>",
1358
+ "af ter</w>",
1359
+ "s hi",
1360
+ "u s</w>",
1361
+ "e t</w>",
1362
+ "ti c",
1363
+ "st or",
1364
+ "w i",
1365
+ "e v",
1366
+ "o ther</w>",
1367
+ "s h</w>",
1368
+ "t ing</w>",
1369
+ "ar d</w>",
1370
+ "t e",
1371
+ "tw o</w>",
1372
+ "n i",
1373
+ "ha ve</w>",
1374
+ "ou r",
1375
+ "com m",
1376
+ "t e</w>",
1377
+ "ac k</w>",
1378
+ "o o",
1379
+ "f in",
1380
+ "s ec",
1381
+ "ent s</w>",
1382
+ "h as</w>",
1383
+ "com p",
1384
+ "b ec",
1385
+ "k s</w>",
1386
+ "con t",
1387
+ "l and</w>",
1388
+ "be en</w>",
1389
+ "en ce</w>",
1390
+ "k ing</w>",
1391
+ "e l</w>",
1392
+ "ag e</w>",
1393
+ "lo w",
1394
+ "m in",
1395
+ ". @</w>",
1396
+ "@ .@</w>",
1397
+ "om e</w>",
1398
+ "m ent</w>",
1399
+ "ch ar",
1400
+ "g e",
1401
+ "at er</w>",
1402
+ "n or",
1403
+ "h o",
1404
+ "ou s</w>",
1405
+ "wh o</w>",
1406
+ "ea r</w>",
1407
+ "sp ec",
1408
+ "c ol",
1409
+ "el y</w>",
1410
+ "t y</w>",
1411
+ "j o",
1412
+ "ur ing</w>",
1413
+ "du c",
1414
+ "b ri",
1415
+ "st r",
1416
+ "c an",
1417
+ "or i",
1418
+ "t ra",
1419
+ "p a",
1420
+ "sh e</w>",
1421
+ "d o",
1422
+ "ti ve</w>",
1423
+ "m on",
1424
+ "ne w</w>",
1425
+ "r it",
1426
+ "tim e</w>",
1427
+ "on s</w>",
1428
+ "s o",
1429
+ "m an</w>",
1430
+ "d ec",
1431
+ "c ent",
1432
+ "l an",
1433
+ "p i",
1434
+ "ou r</w>",
1435
+ "in ter",
1436
+ "f er",
1437
+ "g ra",
1438
+ "g re",
1439
+ "re s</w>",
1440
+ "inc lu",
1441
+ "m il",
1442
+ "d uring</w>",
1443
+ "ow n</w>",
1444
+ "pre s",
1445
+ "j u",
1446
+ "n ed</w>",
1447
+ "el l</w>",
1448
+ ", @</w>",
1449
+ "@ ,@</w>",
1450
+ "it e</w>",
1451
+ "g en",
1452
+ "wh en</w>",
1453
+ "si g",
1454
+ "b i",
1455
+ "re n",
1456
+ "f a",
1457
+ "g a",
1458
+ "pla y",
1459
+ "en g",
1460
+ "tion al</w>",
1461
+ "oun d</w>",
1462
+ "th ou",
1463
+ "m ore</w>",
1464
+ "re e</w>",
1465
+ "em ber</w>",
1466
+ "e i",
1467
+ "s ou",
1468
+ "s ur",
1469
+ "s ti",
1470
+ "c ar",
1471
+ "for m",
1472
+ "l ar",
1473
+ "s es</w>",
1474
+ "t en",
1475
+ "in to</w>",
1476
+ "t u",
1477
+ "c es</w>",
1478
+ "mo st</w>",
1479
+ "k ed</w>",
1480
+ "wa y</w>",
1481
+ "c re",
1482
+ "c oun",
1483
+ "u p</w>",
1484
+ "l es</w>",
1485
+ "ac e</w>",
1486
+ "al s</w>",
1487
+ "k e",
1488
+ "w ould</w>",
1489
+ "an t</w>",
1490
+ "b er",
1491
+ "f u",
1492
+ "it ed</w>",
1493
+ "p ri",
1494
+ "whi le</w>",
1495
+ "o ver</w>",
1496
+ "ing s</w>",
1497
+ "r e</w>",
1498
+ "fi l",
1499
+ "s y",
1500
+ "e st",
1501
+ "ab le</w>",
1502
+ "w n</w>",
1503
+ "s ea",
1504
+ "ac h",
1505
+ "s ing</w>",
1506
+ "in s</w>",
1507
+ "ti c</w>",
1508
+ "i d</w>",
1509
+ "on ly</w>",
1510
+ "at es</w>",
1511
+ "t ri",
1512
+ "v ing</w>",
1513
+ "b a",
1514
+ "v el",
1515
+ "an ce</w>",
1516
+ "st a",
1517
+ "er n</w>",
1518
+ "f ol",
1519
+ "e en</w>",
1520
+ "in ed</w>",
1521
+ "st ru",
1522
+ "un i",
1523
+ "g ame</w>",
1524
+ "la r</w>",
1525
+ "s el",
1526
+ "b li",
1527
+ "u sed</w>",
1528
+ "n ing</w>",
1529
+ "p s</w>",
1530
+ "ti es</w>",
1531
+ "k no",
1532
+ "c or",
1533
+ "f t</w>",
1534
+ "rec or",
1535
+ "b le</w>",
1536
+ "vi e",
1537
+ "y s</w>",
1538
+ "w il",
1539
+ "ic al</w>",
1540
+ "ap p",
1541
+ "t ro",
1542
+ "th ree</w>",
1543
+ "c la",
1544
+ "ol d</w>",
1545
+ "sh ed</w>",
1546
+ "h ea",
1547
+ "ab out</w>",
1548
+ "w rit",
1549
+ "th an</w>",
1550
+ "st e",
1551
+ "l ater</w>",
1552
+ "ar i",
1553
+ "d y</w>",
1554
+ "pu bli",
1555
+ "lo c",
1556
+ "ag a",
1557
+ "th rou",
1558
+ "s si",
1559
+ "en d</w>",
1560
+ "ma y</w>",
1561
+ "an g",
1562
+ "ac h</w>",
1563
+ "v es</w>",
1564
+ "o g",
1565
+ "hi m</w>",
1566
+ "be tw",
1567
+ "thou gh</w>",
1568
+ "betw een</w>",
1569
+ "u m</w>",
1570
+ "st ar",
1571
+ "sc ri",
1572
+ "re a",
1573
+ "on d</w>",
1574
+ "shi p</w>",
1575
+ "o k</w>",
1576
+ "h el",
1577
+ "s ong</w>",
1578
+ "c hi",
1579
+ "ca p",
1580
+ "e ver</w>",
1581
+ "da y</w>",
1582
+ "c ri",
1583
+ "s ome</w>",
1584
+ "b ro",
1585
+ "n o</w>",
1586
+ "th ere</w>",
1587
+ "an s</w>",
1588
+ "al l",
1589
+ "n um",
1590
+ "r ed</w>",
1591
+ "ear s</w>",
1592
+ "st s</w>",
1593
+ "an y</w>",
1594
+ "w ar",
1595
+ "p h",
1596
+ "p p",
1597
+ "g in",
1598
+ "stru c",
1599
+ "am er",
1600
+ "pro duc",
1601
+ "s ch",
1602
+ "c es",
1603
+ "ur e</w>",
1604
+ "at ing</w>",
1605
+ "em p",
1606
+ "t or",
1607
+ "sea son</w>",
1608
+ "for e</w>",
1609
+ "i c</w>",
1610
+ "c ity</w>",
1611
+ "g ro",
1612
+ "fol low",
1613
+ "su b",
1614
+ "b el",
1615
+ "y ear</w>",
1616
+ "c an</w>",
1617
+ "s in",
1618
+ "wh ere</w>",
1619
+ "an d",
1620
+ "ma de</w>",
1621
+ "re lea",
1622
+ "s m",
1623
+ "b l",
1624
+ "t en</w>",
1625
+ "wi th",
1626
+ "s on",
1627
+ "man y</w>",
1628
+ "a re",
1629
+ "e d",
1630
+ "h ow",
1631
+ "amer ic",
1632
+ "ur y</w>",
1633
+ "st u",
1634
+ "mu si",
1635
+ "c u",
1636
+ "n am",
1637
+ "em ent</w>",
1638
+ "su ch</w>",
1639
+ "al bu",
1640
+ "bu il",
1641
+ "be fore</w>",
1642
+ "e f",
1643
+ "ar m",
1644
+ "t on</w>",
1645
+ "the m</w>",
1646
+ "c al",
1647
+ "b ar",
1648
+ "d es</w>",
1649
+ "m at",
1650
+ "gen er",
1651
+ "o d</w>",
1652
+ "ser ies</w>",
1653
+ "c er",
1654
+ "sh o",
1655
+ "en ti",
1656
+ "h er",
1657
+ "o ver",
1658
+ "an n",
1659
+ "w ell</w>",
1660
+ "wor ld</w>",
1661
+ "g an</w>",
1662
+ "e st</w>",
1663
+ "sec ond</w>",
1664
+ "t ers</w>",
1665
+ "si de</w>",
1666
+ "tr an",
1667
+ "l ine</w>",
1668
+ "tur e</w>",
1669
+ "por t</w>",
1670
+ "be ing</w>",
1671
+ "y ears</w>",
1672
+ "bo th</w>",
1673
+ "in di",
1674
+ "the se</w>",
1675
+ "na tional</w>",
1676
+ "hi stor",
1677
+ "f e</w>",
1678
+ "v o",
1679
+ "st ed</w>",
1680
+ "an i",
1681
+ "b as",
1682
+ "po in",
1683
+ "s ing",
1684
+ "fil m</w>",
1685
+ "p en",
1686
+ "su p",
1687
+ "m is",
1688
+ "c ro",
1689
+ "st ri",
1690
+ "l in",
1691
+ "t re",
1692
+ "wa r</w>",
1693
+ "how ever</w>",
1694
+ "y ing</w>",
1695
+ "l ing</w>",
1696
+ "y p",
1697
+ "ec ted</w>",
1698
+ "di rec",
1699
+ "vi sion</w>",
1700
+ "albu m</w>",
1701
+ "th en</w>",
1702
+ "l l</w>",
1703
+ "se ver",
1704
+ "throu gh</w>",
1705
+ "kno wn</w>",
1706
+ "b or",
1707
+ "c ul",
1708
+ "c lu",
1709
+ "st er</w>",
1710
+ "sou th</w>",
1711
+ "r y</w>",
1712
+ "ec t</w>",
1713
+ "lo w</w>",
1714
+ "p r",
1715
+ "s k",
1716
+ "is o",
1717
+ "nor th</w>",
1718
+ "par t</w>",
1719
+ "f ac",
1720
+ "t ly</w>",
1721
+ "per i",
1722
+ "e u",
1723
+ "b att",
1724
+ "st ate</w>",
1725
+ "c ed</w>",
1726
+ "con si",
1727
+ "in f",
1728
+ "po li",
1729
+ "ol og",
1730
+ "ear ly</w>",
1731
+ "po si",
1732
+ "am es</w>",
1733
+ "w in",
1734
+ "de vel",
1735
+ "o b",
1736
+ "v e",
1737
+ "v en</w>",
1738
+ "op er",
1739
+ "g er",
1740
+ "of fi",
1741
+ "char ac",
1742
+ "m s</w>",
1743
+ "hi gh",
1744
+ "a d</w>",
1745
+ "th o",
1746
+ "sever al</w>",
1747
+ "d re",
1748
+ "de scri",
1749
+ "al e</w>",
1750
+ "num ber</w>",
1751
+ "a ir",
1752
+ "inclu ding</w>",
1753
+ "in st</w>",
1754
+ "aga inst</w>",
1755
+ "l s</w>",
1756
+ "su l",
1757
+ "ep iso",
1758
+ "c am",
1759
+ "di f",
1760
+ "so ci",
1761
+ "bec ame</w>",
1762
+ "li ke</w>",
1763
+ "t el",
1764
+ "f our</w>",
1765
+ "âĢ Ķ</w>",
1766
+ "h ou",
1767
+ "jo h",
1768
+ "un ited</w>",
1769
+ "in v",
1770
+ "un der</w>",
1771
+ "no v",
1772
+ "ti v",
1773
+ "su c",
1774
+ "a tions</w>",
1775
+ "ac k",
1776
+ "t or</w>",
1777
+ "r on",
1778
+ "un d</w>",
1779
+ "w s</w>",
1780
+ "f o",
1781
+ "g r",
1782
+ "devel op",
1783
+ "al though</w>",
1784
+ "cont in",
1785
+ "we st</w>",
1786
+ "ori gin",
1787
+ "musi c</w>",
1788
+ "or s</w>",
1789
+ "d on</w>",
1790
+ "cent ury</w>",
1791
+ "w ard</w>",
1792
+ "wor k</w>",
1793
+ "m e</w>",
1794
+ "am i",
1795
+ "ch a",
1796
+ "ver y</w>",
1797
+ "h ar",
1798
+ "di s",
1799
+ "z ed</w>",
1800
+ "d o</w>",
1801
+ "g s</w>",
1802
+ "t ow",
1803
+ "s ol",
1804
+ "follow ing</w>",
1805
+ "li on</w>",
1806
+ "re ma",
1807
+ "n s</w>",
1808
+ "ti sh</w>",
1809
+ "ch ur",
1810
+ "s om",
1811
+ "m p",
1812
+ "t le</w>",
1813
+ "go ver",
1814
+ "d el",
1815
+ "comp le",
1816
+ "c ur",
1817
+ "u se</w>",
1818
+ "b ack</w>",
1819
+ "h u",
1820
+ "st ern</w>",
1821
+ "be gan</w>",
1822
+ "fi el",
1823
+ "au se</w>",
1824
+ "d ra",
1825
+ "p as",
1826
+ "b il",
1827
+ "ca tion</w>",
1828
+ "d ent</w>",
1829
+ "b ed</w>",
1830
+ "bec ause</w>",
1831
+ "an t",
1832
+ "ea m</w>",
1833
+ "p hi",
1834
+ "y o",
1835
+ "contin u",
1836
+ "ta in</w>",
1837
+ "tr y</w>",
1838
+ "f re",
1839
+ "pe op",
1840
+ "cal led</w>",
1841
+ "f ound</w>",
1842
+ "episo de</w>",
1843
+ "de sig",
1844
+ "m or",
1845
+ "se t</w>",
1846
+ "le y</w>",
1847
+ "ea st</w>",
1848
+ "tr ac",
1849
+ "c ra"
1850
+ ]
1851
+ }
1852
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<|startoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "clean_up_tokenization_spaces": true,
12
+ "do_lower_case": true,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "<|endoftext|>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "errors": "replace",
22
+ "model_max_length": 16,
23
+ "pad_token": "!",
24
+ "processor_class": "OwlViTProcessor",
25
+ "special_tokens_map_file": "/home/runner/.cache/huggingface/hub/models--google--owlvit-base-patch32/snapshots/17740e19dde58d657d21b970ead1cce0ea40f4da/special_tokens_map.json",
26
+ "tokenizer_class": "CLIPTokenizer",
27
+ "unk_token": {
28
+ "__type": "AddedToken",
29
+ "content": "<|endoftext|>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false
34
+ }
35
+ }
vocab.json ADDED
@@ -0,0 +1,1026 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "!": 0,
3
+ "!</w>": 240,
4
+ "\"": 3,
5
+ "\"</w>": 281,
6
+ "#": 4,
7
+ "#</w>": 289,
8
+ "$": 5,
9
+ "$</w>": 291,
10
+ "%": 6,
11
+ "%</w>": 245,
12
+ "&": 7,
13
+ "&</w>": 274,
14
+ "'": 8,
15
+ "'</w>": 203,
16
+ "'s</w>": 363,
17
+ "(": 9,
18
+ "(</w>": 283,
19
+ ")": 10,
20
+ ")</w>": 284,
21
+ "*": 11,
22
+ "*</w>": 279,
23
+ "+": 12,
24
+ "+</w>": 216,
25
+ ",": 13,
26
+ ",</w>": 290,
27
+ ",@</w>": 622,
28
+ "-": 14,
29
+ "-</w>": 227,
30
+ "-@</w>": 353,
31
+ ".": 15,
32
+ ".</w>": 269,
33
+ ".@</w>": 569,
34
+ "/": 16,
35
+ "/</w>": 287,
36
+ "0": 17,
37
+ "0</w>": 256,
38
+ "1": 18,
39
+ "1</w>": 270,
40
+ "2": 19,
41
+ "2</w>": 282,
42
+ "3": 20,
43
+ "3</w>": 267,
44
+ "4": 21,
45
+ "4</w>": 285,
46
+ "5": 22,
47
+ "5</w>": 257,
48
+ "6": 23,
49
+ "6</w>": 296,
50
+ "7": 24,
51
+ "7</w>": 297,
52
+ "8": 25,
53
+ "8</w>": 277,
54
+ "9": 26,
55
+ "9</w>": 286,
56
+ ":": 27,
57
+ ":</w>": 288,
58
+ ";": 28,
59
+ ";</w>": 280,
60
+ "<": 29,
61
+ "<</w>": 293,
62
+ "<|endoftext|>": 2,
63
+ "<|startoftext|>": 1,
64
+ "=": 30,
65
+ "=</w>": 261,
66
+ ">": 31,
67
+ "></w>": 204,
68
+ "?": 32,
69
+ "?</w>": 295,
70
+ "@": 33,
71
+ "@,@</w>": 623,
72
+ "@-@</w>": 354,
73
+ "@.@</w>": 570,
74
+ "@</w>": 273,
75
+ "[": 34,
76
+ "[</w>": 191,
77
+ "\\": 35,
78
+ "\\</w>": 215,
79
+ "]": 36,
80
+ "]</w>": 292,
81
+ "^": 37,
82
+ "^</w>": 252,
83
+ "_": 38,
84
+ "_</w>": 294,
85
+ "`": 39,
86
+ "`</w>": 234,
87
+ "a": 40,
88
+ "a</w>": 172,
89
+ "ab": 408,
90
+ "able</w>": 675,
91
+ "about</w>": 721,
92
+ "ac": 326,
93
+ "ace</w>": 659,
94
+ "ach": 678,
95
+ "ach</w>": 736,
96
+ "ack": 949,
97
+ "ack</w>": 551,
98
+ "ad": 407,
99
+ "ad</w>": 918,
100
+ "af": 455,
101
+ "after</w>": 532,
102
+ "ag": 397,
103
+ "aga": 730,
104
+ "against</w>": 928,
105
+ "age</w>": 566,
106
+ "air": 925,
107
+ "al": 318,
108
+ "al</w>": 330,
109
+ "albu": 813,
110
+ "album</w>": 874,
111
+ "ale</w>": 923,
112
+ "ali": 527,
113
+ "all": 762,
114
+ "all</w>": 449,
115
+ "ally</w>": 476,
116
+ "als</w>": 660,
117
+ "also</w>": 528,
118
+ "although</w>": 957,
119
+ "am": 333,
120
+ "ame</w>": 450,
121
+ "amer": 773,
122
+ "americ": 805,
123
+ "ames</w>": 906,
124
+ "ami": 968,
125
+ "an": 301,
126
+ "an</w>": 332,
127
+ "ance</w>": 689,
128
+ "and": 793,
129
+ "and</w>": 311,
130
+ "ang": 735,
131
+ "ani": 854,
132
+ "ann": 832,
133
+ "ans</w>": 761,
134
+ "ant": 1005,
135
+ "ant</w>": 663,
136
+ "any</w>": 767,
137
+ "ap": 421,
138
+ "app": 714,
139
+ "ar": 305,
140
+ "ard</w>": 543,
141
+ "are": 802,
142
+ "are</w>": 454,
143
+ "ari": 726,
144
+ "arm": 817,
145
+ "ary</w>": 518,
146
+ "as": 412,
147
+ "as</w>": 321,
148
+ "at": 322,
149
+ "at</w>": 328,
150
+ "ate</w>": 439,
151
+ "ated</w>": 441,
152
+ "ater</w>": 575,
153
+ "ates</w>": 684,
154
+ "ating</w>": 778,
155
+ "ation</w>": 457,
156
+ "ations</w>": 948,
157
+ "att": 512,
158
+ "au": 448,
159
+ "ause</w>": 997,
160
+ "b": 41,
161
+ "b</w>": 198,
162
+ "ba": 687,
163
+ "back</w>": 992,
164
+ "bar": 821,
165
+ "bas": 855,
166
+ "batt": 897,
167
+ "be": 361,
168
+ "be</w>": 467,
169
+ "bec": 558,
170
+ "became</w>": 935,
171
+ "because</w>": 1004,
172
+ "bed</w>": 1003,
173
+ "been</w>": 562,
174
+ "before</w>": 815,
175
+ "began</w>": 995,
176
+ "being</w>": 844,
177
+ "bel": 788,
178
+ "ber": 664,
179
+ "ber</w>": 483,
180
+ "betw": 740,
181
+ "between</w>": 742,
182
+ "bi": 628,
183
+ "bil": 1000,
184
+ "bl": 797,
185
+ "ble</w>": 709,
186
+ "bli": 700,
187
+ "bo": 429,
188
+ "bor": 880,
189
+ "both</w>": 846,
190
+ "bri": 588,
191
+ "bro": 758,
192
+ "bu": 388,
193
+ "buil": 814,
194
+ "but</w>": 499,
195
+ "by</w>": 369,
196
+ "c": 42,
197
+ "c</w>": 184,
198
+ "ca": 394,
199
+ "cal": 820,
200
+ "called</w>": 1014,
201
+ "cam": 932,
202
+ "can": 590,
203
+ "can</w>": 790,
204
+ "cap": 753,
205
+ "car": 644,
206
+ "cation</w>": 1001,
207
+ "ce</w>": 378,
208
+ "ced</w>": 899,
209
+ "cent": 605,
210
+ "century</w>": 964,
211
+ "cer": 827,
212
+ "ces": 776,
213
+ "ces</w>": 651,
214
+ "ch": 345,
215
+ "ch</w>": 374,
216
+ "cha": 969,
217
+ "char": 573,
218
+ "charac": 915,
219
+ "chi": 752,
220
+ "chur": 983,
221
+ "ci": 451,
222
+ "city</w>": 784,
223
+ "cla": 717,
224
+ "clu": 882,
225
+ "co": 524,
226
+ "col": 582,
227
+ "com": 383,
228
+ "comm": 549,
229
+ "comp": 557,
230
+ "comple": 989,
231
+ "con": 364,
232
+ "consi": 900,
233
+ "cont": 560,
234
+ "contin": 958,
235
+ "continu": 1009,
236
+ "cor": 706,
237
+ "coun": 656,
238
+ "cra": 1023,
239
+ "cre": 655,
240
+ "cri": 756,
241
+ "cro": 862,
242
+ "cu": 809,
243
+ "cul": 881,
244
+ "cur": 990,
245
+ "d": 43,
246
+ "d</w>": 176,
247
+ "da": 497,
248
+ "day</w>": 755,
249
+ "de": 352,
250
+ "de</w>": 415,
251
+ "dec": 604,
252
+ "ded</w>": 447,
253
+ "del": 988,
254
+ "dent</w>": 1002,
255
+ "der": 487,
256
+ "der</w>": 526,
257
+ "des</w>": 822,
258
+ "descri": 922,
259
+ "desig": 1017,
260
+ "devel": 908,
261
+ "develop": 956,
262
+ "di": 340,
263
+ "dif": 933,
264
+ "ding</w>": 442,
265
+ "direc": 872,
266
+ "dis": 972,
267
+ "do": 595,
268
+ "do</w>": 974,
269
+ "don</w>": 963,
270
+ "dra": 998,
271
+ "dre": 921,
272
+ "ds</w>": 422,
273
+ "du": 425,
274
+ "duc": 587,
275
+ "during</w>": 616,
276
+ "dy</w>": 727,
277
+ "e": 44,
278
+ "e</w>": 167,
279
+ "ea": 344,
280
+ "eam</w>": 1006,
281
+ "ear": 445,
282
+ "ear</w>": 580,
283
+ "early</w>": 904,
284
+ "ears</w>": 765,
285
+ "east</w>": 1021,
286
+ "ec": 338,
287
+ "ect</w>": 886,
288
+ "ected</w>": 871,
289
+ "ed": 803,
290
+ "ed</w>": 302,
291
+ "een</w>": 693,
292
+ "ef": 816,
293
+ "ei": 640,
294
+ "el": 331,
295
+ "el</w>": 565,
296
+ "ell</w>": 621,
297
+ "ely</w>": 583,
298
+ "em": 360,
299
+ "ember</w>": 639,
300
+ "ement</w>": 811,
301
+ "emp": 779,
302
+ "en": 308,
303
+ "en</w>": 343,
304
+ "ence</w>": 563,
305
+ "end</w>": 733,
306
+ "eng": 633,
307
+ "ent": 376,
308
+ "ent</w>": 381,
309
+ "enti": 829,
310
+ "ents</w>": 555,
311
+ "ep": 530,
312
+ "episo": 931,
313
+ "episode</w>": 1016,
314
+ "er": 303,
315
+ "er</w>": 312,
316
+ "ere</w>": 385,
317
+ "ern</w>": 691,
318
+ "ers</w>": 398,
319
+ "es": 435,
320
+ "es</w>": 323,
321
+ "est": 674,
322
+ "est</w>": 836,
323
+ "et</w>": 535,
324
+ "eu": 896,
325
+ "ev": 539,
326
+ "ever</w>": 754,
327
+ "ex": 417,
328
+ "f": 45,
329
+ "f</w>": 182,
330
+ "fa": 630,
331
+ "fac": 893,
332
+ "fe": 484,
333
+ "fe</w>": 851,
334
+ "fer": 610,
335
+ "fi": 350,
336
+ "fiel": 996,
337
+ "fil": 672,
338
+ "film</w>": 858,
339
+ "fin": 553,
340
+ "fir": 470,
341
+ "first</w>": 509,
342
+ "fo": 954,
343
+ "fol": 692,
344
+ "follow": 786,
345
+ "following</w>": 978,
346
+ "for": 396,
347
+ "for</w>": 366,
348
+ "fore</w>": 782,
349
+ "form": 645,
350
+ "found</w>": 1015,
351
+ "four</w>": 938,
352
+ "fre": 1012,
353
+ "fro": 401,
354
+ "from</w>": 403,
355
+ "ft</w>": 707,
356
+ "fu": 665,
357
+ "g": 46,
358
+ "g</w>": 171,
359
+ "ga": 631,
360
+ "game</w>": 697,
361
+ "gan</w>": 835,
362
+ "ge": 574,
363
+ "ge</w>": 482,
364
+ "gen": 625,
365
+ "gener": 824,
366
+ "ger": 913,
367
+ "gh": 400,
368
+ "gh</w>": 481,
369
+ "ght</w>": 493,
370
+ "gi": 516,
371
+ "gin": 771,
372
+ "go": 494,
373
+ "gover": 987,
374
+ "gr": 955,
375
+ "gra": 611,
376
+ "gre": 612,
377
+ "gro": 785,
378
+ "gs</w>": 975,
379
+ "gu": 426,
380
+ "h": 47,
381
+ "h</w>": 181,
382
+ "ha": 367,
383
+ "had</w>": 459,
384
+ "har": 971,
385
+ "has</w>": 556,
386
+ "have</w>": 547,
387
+ "he</w>": 410,
388
+ "hea": 720,
389
+ "hel": 750,
390
+ "her": 830,
391
+ "her</w>": 522,
392
+ "hi": 325,
393
+ "high": 917,
394
+ "him</w>": 739,
395
+ "his</w>": 406,
396
+ "histor": 850,
397
+ "ho": 577,
398
+ "hou": 940,
399
+ "how": 804,
400
+ "however</w>": 867,
401
+ "hu": 993,
402
+ "i": 48,
403
+ "i</w>": 178,
404
+ "ic": 409,
405
+ "ic</w>": 783,
406
+ "ical</w>": 713,
407
+ "id</w>": 682,
408
+ "ies</w>": 508,
409
+ "il": 379,
410
+ "im": 465,
411
+ "in": 300,
412
+ "in</w>": 314,
413
+ "inc": 491,
414
+ "inclu": 614,
415
+ "including</w>": 926,
416
+ "indi": 847,
417
+ "ine</w>": 490,
418
+ "ined</w>": 694,
419
+ "inf": 901,
420
+ "ing": 453,
421
+ "ing</w>": 315,
422
+ "ings</w>": 670,
423
+ "ins</w>": 680,
424
+ "inst</w>": 927,
425
+ "inter": 609,
426
+ "into</w>": 649,
427
+ "inv": 943,
428
+ "ir": 428,
429
+ "ir</w>": 456,
430
+ "is": 395,
431
+ "is</w>": 349,
432
+ "iso": 890,
433
+ "it": 319,
434
+ "it</w>": 390,
435
+ "ite</w>": 624,
436
+ "ited</w>": 666,
437
+ "ith</w>": 370,
438
+ "its</w>": 480,
439
+ "ity</w>": 443,
440
+ "j": 49,
441
+ "j</w>": 195,
442
+ "jo": 585,
443
+ "joh": 941,
444
+ "ju": 619,
445
+ "k": 50,
446
+ "k</w>": 183,
447
+ "ke": 661,
448
+ "ke</w>": 505,
449
+ "ked</w>": 653,
450
+ "king</w>": 564,
451
+ "kno": 705,
452
+ "known</w>": 879,
453
+ "ks</w>": 559,
454
+ "l": 51,
455
+ "l</w>": 180,
456
+ "la": 348,
457
+ "lan": 606,
458
+ "land</w>": 561,
459
+ "lar": 646,
460
+ "lar</w>": 698,
461
+ "later</w>": 725,
462
+ "ld</w>": 462,
463
+ "le": 382,
464
+ "le</w>": 357,
465
+ "lea": 466,
466
+ "led</w>": 520,
467
+ "les</w>": 658,
468
+ "ley</w>": 1020,
469
+ "li": 336,
470
+ "like</w>": 936,
471
+ "lin": 864,
472
+ "line</w>": 841,
473
+ "ling</w>": 869,
474
+ "lion</w>": 979,
475
+ "ll</w>": 876,
476
+ "lo": 358,
477
+ "loc": 729,
478
+ "low": 567,
479
+ "low</w>": 887,
480
+ "ls</w>": 929,
481
+ "lu": 432,
482
+ "ly</w>": 342,
483
+ "m": 52,
484
+ "m</w>": 186,
485
+ "ma": 365,
486
+ "made</w>": 794,
487
+ "man": 444,
488
+ "man</w>": 603,
489
+ "many</w>": 801,
490
+ "mar": 474,
491
+ "mat": 823,
492
+ "may</w>": 734,
493
+ "me": 475,
494
+ "me</w>": 967,
495
+ "ment</w>": 572,
496
+ "mi": 492,
497
+ "mil": 615,
498
+ "min": 568,
499
+ "mis": 861,
500
+ "mo": 399,
501
+ "mon": 597,
502
+ "mor": 1018,
503
+ "more</w>": 637,
504
+ "most</w>": 652,
505
+ "mp": 985,
506
+ "ms</w>": 916,
507
+ "mu": 519,
508
+ "musi": 808,
509
+ "music</w>": 961,
510
+ "n": 53,
511
+ "n</w>": 168,
512
+ "na": 517,
513
+ "nam": 810,
514
+ "national</w>": 849,
515
+ "ne": 393,
516
+ "ned</w>": 620,
517
+ "new</w>": 598,
518
+ "ni": 546,
519
+ "ning</w>": 702,
520
+ "no": 387,
521
+ "no</w>": 759,
522
+ "nor": 576,
523
+ "north</w>": 891,
524
+ "not</w>": 515,
525
+ "nov": 945,
526
+ "ns</w>": 981,
527
+ "num": 763,
528
+ "number</w>": 924,
529
+ "o": 54,
530
+ "o</w>": 170,
531
+ "ob": 909,
532
+ "oc": 511,
533
+ "od</w>": 825,
534
+ "of": 513,
535
+ "of</w>": 309,
536
+ "offi": 914,
537
+ "og": 738,
538
+ "ok</w>": 749,
539
+ "ol": 351,
540
+ "old</w>": 718,
541
+ "olog": 903,
542
+ "om": 339,
543
+ "ome</w>": 571,
544
+ "on": 307,
545
+ "on</w>": 313,
546
+ "ond</w>": 747,
547
+ "one</w>": 452,
548
+ "ong</w>": 472,
549
+ "only</w>": 683,
550
+ "ons</w>": 601,
551
+ "oo": 552,
552
+ "op": 405,
553
+ "oper": 912,
554
+ "or": 310,
555
+ "or</w>": 334,
556
+ "ore</w>": 529,
557
+ "ori": 591,
558
+ "origin": 960,
559
+ "ors</w>": 962,
560
+ "other</w>": 540,
561
+ "ou": 324,
562
+ "ould</w>": 531,
563
+ "oun": 411,
564
+ "ound</w>": 635,
565
+ "our": 548,
566
+ "our</w>": 608,
567
+ "ous</w>": 578,
568
+ "out</w>": 478,
569
+ "over": 831,
570
+ "over</w>": 669,
571
+ "ow": 416,
572
+ "own</w>": 617,
573
+ "p": 55,
574
+ "p</w>": 179,
575
+ "pa": 593,
576
+ "par": 427,
577
+ "part</w>": 892,
578
+ "pas": 999,
579
+ "pe": 514,
580
+ "pen": 859,
581
+ "peop": 1013,
582
+ "per": 437,
583
+ "peri": 895,
584
+ "ph": 769,
585
+ "phi": 1007,
586
+ "pi": 607,
587
+ "pl": 488,
588
+ "pla": 506,
589
+ "play": 632,
590
+ "po": 377,
591
+ "poin": 856,
592
+ "poli": 902,
593
+ "por": 473,
594
+ "port</w>": 843,
595
+ "posi": 905,
596
+ "pp": 770,
597
+ "pr": 888,
598
+ "pre": 424,
599
+ "pres": 618,
600
+ "pri": 667,
601
+ "pro": 386,
602
+ "produc": 774,
603
+ "ps</w>": 703,
604
+ "pu": 485,
605
+ "publi": 728,
606
+ "q": 56,
607
+ "q</w>": 235,
608
+ "qu": 431,
609
+ "r": 57,
610
+ "r</w>": 173,
611
+ "ra": 355,
612
+ "ran": 523,
613
+ "re": 304,
614
+ "re</w>": 671,
615
+ "rea": 746,
616
+ "rec": 436,
617
+ "recor": 708,
618
+ "red</w>": 764,
619
+ "ree</w>": 638,
620
+ "relea": 795,
621
+ "rema": 980,
622
+ "ren": 629,
623
+ "res</w>": 613,
624
+ "ri": 329,
625
+ "rit": 599,
626
+ "ro": 317,
627
+ "ron": 951,
628
+ "rou": 498,
629
+ "ru": 419,
630
+ "ry</w>": 885,
631
+ "s": 58,
632
+ "s</w>": 169,
633
+ "sa": 510,
634
+ "sc": 414,
635
+ "sch": 775,
636
+ "scri": 745,
637
+ "se": 380,
638
+ "se</w>": 373,
639
+ "sea": 677,
640
+ "season</w>": 781,
641
+ "sec": 554,
642
+ "second</w>": 837,
643
+ "sed</w>": 446,
644
+ "sel": 699,
645
+ "ser": 486,
646
+ "series</w>": 826,
647
+ "ses</w>": 647,
648
+ "set</w>": 1019,
649
+ "sever": 877,
650
+ "several</w>": 920,
651
+ "sh": 359,
652
+ "sh</w>": 541,
653
+ "she</w>": 594,
654
+ "shed</w>": 719,
655
+ "shi": 533,
656
+ "ship</w>": 748,
657
+ "sho": 828,
658
+ "si": 327,
659
+ "side</w>": 839,
660
+ "sig": 627,
661
+ "sin": 791,
662
+ "sing": 857,
663
+ "sing</w>": 679,
664
+ "sion</w>": 504,
665
+ "sk": 889,
666
+ "sm": 796,
667
+ "so": 602,
668
+ "so</w>": 477,
669
+ "soci": 934,
670
+ "sol": 977,
671
+ "som": 984,
672
+ "some</w>": 757,
673
+ "son": 800,
674
+ "son</w>": 501,
675
+ "song</w>": 751,
676
+ "sou": 641,
677
+ "south</w>": 884,
678
+ "sp": 384,
679
+ "spec": 581,
680
+ "ss</w>": 489,
681
+ "ssi": 732,
682
+ "st": 316,
683
+ "st</w>": 335,
684
+ "sta": 690,
685
+ "star": 744,
686
+ "state</w>": 898,
687
+ "ste": 724,
688
+ "sted</w>": 853,
689
+ "ster</w>": 883,
690
+ "stern</w>": 994,
691
+ "sti": 643,
692
+ "stor": 537,
693
+ "str": 589,
694
+ "stri": 863,
695
+ "stru": 695,
696
+ "struc": 772,
697
+ "sts</w>": 766,
698
+ "stu": 807,
699
+ "su": 368,
700
+ "sub": 787,
701
+ "suc": 947,
702
+ "such</w>": 812,
703
+ "sul": 930,
704
+ "sup": 860,
705
+ "sur": 642,
706
+ "sy": 673,
707
+ "t": 59,
708
+ "t</w>": 174,
709
+ "ta": 438,
710
+ "tain</w>": 1010,
711
+ "te": 544,
712
+ "te</w>": 550,
713
+ "ted</w>": 402,
714
+ "tel": 937,
715
+ "ten": 648,
716
+ "ten</w>": 798,
717
+ "ter": 433,
718
+ "ter</w>": 420,
719
+ "ters</w>": 838,
720
+ "th": 298,
721
+ "th</w>": 391,
722
+ "than</w>": 723,
723
+ "that</w>": 362,
724
+ "the": 375,
725
+ "the</w>": 299,
726
+ "their</w>": 507,
727
+ "them</w>": 819,
728
+ "then</w>": 875,
729
+ "ther</w>": 430,
730
+ "there</w>": 760,
731
+ "these</w>": 848,
732
+ "they</w>": 525,
733
+ "this</w>": 495,
734
+ "tho": 919,
735
+ "thou": 636,
736
+ "though</w>": 741,
737
+ "three</w>": 716,
738
+ "throu": 731,
739
+ "through</w>": 878,
740
+ "ti": 306,
741
+ "tic": 536,
742
+ "tic</w>": 681,
743
+ "ties</w>": 704,
744
+ "tim": 469,
745
+ "time</w>": 600,
746
+ "ting</w>": 542,
747
+ "tion": 404,
748
+ "tion</w>": 347,
749
+ "tional</w>": 634,
750
+ "tions</w>": 479,
751
+ "tish</w>": 982,
752
+ "tiv": 946,
753
+ "tive</w>": 596,
754
+ "tle</w>": 986,
755
+ "tly</w>": 894,
756
+ "to": 468,
757
+ "to</w>": 320,
758
+ "ton</w>": 818,
759
+ "tor": 780,
760
+ "tor</w>": 950,
761
+ "tow": 976,
762
+ "tr": 460,
763
+ "tra": 592,
764
+ "trac": 1022,
765
+ "tran": 840,
766
+ "tre": 865,
767
+ "tri": 685,
768
+ "tro": 715,
769
+ "try</w>": 1011,
770
+ "ts</w>": 413,
771
+ "tu": 650,
772
+ "tur": 496,
773
+ "ture</w>": 842,
774
+ "tw": 434,
775
+ "two</w>": 545,
776
+ "ty</w>": 584,
777
+ "u": 60,
778
+ "u</w>": 189,
779
+ "ul": 461,
780
+ "um": 500,
781
+ "um</w>": 743,
782
+ "un": 346,
783
+ "und</w>": 952,
784
+ "under</w>": 944,
785
+ "uni": 696,
786
+ "united</w>": 942,
787
+ "up</w>": 657,
788
+ "ur": 337,
789
+ "ure</w>": 777,
790
+ "uring</w>": 586,
791
+ "ury</w>": 806,
792
+ "us</w>": 534,
793
+ "use</w>": 991,
794
+ "used</w>": 701,
795
+ "v": 61,
796
+ "v</w>": 185,
797
+ "ve": 910,
798
+ "ve</w>": 371,
799
+ "ved</w>": 503,
800
+ "vel": 688,
801
+ "ven</w>": 911,
802
+ "ver": 392,
803
+ "ver</w>": 440,
804
+ "very</w>": 970,
805
+ "ves</w>": 737,
806
+ "vi": 356,
807
+ "vie": 710,
808
+ "ving</w>": 686,
809
+ "vision</w>": 873,
810
+ "vo": 852,
811
+ "w": 62,
812
+ "w</w>": 175,
813
+ "wa": 464,
814
+ "war": 768,
815
+ "war</w>": 866,
816
+ "ward</w>": 965,
817
+ "was</w>": 341,
818
+ "way</w>": 654,
819
+ "we": 502,
820
+ "well</w>": 833,
821
+ "were</w>": 423,
822
+ "west</w>": 959,
823
+ "wh": 389,
824
+ "when</w>": 626,
825
+ "where</w>": 792,
826
+ "whi": 418,
827
+ "which</w>": 463,
828
+ "while</w>": 668,
829
+ "who</w>": 579,
830
+ "wi": 538,
831
+ "wil": 712,
832
+ "win": 907,
833
+ "with": 799,
834
+ "with</w>": 372,
835
+ "wn</w>": 676,
836
+ "wor": 471,
837
+ "work</w>": 966,
838
+ "world</w>": 834,
839
+ "would</w>": 662,
840
+ "writ": 722,
841
+ "ws</w>": 953,
842
+ "x": 63,
843
+ "x</w>": 187,
844
+ "y": 64,
845
+ "y</w>": 177,
846
+ "year</w>": 789,
847
+ "years</w>": 845,
848
+ "ying</w>": 868,
849
+ "yo": 1008,
850
+ "yp": 870,
851
+ "ys</w>": 711,
852
+ "z": 65,
853
+ "z</w>": 188,
854
+ "zed</w>": 973,
855
+ "|": 66,
856
+ "|</w>": 193,
857
+ "}": 67,
858
+ "}</w>": 278,
859
+ "~": 68,
860
+ "~</w>": 276,
861
+ "¡": 69,
862
+ "¡</w>": 202,
863
+ "¢": 70,
864
+ "¢</w>": 205,
865
+ "£": 71,
866
+ "£</w>": 246,
867
+ "¤": 72,
868
+ "¤</w>": 210,
869
+ "¥": 73,
870
+ "¥</w>": 265,
871
+ "¦": 74,
872
+ "¦</w>": 247,
873
+ "§": 75,
874
+ "§</w>": 232,
875
+ "¨": 76,
876
+ "¨</w>": 218,
877
+ "©": 77,
878
+ "©</w>": 211,
879
+ "ª": 78,
880
+ "ª</w>": 254,
881
+ "«": 79,
882
+ "«</w>": 209,
883
+ "¬": 80,
884
+ "¬</w>": 268,
885
+ "®": 81,
886
+ "®</w>": 266,
887
+ "¯": 82,
888
+ "¯</w>": 241,
889
+ "°": 83,
890
+ "°</w>": 226,
891
+ "±": 84,
892
+ "±</w>": 206,
893
+ "²": 85,
894
+ "²</w>": 197,
895
+ "³": 86,
896
+ "³</w>": 213,
897
+ "´": 87,
898
+ "´</w>": 250,
899
+ "µ": 88,
900
+ "µ</w>": 237,
901
+ "¶": 89,
902
+ "¶</w>": 208,
903
+ "·": 90,
904
+ "·</w>": 207,
905
+ "¸": 91,
906
+ "¸</w>": 217,
907
+ "¹": 92,
908
+ "¹</w>": 222,
909
+ "º": 93,
910
+ "º</w>": 239,
911
+ "»": 94,
912
+ "»</w>": 255,
913
+ "¼": 95,
914
+ "¼</w>": 194,
915
+ "½": 96,
916
+ "½</w>": 212,
917
+ "¾": 97,
918
+ "¾</w>": 249,
919
+ "¿": 98,
920
+ "¿</w>": 201,
921
+ "Â": 99,
922
+ "Ã": 100,
923
+ "Ä": 101,
924
+ "Å": 102,
925
+ "Æ": 103,
926
+ "Ç": 104,
927
+ "È": 105,
928
+ "É": 106,
929
+ "Ê": 107,
930
+ "Ë": 108,
931
+ "Ì": 109,
932
+ "Í": 110,
933
+ "Î": 111,
934
+ "Ï": 112,
935
+ "Ð": 113,
936
+ "Ñ": 114,
937
+ "Ö": 115,
938
+ "×": 116,
939
+ "Ø": 117,
940
+ "Ù": 118,
941
+ "Ü": 119,
942
+ "à": 120,
943
+ "á": 121,
944
+ "â": 122,
945
+ "âĢ": 458,
946
+ "âĢĵ</w>": 521,
947
+ "âĢĶ</w>": 939,
948
+ "ã": 123,
949
+ "ä": 124,
950
+ "å": 125,
951
+ "æ": 126,
952
+ "ç": 127,
953
+ "è": 128,
954
+ "é": 129,
955
+ "ë": 130,
956
+ "ì": 131,
957
+ "ï": 132,
958
+ "Ģ": 133,
959
+ "Ģ</w>": 224,
960
+ "ģ": 134,
961
+ "ģ</w>": 192,
962
+ "Ĥ": 135,
963
+ "Ĥ</w>": 228,
964
+ "ĥ": 136,
965
+ "ĥ</w>": 264,
966
+ "Ħ": 137,
967
+ "Ħ</w>": 243,
968
+ "ħ": 138,
969
+ "ħ</w>": 230,
970
+ "Ĩ": 139,
971
+ "Ĩ</w>": 223,
972
+ "ĩ": 140,
973
+ "ĩ</w>": 199,
974
+ "Ī": 141,
975
+ "Ī</w>": 263,
976
+ "ī": 142,
977
+ "ī</w>": 238,
978
+ "Ĭ": 143,
979
+ "Ĭ</w>": 231,
980
+ "ĭ": 144,
981
+ "ĭ</w>": 271,
982
+ "Į": 145,
983
+ "Į</w>": 229,
984
+ "į": 146,
985
+ "į</w>": 196,
986
+ "İ": 147,
987
+ "İ</w>": 260,
988
+ "ı": 148,
989
+ "ı</w>": 275,
990
+ "IJ": 149,
991
+ "IJ</w>": 233,
992
+ "ij": 150,
993
+ "ij</w>": 272,
994
+ "Ĵ": 151,
995
+ "Ĵ</w>": 262,
996
+ "ĵ": 152,
997
+ "ĵ</w>": 251,
998
+ "Ķ": 153,
999
+ "Ķ</w>": 253,
1000
+ "ķ": 154,
1001
+ "ķ</w>": 236,
1002
+ "ĸ": 155,
1003
+ "ĸ</w>": 258,
1004
+ "Ĺ": 156,
1005
+ "Ĺ</w>": 225,
1006
+ "ĺ": 157,
1007
+ "ĺ</w>": 220,
1008
+ "Ļ": 158,
1009
+ "Ļ</w>": 200,
1010
+ "ļ": 159,
1011
+ "ļ</w>": 259,
1012
+ "Ľ": 160,
1013
+ "Ľ</w>": 221,
1014
+ "ľ": 161,
1015
+ "ľ</w>": 190,
1016
+ "Ŀ": 162,
1017
+ "Ŀ</w>": 242,
1018
+ "ŀ": 163,
1019
+ "ŀ</w>": 248,
1020
+ "Ł": 164,
1021
+ "Ł</w>": 219,
1022
+ "ł": 165,
1023
+ "ł</w>": 244,
1024
+ "Ń": 166,
1025
+ "Ń</w>": 214
1026
+ }