File size: 17,954 Bytes
d358e26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
# Integer identifiers for Unicode blocks, numbered consecutively from 1.
# Each identifier is paired with its code point range in _unicode_blocks
# below and is the value returned by unicode_block().
UNICODE_BASIC_LATIN = 1
UNICODE_LATIN_1_SUPPLEMENT = 2
UNICODE_LATIN_EXTENDED_A = 3
UNICODE_LATIN_EXTENDED_B = 4
UNICODE_IPA_EXTENSIONS = 5
UNICODE_SPACING_MODIFIER_LETTERS = 6
UNICODE_COMBINING_DIACRITICAL_MARKS = 7
UNICODE_GREEK_AND_COPTIC = 8
UNICODE_CYRILLIC = 9
UNICODE_CYRILLIC_SUPPLEMENT = 10
UNICODE_ARMENIAN = 11
UNICODE_HEBREW = 12
UNICODE_ARABIC = 13
UNICODE_SYRIAC = 14
UNICODE_ARABIC_SUPPLEMENT = 15
UNICODE_THAANA = 16
UNICODE_NKO = 17
UNICODE_SAMARITAN = 18
UNICODE_MANDAIC = 19
UNICODE_ARABIC_EXTENDED_A = 20
UNICODE_DEVANAGARI = 21
UNICODE_BENGALI = 22
UNICODE_GURMUKHI = 23
UNICODE_GUJARATI = 24
UNICODE_ORIYA = 25
UNICODE_TAMIL = 26
UNICODE_TELUGU = 27
UNICODE_KANNADA = 28
UNICODE_MALAYALAM = 29
UNICODE_SINHALA = 30
UNICODE_THAI = 31
UNICODE_LAO = 32
UNICODE_TIBETAN = 33
UNICODE_MYANMAR = 34
UNICODE_GEORGIAN = 35
UNICODE_HANGUL_JAMO = 36
UNICODE_ETHIOPIC = 37
UNICODE_ETHIOPIC_SUPPLEMENT = 38
UNICODE_CHEROKEE = 39
UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 40
UNICODE_OGHAM = 41
UNICODE_RUNIC = 42
UNICODE_TAGALOG = 43
UNICODE_HANUNOO = 44
UNICODE_BUHID = 45
UNICODE_TAGBANWA = 46
UNICODE_KHMER = 47
UNICODE_MONGOLIAN = 48
UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 49
UNICODE_LIMBU = 50
UNICODE_TAI_LE = 51
UNICODE_NEW_TAI_LUE = 52
UNICODE_KHMER_SYMBOLS = 53
UNICODE_BUGINESE = 54
UNICODE_TAI_THAM = 55
UNICODE_BALINESE = 56
UNICODE_SUNDANESE = 57
UNICODE_BATAK = 58
UNICODE_LEPCHA = 59
UNICODE_OL_CHIKI = 60
UNICODE_SUNDANESE_SUPPLEMENT = 61
UNICODE_VEDIC_EXTENSIONS = 62
UNICODE_PHONETIC_EXTENSIONS = 63
UNICODE_PHONETIC_EXTENSIONS_SUPPLEMENT = 64
UNICODE_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 65
UNICODE_LATIN_EXTENDED_ADDITIONAL = 66
UNICODE_GREEK_EXTENDED = 67
UNICODE_GENERAL_PUNCTUATION = 68
UNICODE_SUPERSCRIPTS_AND_SUBSCRIPTS = 69
UNICODE_CURRENCY_SYMBOLS = 70
UNICODE_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS = 71
UNICODE_LETTERLIKE_SYMBOLS = 72
UNICODE_NUMBER_FORMS = 73
UNICODE_ARROWS = 74
UNICODE_MATHEMATICAL_OPERATORS = 75
UNICODE_MISCELLANEOUS_TECHNICAL = 76
UNICODE_CONTROL_PICTURES = 77
UNICODE_OPTICAL_CHARACTER_RECOGNITION = 78
UNICODE_ENCLOSED_ALPHANUMERICS = 79
UNICODE_BOX_DRAWING = 80
UNICODE_BLOCK_ELEMENTS = 81
UNICODE_GEOMETRIC_SHAPES = 82
UNICODE_MISCELLANEOUS_SYMBOLS = 83
UNICODE_DINGBATS = 84
UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 85
UNICODE_SUPPLEMENTAL_ARROWS_A = 86
UNICODE_BRAILLE_PATTERNS = 87
UNICODE_SUPPLEMENTAL_ARROWS_B = 88
UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 89
UNICODE_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 90
UNICODE_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 91
UNICODE_GLAGOLITIC = 92
UNICODE_LATIN_EXTENDED_C = 93
UNICODE_COPTIC = 94
UNICODE_GEORGIAN_SUPPLEMENT = 95
UNICODE_TIFINAGH = 96
UNICODE_ETHIOPIC_EXTENDED = 97
UNICODE_CYRILLIC_EXTENDED_A = 98
UNICODE_SUPPLEMENTAL_PUNCTUATION = 99
UNICODE_CJK_RADICALS_SUPPLEMENT = 100
UNICODE_KANGXI_RADICALS = 101
UNICODE_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 102
UNICODE_CJK_SYMBOLS_AND_PUNCTUATION = 103
UNICODE_HIRAGANA = 104
UNICODE_KATAKANA = 105
UNICODE_BOPOMOFO = 106
UNICODE_HANGUL_COMPATIBILITY_JAMO = 107
UNICODE_KANBUN = 108
UNICODE_BOPOMOFO_EXTENDED = 109
UNICODE_CJK_STROKES = 110
UNICODE_KATAKANA_PHONETIC_EXTENSIONS = 111
UNICODE_ENCLOSED_CJK_LETTERS_AND_MONTHS = 112
UNICODE_CJK_COMPATIBILITY = 113
UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 114
UNICODE_YIJING_HEXAGRAM_SYMBOLS = 115
UNICODE_CJK_UNIFIED_IDEOGRAPHS = 116
UNICODE_YI_SYLLABLES = 117
UNICODE_YI_RADICALS = 118
UNICODE_LISU = 119
UNICODE_VAI = 120
UNICODE_CYRILLIC_EXTENDED_B = 121
UNICODE_BAMUM = 122
UNICODE_MODIFIER_TONE_LETTERS = 123
UNICODE_LATIN_EXTENDED_D = 124
UNICODE_SYLOTI_NAGRI = 125
UNICODE_COMMON_INDIC_NUMBER_FORMS = 126
UNICODE_PHAGS_PA = 127
UNICODE_SAURASHTRA = 128
UNICODE_DEVANAGARI_EXTENDED = 129
UNICODE_KAYAH_LI = 130
UNICODE_REJANG = 131
UNICODE_HANGUL_JAMO_EXTENDED_A = 132
UNICODE_JAVANESE = 133
UNICODE_CHAM = 134
UNICODE_MYANMAR_EXTENDED_A = 135
UNICODE_TAI_VIET = 136
UNICODE_MEETEI_MAYEK_EXTENSIONS = 137
UNICODE_ETHIOPIC_EXTENDED_A = 138
UNICODE_MEETEI_MAYEK = 139
UNICODE_HANGUL_SYLLABLES = 140
UNICODE_HANGUL_JAMO_EXTENDED_B = 141
UNICODE_HIGH_SURROGATES = 142
UNICODE_HIGH_PRIVATE_USE_SURROGATES = 143
UNICODE_LOW_SURROGATES = 144
UNICODE_PRIVATE_USE_AREA = 145
UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS = 146
UNICODE_ALPHABETIC_PRESENTATION_FORMS = 147
UNICODE_ARABIC_PRESENTATION_FORMS_A = 148
UNICODE_VARIATION_SELECTORS = 149
UNICODE_VERTICAL_FORMS = 150
UNICODE_COMBINING_HALF_MARKS = 151
UNICODE_CJK_COMPATIBILITY_FORMS = 152
UNICODE_SMALL_FORM_VARIANTS = 153
UNICODE_ARABIC_PRESENTATION_FORMS_B = 154
UNICODE_HALFWIDTH_AND_FULLWIDTH_FORMS = 155
UNICODE_SPECIALS = 156
UNICODE_LINEAR_B_SYLLABARY = 157
UNICODE_LINEAR_B_IDEOGRAMS = 158
UNICODE_AEGEAN_NUMBERS = 159
UNICODE_ANCIENT_GREEK_NUMBERS = 160
UNICODE_ANCIENT_SYMBOLS = 161
UNICODE_PHAISTOS_DISC = 162
UNICODE_LYCIAN = 163
UNICODE_CARIAN = 164
UNICODE_OLD_ITALIC = 165
UNICODE_GOTHIC = 166
UNICODE_UGARITIC = 167
UNICODE_OLD_PERSIAN = 168
UNICODE_DESERET = 169
UNICODE_SHAVIAN = 170
UNICODE_OSMANYA = 171
UNICODE_CYPRIOT_SYLLABARY = 172
UNICODE_IMPERIAL_ARAMAIC = 173
UNICODE_PHOENICIAN = 174
UNICODE_LYDIAN = 175
UNICODE_MEROITIC_HIEROGLYPHS = 176
UNICODE_MEROITIC_CURSIVE = 177
UNICODE_KHAROSHTHI = 178
UNICODE_OLD_SOUTH_ARABIAN = 179
UNICODE_AVESTAN = 180
UNICODE_INSCRIPTIONAL_PARTHIAN = 181
UNICODE_INSCRIPTIONAL_PAHLAVI = 182
UNICODE_OLD_TURKIC = 183
UNICODE_RUMI_NUMERAL_SYMBOLS = 184
UNICODE_BRAHMI = 185
UNICODE_KAITHI = 186
UNICODE_SORA_SOMPENG = 187
UNICODE_CHAKMA = 188
UNICODE_SHARADA = 189
UNICODE_TAKRI = 190
UNICODE_CUNEIFORM = 191
UNICODE_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 192
UNICODE_EGYPTIAN_HIEROGLYPHS = 193
UNICODE_BAMUM_SUPPLEMENT = 194
UNICODE_MIAO = 195
UNICODE_KANA_SUPPLEMENT = 196
UNICODE_BYZANTINE_MUSICAL_SYMBOLS = 197
UNICODE_MUSICAL_SYMBOLS = 198
UNICODE_ANCIENT_GREEK_MUSICAL_NOTATION = 199
UNICODE_TAI_XUAN_JING_SYMBOLS = 200
UNICODE_COUNTING_ROD_NUMERALS = 201
UNICODE_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 202
UNICODE_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 203
UNICODE_MAHJONG_TILES = 204
UNICODE_DOMINO_TILES = 205
UNICODE_PLAYING_CARDS = 206
UNICODE_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 207
UNICODE_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 208
UNICODE_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 209
UNICODE_EMOTICONS = 210
UNICODE_TRANSPORT_AND_MAP_SYMBOLS = 211
UNICODE_ALCHEMICAL_SYMBOLS = 212
UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 213
UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 214
UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 215
UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 216
UNICODE_TAGS = 217
UNICODE_VARIATION_SELECTORS_SUPPLEMENT = 218
UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 219
UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 220


# Lookup table of (block id, first code point, last code point) tuples.
# Both bounds are inclusive.  Entries are listed in ascending code point
# order and do not overlap; unicode_block() binary-searches this list, so
# that ordering must be preserved when entries are added or changed.
# Code points that fall between two consecutive entries (for example
# 0x0860-0x089F) belong to no block in this table.
# NOTE(review): the ranges look like the Unicode 6.1 Blocks.txt data --
# confirm before relying on a specific Unicode version.
_unicode_blocks = [
    (UNICODE_BASIC_LATIN, 0x0000, 0x007F),
    (UNICODE_LATIN_1_SUPPLEMENT, 0x0080, 0x00FF),
    (UNICODE_LATIN_EXTENDED_A, 0x0100, 0x017F),
    (UNICODE_LATIN_EXTENDED_B, 0x0180, 0x024F),
    (UNICODE_IPA_EXTENSIONS, 0x0250, 0x02AF),
    (UNICODE_SPACING_MODIFIER_LETTERS, 0x02B0, 0x02FF),
    (UNICODE_COMBINING_DIACRITICAL_MARKS, 0x0300, 0x036F),
    (UNICODE_GREEK_AND_COPTIC, 0x0370, 0x03FF),
    (UNICODE_CYRILLIC, 0x0400, 0x04FF),
    (UNICODE_CYRILLIC_SUPPLEMENT, 0x0500, 0x052F),
    (UNICODE_ARMENIAN, 0x0530, 0x058F),
    (UNICODE_HEBREW, 0x0590, 0x05FF),
    (UNICODE_ARABIC, 0x0600, 0x06FF),
    (UNICODE_SYRIAC, 0x0700, 0x074F),
    (UNICODE_ARABIC_SUPPLEMENT, 0x0750, 0x077F),
    (UNICODE_THAANA, 0x0780, 0x07BF),
    (UNICODE_NKO, 0x07C0, 0x07FF),
    (UNICODE_SAMARITAN, 0x0800, 0x083F),
    (UNICODE_MANDAIC, 0x0840, 0x085F),
    (UNICODE_ARABIC_EXTENDED_A, 0x08A0, 0x08FF),
    (UNICODE_DEVANAGARI, 0x0900, 0x097F),
    (UNICODE_BENGALI, 0x0980, 0x09FF),
    (UNICODE_GURMUKHI, 0x0A00, 0x0A7F),
    (UNICODE_GUJARATI, 0x0A80, 0x0AFF),
    (UNICODE_ORIYA, 0x0B00, 0x0B7F),
    (UNICODE_TAMIL, 0x0B80, 0x0BFF),
    (UNICODE_TELUGU, 0x0C00, 0x0C7F),
    (UNICODE_KANNADA, 0x0C80, 0x0CFF),
    (UNICODE_MALAYALAM, 0x0D00, 0x0D7F),
    (UNICODE_SINHALA, 0x0D80, 0x0DFF),
    (UNICODE_THAI, 0x0E00, 0x0E7F),
    (UNICODE_LAO, 0x0E80, 0x0EFF),
    (UNICODE_TIBETAN, 0x0F00, 0x0FFF),
    (UNICODE_MYANMAR, 0x1000, 0x109F),
    (UNICODE_GEORGIAN, 0x10A0, 0x10FF),
    (UNICODE_HANGUL_JAMO, 0x1100, 0x11FF),
    (UNICODE_ETHIOPIC, 0x1200, 0x137F),
    (UNICODE_ETHIOPIC_SUPPLEMENT, 0x1380, 0x139F),
    (UNICODE_CHEROKEE, 0x13A0, 0x13FF),
    (UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 0x1400, 0x167F),
    (UNICODE_OGHAM, 0x1680, 0x169F),
    (UNICODE_RUNIC, 0x16A0, 0x16FF),
    (UNICODE_TAGALOG, 0x1700, 0x171F),
    (UNICODE_HANUNOO, 0x1720, 0x173F),
    (UNICODE_BUHID, 0x1740, 0x175F),
    (UNICODE_TAGBANWA, 0x1760, 0x177F),
    (UNICODE_KHMER, 0x1780, 0x17FF),
    (UNICODE_MONGOLIAN, 0x1800, 0x18AF),
    (UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 0x18B0, 0x18FF),
    (UNICODE_LIMBU, 0x1900, 0x194F),
    (UNICODE_TAI_LE, 0x1950, 0x197F),
    (UNICODE_NEW_TAI_LUE, 0x1980, 0x19DF),
    (UNICODE_KHMER_SYMBOLS, 0x19E0, 0x19FF),
    (UNICODE_BUGINESE, 0x1A00, 0x1A1F),
    (UNICODE_TAI_THAM, 0x1A20, 0x1AAF),
    (UNICODE_BALINESE, 0x1B00, 0x1B7F),
    (UNICODE_SUNDANESE, 0x1B80, 0x1BBF),
    (UNICODE_BATAK, 0x1BC0, 0x1BFF),
    (UNICODE_LEPCHA, 0x1C00, 0x1C4F),
    (UNICODE_OL_CHIKI, 0x1C50, 0x1C7F),
    (UNICODE_SUNDANESE_SUPPLEMENT, 0x1CC0, 0x1CCF),
    (UNICODE_VEDIC_EXTENSIONS, 0x1CD0, 0x1CFF),
    (UNICODE_PHONETIC_EXTENSIONS, 0x1D00, 0x1D7F),
    (UNICODE_PHONETIC_EXTENSIONS_SUPPLEMENT, 0x1D80, 0x1DBF),
    (UNICODE_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 0x1DC0, 0x1DFF),
    (UNICODE_LATIN_EXTENDED_ADDITIONAL, 0x1E00, 0x1EFF),
    (UNICODE_GREEK_EXTENDED, 0x1F00, 0x1FFF),
    (UNICODE_GENERAL_PUNCTUATION, 0x2000, 0x206F),
    (UNICODE_SUPERSCRIPTS_AND_SUBSCRIPTS, 0x2070, 0x209F),
    (UNICODE_CURRENCY_SYMBOLS, 0x20A0, 0x20CF),
    (UNICODE_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS, 0x20D0, 0x20FF),
    (UNICODE_LETTERLIKE_SYMBOLS, 0x2100, 0x214F),
    (UNICODE_NUMBER_FORMS, 0x2150, 0x218F),
    (UNICODE_ARROWS, 0x2190, 0x21FF),
    (UNICODE_MATHEMATICAL_OPERATORS, 0x2200, 0x22FF),
    (UNICODE_MISCELLANEOUS_TECHNICAL, 0x2300, 0x23FF),
    (UNICODE_CONTROL_PICTURES, 0x2400, 0x243F),
    (UNICODE_OPTICAL_CHARACTER_RECOGNITION, 0x2440, 0x245F),
    (UNICODE_ENCLOSED_ALPHANUMERICS, 0x2460, 0x24FF),
    (UNICODE_BOX_DRAWING, 0x2500, 0x257F),
    (UNICODE_BLOCK_ELEMENTS, 0x2580, 0x259F),
    (UNICODE_GEOMETRIC_SHAPES, 0x25A0, 0x25FF),
    (UNICODE_MISCELLANEOUS_SYMBOLS, 0x2600, 0x26FF),
    (UNICODE_DINGBATS, 0x2700, 0x27BF),
    (UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 0x27C0, 0x27EF),
    (UNICODE_SUPPLEMENTAL_ARROWS_A, 0x27F0, 0x27FF),
    (UNICODE_BRAILLE_PATTERNS, 0x2800, 0x28FF),
    (UNICODE_SUPPLEMENTAL_ARROWS_B, 0x2900, 0x297F),
    (UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 0x2980, 0x29FF),
    (UNICODE_SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 0x2A00, 0x2AFF),
    (UNICODE_MISCELLANEOUS_SYMBOLS_AND_ARROWS, 0x2B00, 0x2BFF),
    (UNICODE_GLAGOLITIC, 0x2C00, 0x2C5F),
    (UNICODE_LATIN_EXTENDED_C, 0x2C60, 0x2C7F),
    (UNICODE_COPTIC, 0x2C80, 0x2CFF),
    (UNICODE_GEORGIAN_SUPPLEMENT, 0x2D00, 0x2D2F),
    (UNICODE_TIFINAGH, 0x2D30, 0x2D7F),
    (UNICODE_ETHIOPIC_EXTENDED, 0x2D80, 0x2DDF),
    (UNICODE_CYRILLIC_EXTENDED_A, 0x2DE0, 0x2DFF),
    (UNICODE_SUPPLEMENTAL_PUNCTUATION, 0x2E00, 0x2E7F),
    (UNICODE_CJK_RADICALS_SUPPLEMENT, 0x2E80, 0x2EFF),
    (UNICODE_KANGXI_RADICALS, 0x2F00, 0x2FDF),
    (UNICODE_IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 0x2FF0, 0x2FFF),
    (UNICODE_CJK_SYMBOLS_AND_PUNCTUATION, 0x3000, 0x303F),
    (UNICODE_HIRAGANA, 0x3040, 0x309F),
    (UNICODE_KATAKANA, 0x30A0, 0x30FF),
    (UNICODE_BOPOMOFO, 0x3100, 0x312F),
    (UNICODE_HANGUL_COMPATIBILITY_JAMO, 0x3130, 0x318F),
    (UNICODE_KANBUN, 0x3190, 0x319F),
    (UNICODE_BOPOMOFO_EXTENDED, 0x31A0, 0x31BF),
    (UNICODE_CJK_STROKES, 0x31C0, 0x31EF),
    (UNICODE_KATAKANA_PHONETIC_EXTENSIONS, 0x31F0, 0x31FF),
    (UNICODE_ENCLOSED_CJK_LETTERS_AND_MONTHS, 0x3200, 0x32FF),
    (UNICODE_CJK_COMPATIBILITY, 0x3300, 0x33FF),
    (UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 0x3400, 0x4DBF),
    (UNICODE_YIJING_HEXAGRAM_SYMBOLS, 0x4DC0, 0x4DFF),
    (UNICODE_CJK_UNIFIED_IDEOGRAPHS, 0x4E00, 0x9FFF),
    (UNICODE_YI_SYLLABLES, 0xA000, 0xA48F),
    (UNICODE_YI_RADICALS, 0xA490, 0xA4CF),
    (UNICODE_LISU, 0xA4D0, 0xA4FF),
    (UNICODE_VAI, 0xA500, 0xA63F),
    (UNICODE_CYRILLIC_EXTENDED_B, 0xA640, 0xA69F),
    (UNICODE_BAMUM, 0xA6A0, 0xA6FF),
    (UNICODE_MODIFIER_TONE_LETTERS, 0xA700, 0xA71F),
    (UNICODE_LATIN_EXTENDED_D, 0xA720, 0xA7FF),
    (UNICODE_SYLOTI_NAGRI, 0xA800, 0xA82F),
    (UNICODE_COMMON_INDIC_NUMBER_FORMS, 0xA830, 0xA83F),
    (UNICODE_PHAGS_PA, 0xA840, 0xA87F),
    (UNICODE_SAURASHTRA, 0xA880, 0xA8DF),
    (UNICODE_DEVANAGARI_EXTENDED, 0xA8E0, 0xA8FF),
    (UNICODE_KAYAH_LI, 0xA900, 0xA92F),
    (UNICODE_REJANG, 0xA930, 0xA95F),
    (UNICODE_HANGUL_JAMO_EXTENDED_A, 0xA960, 0xA97F),
    (UNICODE_JAVANESE, 0xA980, 0xA9DF),
    (UNICODE_CHAM, 0xAA00, 0xAA5F),
    (UNICODE_MYANMAR_EXTENDED_A, 0xAA60, 0xAA7F),
    (UNICODE_TAI_VIET, 0xAA80, 0xAADF),
    (UNICODE_MEETEI_MAYEK_EXTENSIONS, 0xAAE0, 0xAAFF),
    (UNICODE_ETHIOPIC_EXTENDED_A, 0xAB00, 0xAB2F),
    (UNICODE_MEETEI_MAYEK, 0xABC0, 0xABFF),
    (UNICODE_HANGUL_SYLLABLES, 0xAC00, 0xD7AF),
    (UNICODE_HANGUL_JAMO_EXTENDED_B, 0xD7B0, 0xD7FF),
    (UNICODE_HIGH_SURROGATES, 0xD800, 0xDB7F),
    (UNICODE_HIGH_PRIVATE_USE_SURROGATES, 0xDB80, 0xDBFF),
    (UNICODE_LOW_SURROGATES, 0xDC00, 0xDFFF),
    (UNICODE_PRIVATE_USE_AREA, 0xE000, 0xF8FF),
    (UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS, 0xF900, 0xFAFF),
    (UNICODE_ALPHABETIC_PRESENTATION_FORMS, 0xFB00, 0xFB4F),
    (UNICODE_ARABIC_PRESENTATION_FORMS_A, 0xFB50, 0xFDFF),
    (UNICODE_VARIATION_SELECTORS, 0xFE00, 0xFE0F),
    (UNICODE_VERTICAL_FORMS, 0xFE10, 0xFE1F),
    (UNICODE_COMBINING_HALF_MARKS, 0xFE20, 0xFE2F),
    (UNICODE_CJK_COMPATIBILITY_FORMS, 0xFE30, 0xFE4F),
    (UNICODE_SMALL_FORM_VARIANTS, 0xFE50, 0xFE6F),
    (UNICODE_ARABIC_PRESENTATION_FORMS_B, 0xFE70, 0xFEFF),
    (UNICODE_HALFWIDTH_AND_FULLWIDTH_FORMS, 0xFF00, 0xFFEF),
    (UNICODE_SPECIALS, 0xFFF0, 0xFFFF),
    (UNICODE_LINEAR_B_SYLLABARY, 0x10000, 0x1007F),
    (UNICODE_LINEAR_B_IDEOGRAMS, 0x10080, 0x100FF),
    (UNICODE_AEGEAN_NUMBERS, 0x10100, 0x1013F),
    (UNICODE_ANCIENT_GREEK_NUMBERS, 0x10140, 0x1018F),
    (UNICODE_ANCIENT_SYMBOLS, 0x10190, 0x101CF),
    (UNICODE_PHAISTOS_DISC, 0x101D0, 0x101FF),
    (UNICODE_LYCIAN, 0x10280, 0x1029F),
    (UNICODE_CARIAN, 0x102A0, 0x102DF),
    (UNICODE_OLD_ITALIC, 0x10300, 0x1032F),
    (UNICODE_GOTHIC, 0x10330, 0x1034F),
    (UNICODE_UGARITIC, 0x10380, 0x1039F),
    (UNICODE_OLD_PERSIAN, 0x103A0, 0x103DF),
    (UNICODE_DESERET, 0x10400, 0x1044F),
    (UNICODE_SHAVIAN, 0x10450, 0x1047F),
    (UNICODE_OSMANYA, 0x10480, 0x104AF),
    (UNICODE_CYPRIOT_SYLLABARY, 0x10800, 0x1083F),
    (UNICODE_IMPERIAL_ARAMAIC, 0x10840, 0x1085F),
    (UNICODE_PHOENICIAN, 0x10900, 0x1091F),
    (UNICODE_LYDIAN, 0x10920, 0x1093F),
    (UNICODE_MEROITIC_HIEROGLYPHS, 0x10980, 0x1099F),
    (UNICODE_MEROITIC_CURSIVE, 0x109A0, 0x109FF),
    (UNICODE_KHAROSHTHI, 0x10A00, 0x10A5F),
    (UNICODE_OLD_SOUTH_ARABIAN, 0x10A60, 0x10A7F),
    (UNICODE_AVESTAN, 0x10B00, 0x10B3F),
    (UNICODE_INSCRIPTIONAL_PARTHIAN, 0x10B40, 0x10B5F),
    (UNICODE_INSCRIPTIONAL_PAHLAVI, 0x10B60, 0x10B7F),
    (UNICODE_OLD_TURKIC, 0x10C00, 0x10C4F),
    (UNICODE_RUMI_NUMERAL_SYMBOLS, 0x10E60, 0x10E7F),
    (UNICODE_BRAHMI, 0x11000, 0x1107F),
    (UNICODE_KAITHI, 0x11080, 0x110CF),
    (UNICODE_SORA_SOMPENG, 0x110D0, 0x110FF),
    (UNICODE_CHAKMA, 0x11100, 0x1114F),
    (UNICODE_SHARADA, 0x11180, 0x111DF),
    (UNICODE_TAKRI, 0x11680, 0x116CF),
    (UNICODE_CUNEIFORM, 0x12000, 0x123FF),
    (UNICODE_CUNEIFORM_NUMBERS_AND_PUNCTUATION, 0x12400, 0x1247F),
    (UNICODE_EGYPTIAN_HIEROGLYPHS, 0x13000, 0x1342F),
    (UNICODE_BAMUM_SUPPLEMENT, 0x16800, 0x16A3F),
    (UNICODE_MIAO, 0x16F00, 0x16F9F),
    (UNICODE_KANA_SUPPLEMENT, 0x1B000, 0x1B0FF),
    (UNICODE_BYZANTINE_MUSICAL_SYMBOLS, 0x1D000, 0x1D0FF),
    (UNICODE_MUSICAL_SYMBOLS, 0x1D100, 0x1D1FF),
    (UNICODE_ANCIENT_GREEK_MUSICAL_NOTATION, 0x1D200, 0x1D24F),
    (UNICODE_TAI_XUAN_JING_SYMBOLS, 0x1D300, 0x1D35F),
    (UNICODE_COUNTING_ROD_NUMERALS, 0x1D360, 0x1D37F),
    (UNICODE_MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 0x1D400, 0x1D7FF),
    (UNICODE_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, 0x1EE00, 0x1EEFF),
    (UNICODE_MAHJONG_TILES, 0x1F000, 0x1F02F),
    (UNICODE_DOMINO_TILES, 0x1F030, 0x1F09F),
    (UNICODE_PLAYING_CARDS, 0x1F0A0, 0x1F0FF),
    (UNICODE_ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 0x1F100, 0x1F1FF),
    (UNICODE_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 0x1F200, 0x1F2FF),
    (UNICODE_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 0x1F300, 0x1F5FF),
    (UNICODE_EMOTICONS, 0x1F600, 0x1F64F),
    (UNICODE_TRANSPORT_AND_MAP_SYMBOLS, 0x1F680, 0x1F6FF),
    (UNICODE_ALCHEMICAL_SYMBOLS, 0x1F700, 0x1F77F),
    (UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 0x20000, 0x2A6DF),
    (UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 0x2A700, 0x2B73F),
    (UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 0x2B740, 0x2B81F),
    (UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 0x2F800, 0x2FA1F),
    (UNICODE_TAGS, 0xE0000, 0xE007F),
    (UNICODE_VARIATION_SELECTORS_SUPPLEMENT, 0xE0100, 0xE01EF),
    (UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_A, 0xF0000, 0xFFFFF),
    (UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_B, 0x100000, 0x10FFFF),
]

# Number of table entries; unicode_block() uses it as the search upper bound.
NUM_BLOCKS = len(_unicode_blocks)


def unicode_block(ch):
    '''Return the Unicode block identifier for the character ch.

    ch is passed to ord(), so it must be a single character; ord() raises
    TypeError for anything else.

    The result is one of the integer UNICODE_* constants defined in this
    module (not a textual block name), or None when the code point of ch
    lies in a gap between the ranges listed in _unicode_blocks.
    '''
    cp = ord(ch)
    # Fast path: Basic Latin is answered without running the search.
    if cp <= 0x7F:
        return UNICODE_BASIC_LATIN
    # Binary search over _unicode_blocks, which is ordered by ascending
    # start code point and holds inclusive, non-overlapping ranges.
    lo, hi = 0, NUM_BLOCKS - 1
    while lo <= hi:
        mid = (lo + hi) >> 1
        block_id, start, end = _unicode_blocks[mid]
        if cp < start:
            hi = mid - 1
        elif cp > end:
            lo = mid + 1
        else:
            # start <= cp <= end: this is the containing block.
            return block_id
    # The search interval is empty: cp belongs to no block in the table.
    return None