# # Japanese character category map # # $Id: char.def 9 2012-12-12 04:13:15Z togiso $; # ################################################################################### # # CHARACTER CATEGORY DEFINITION # # CATEGORY_NAME INVOKE GROUP LENGTH # # - CATEGORY_NAME: Name of category. You have to define DEFAULT class. # - INVOKE: 1/0: always invoke unknown word processing, even when the word can be found in the lexicon # - GROUP: 1/0: make a new word by grouping the same character category # - LENGTH: n: 1 to n length new words are added # DEFAULT 0 1 0 # DEFAULT is a mandatory category! SPACE 0 1 0 KANJI 0 0 2 SYMBOL 1 1 0 NUMERIC 1 1 0 ALPHA 1 1 0 HIRAGANA 0 1 2 KATAKANA 1 1 2 KANJINUMERIC 0 1 0 #change INVOKE 1->0 GREEK 1 1 0 CYRILLIC 1 1 0 ################################################################################### # # CODE(UCS2) TO CATEGORY MAPPING # # SPACE 0x0020 SPACE # DO NOT REMOVE THIS LINE, 0x0020 is reserved for SPACE 0x00D0 SPACE 0x0009 SPACE 0x000B SPACE 0x000A SPACE # ASCII 0x0021..0x002F SYMBOL #!"#$%&'()*+,-./ 0x0030..0x0039 NUMERIC #0-9 0x003A..0x0040 SYMBOL #:;<=>?@ 0x0041..0x005A ALPHA #A-Z 0x005B..0x0060 SYMBOL #[\]^_` 0x0061..0x007A ALPHA #a-z 0x007B..0x007E SYMBOL #{|}~ # Latin 0x00A1..0x00BF SYMBOL # Latin 1 #¡->¿ 0x00C0..0x00FF ALPHA # Latin 1 #À->ÿ 0x0100..0x017F ALPHA # Latin Extended A 0x0180..0x0236 ALPHA # Latin Extended B 0x1E00..0x1EF9 ALPHA # Latin Extended Additional # CYRILLIC 0x0400..0x04F9 CYRILLIC #Ѐ->ӹ 0x0500..0x050F CYRILLIC # Cyrillic supplementary # GREEK 0x0374..0x03FB GREEK # Greek and Coptic #ʹ->ϻ # HIRAGANA 0x3041..0x309F HIRAGANA # KATAKANA 0x30A1..0x30FF KATAKANA 0x31F0..0x31FF KATAKANA # Small KU .. 
Small RO # 0x30FC KATAKANA HIRAGANA # ー 0x30FC KATAKANA # Half KATAKANA 0xFF66..0xFF9D KATAKANA 0xFF9E..0xFF9F KATAKANA # KANJI 0x2E80..0x2EF3 KANJI # CJK Radicals Supplement 0x2F00..0x2FD5 KANJI 0x3005 KANJI 0x3007 KANJI 0x3400..0x4DB5 KANJI # CJK Unified Ideographs Extension 0x4E00..0x9FA5 KANJI 0xF900..0xFA2D KANJI 0xFA30..0xFA6A KANJI # KANJI-NUMERIC (一 二 三 四 五 六 七 八 九 十 百 千 万 億 兆) 0x4E00 KANJINUMERIC KANJI 0x4E8C KANJINUMERIC KANJI 0x4E09 KANJINUMERIC KANJI 0x56DB KANJINUMERIC KANJI 0x4E94 KANJINUMERIC KANJI 0x516D KANJINUMERIC KANJI 0x4E03 KANJINUMERIC KANJI 0x516B KANJINUMERIC KANJI 0x4E5D KANJINUMERIC KANJI 0x5341 KANJINUMERIC KANJI 0x767E KANJINUMERIC KANJI 0x5343 KANJINUMERIC KANJI 0x4E07 KANJINUMERIC KANJI 0x5104 KANJINUMERIC KANJI 0x5146 KANJINUMERIC KANJI # ZENKAKU 0xFF10..0xFF19 NUMERIC 0xFF21..0xFF3A ALPHA 0xFF41..0xFF5A ALPHA 0xFF01..0xFF0F SYMBOL #!->/ 0xFF1A..0xFF20 SYMBOL #:->@ 0xFF3B..0xFF40 SYMBOL #[->` 0xFF5B..0xFF65 SYMBOL #{->・ 0xFFE0..0xFFEF SYMBOL # HalfWidth and Full width Form # OTHER SYMBOLS 0x2000..0x206F SYMBOL # General Punctuation 0x2070..0x209F NUMERIC # Superscripts and Subscripts 0x20A0..0x20CF SYMBOL # Currency Symbols 0x20D0..0x20FF SYMBOL # Combining Diacritical Marks for Symbols 0x2100..0x214F SYMBOL # Letterlike Symbols 0x2150..0x218F NUMERIC # Number forms 0x2100..0x214B SYMBOL # Letterlike Symbols 0x2190..0x21FF SYMBOL # Arrow 0x2200..0x22FF SYMBOL # Mathematical Operators 0x2300..0x23FF SYMBOL # Miscellaneous Technical 0x2460..0x24FF SYMBOL # Enclosed NUMERICs 0x2501..0x257F SYMBOL # Box Drawing 0x2580..0x259F SYMBOL # Block Elements 0x25A0..0x25FF SYMBOL # Geometric Shapes 0x2600..0x26FE SYMBOL # Miscellaneous Symbols 0x2700..0x27BF SYMBOL # Dingbats 0x27F0..0x27FF SYMBOL # Supplemental Arrows A 0x27C0..0x27EF SYMBOL # Miscellaneous Mathematical Symbols-A 0x2800..0x28FF SYMBOL # Braille Patterns 0x2900..0x297F SYMBOL # Supplemental Arrows B 0x2B00..0x2BFF SYMBOL # Miscellaneous Symbols and Arrows 0x2A00..0x2AFF SYMBOL # 
Supplemental Mathematical Operators 0x3300..0x33FF SYMBOL 0x3200..0x32FE SYMBOL # Enclosed CJK Letters and Months 0x3000..0x303F SYMBOL # CJK Symbols and Punctuation 0xFE30..0xFE4F SYMBOL # CJK Compatibility Forms 0xFE50..0xFE6B SYMBOL # Small Form Variants # added 2006/3/13 0x3007 SYMBOL KANJINUMERIC # END OF TABLE