| # | |
| # Japanese character category map | |
| # | |
| # $Id: char.def 9 2012-12-12 04:13:15Z togiso $; | |
| # | |
| ################################################################################### | |
| # | |
| # CHARACTER CATEGORY DEFINITION | |
| # | |
| # CATEGORY_NAME INVOKE GROUP LENGTH | |
| # | |
| # - CATEGORY_NAME: Name of category. You have to define the DEFAULT class. | |
| # - INVOKE: 1/0: always invoke unknown word processing, even when the word can be found in the lexicon | |
| # - GROUP: 1/0: make a new word by grouping the same character category | |
| # - LENGTH: n: 1 to n length new words are added | |
| # | |
| DEFAULT 0 1 0 # DEFAULT is a mandatory category and must always be defined | |
| SPACE 0 1 0 | |
| KANJI 0 0 2 | |
| SYMBOL 1 1 0 | |
| NUMERIC 1 1 0 | |
| ALPHA 1 1 0 | |
| HIRAGANA 0 1 2 | |
| KATAKANA 1 1 2 | |
| KANJINUMERIC 0 1 0 # INVOKE was changed from 1 to 0 | |
| GREEK 1 1 0 | |
| CYRILLIC 1 1 0 | |
| ################################################################################### | |
| # | |
| # CODE(UCS2) TO CATEGORY MAPPING | |
| # | |
| # SPACE (ASCII whitespace and line-break control characters) | |
| 0x0020 SPACE # DO NOT REMOVE THIS LINE, 0x0020 is reserved for SPACE | |
| 0x000D SPACE # CARRIAGE RETURN (previously mistyped as 0x00D0, a Latin-1 letter) | |
| 0x0009 SPACE # CHARACTER TABULATION | |
| 0x000B SPACE # LINE TABULATION | |
| 0x000A SPACE # LINE FEED | |
| # ASCII | |
| 0x0021..0x002F SYMBOL #!"#$%&'()*+,-./ | |
| 0x0030..0x0039 NUMERIC #0-9 | |
| 0x003A..0x0040 SYMBOL #:;<=>?@ | |
| 0x0041..0x005A ALPHA #A-Z | |
| 0x005B..0x0060 SYMBOL #[\]^_` | |
| 0x0061..0x007A ALPHA #a-z | |
| 0x007B..0x007E SYMBOL #{|}~ | |
| # Latin | |
| 0x00A1..0x00BF SYMBOL # Latin 1 #¡->¿ | |
| 0x00C0..0x00FF ALPHA # Latin 1 #À->ÿ | |
| 0x0100..0x017F ALPHA # Latin Extended A | |
| 0x0180..0x0236 ALPHA # Latin Extended B | |
| 0x1E00..0x1EF9 ALPHA # Latin Extended Additional | |
| # CYRILLIC | |
| 0x0400..0x04F9 CYRILLIC #Ѐ->ӹ | |
| 0x0500..0x050F CYRILLIC # Cyrillic supplementary | |
| # GREEK | |
| 0x0374..0x03FB GREEK # Greek and Coptic #ʹ->ϻ | |
| # HIRAGANA | |
| 0x3041..0x309F HIRAGANA | |
| # KATAKANA | |
| 0x30A1..0x30FF KATAKANA | |
| 0x31F0..0x31FF KATAKANA # Small KU .. Small RO | |
| # Disabled alternative that would also allow HIRAGANA: 0x30FC KATAKANA HIRAGANA # ー | |
| 0x30FC KATAKANA | |
| # Half-width KATAKANA | |
| 0xFF66..0xFF9D KATAKANA | |
| 0xFF9E..0xFF9F KATAKANA | |
| # KANJI | |
| 0x2E80..0x2EF3 KANJI # CJK Radicals Supplement | |
| 0x2F00..0x2FD5 KANJI | |
| 0x3005 KANJI | |
| 0x3007 KANJI | |
| 0x3400..0x4DB5 KANJI # CJK Unified Ideographs Extension A | |
| 0x4E00..0x9FA5 KANJI | |
| 0xF900..0xFA2D KANJI | |
| 0xFA30..0xFA6A KANJI | |
| # KANJI-NUMERIC (一 二 三 四 五 六 七 八 九 十 百 千 万 億 兆) | |
| 0x4E00 KANJINUMERIC KANJI | |
| 0x4E8C KANJINUMERIC KANJI | |
| 0x4E09 KANJINUMERIC KANJI | |
| 0x56DB KANJINUMERIC KANJI | |
| 0x4E94 KANJINUMERIC KANJI | |
| 0x516D KANJINUMERIC KANJI | |
| 0x4E03 KANJINUMERIC KANJI | |
| 0x516B KANJINUMERIC KANJI | |
| 0x4E5D KANJINUMERIC KANJI | |
| 0x5341 KANJINUMERIC KANJI | |
| 0x767E KANJINUMERIC KANJI | |
| 0x5343 KANJINUMERIC KANJI | |
| 0x4E07 KANJINUMERIC KANJI | |
| 0x5104 KANJINUMERIC KANJI | |
| 0x5146 KANJINUMERIC KANJI | |
| # ZENKAKU | |
| 0xFF10..0xFF19 NUMERIC | |
| 0xFF21..0xFF3A ALPHA | |
| 0xFF41..0xFF5A ALPHA | |
| 0xFF01..0xFF0F SYMBOL #!->/ | |
| 0xFF1A..0xFF20 SYMBOL #:->@ | |
| 0xFF3B..0xFF40 SYMBOL #[->` | |
| 0xFF5B..0xFF65 SYMBOL #{->・ | |
| 0xFFE0..0xFFEF SYMBOL # Halfwidth and Fullwidth Forms | |
| # OTHER SYMBOLS | |
| 0x2000..0x206F SYMBOL # General Punctuation | |
| 0x2070..0x209F NUMERIC # Superscripts and Subscripts | |
| 0x20A0..0x20CF SYMBOL # Currency Symbols | |
| 0x20D0..0x20FF SYMBOL # Combining Diacritical Marks for Symbols | |
| 0x2100..0x214F SYMBOL # Letterlike Symbols | |
| 0x2150..0x218F NUMERIC # Number Forms | |
| 0x2190..0x21FF SYMBOL # Arrows | |
| 0x2200..0x22FF SYMBOL # Mathematical Operators | |
| 0x2300..0x23FF SYMBOL # Miscellaneous Technical | |
| 0x2460..0x24FF SYMBOL # Enclosed Alphanumerics | |
| 0x2500..0x257F SYMBOL # Box Drawing (the block starts at 0x2500, not 0x2501) | |
| 0x2580..0x259F SYMBOL # Block Elements | |
| 0x25A0..0x25FF SYMBOL # Geometric Shapes | |
| 0x2600..0x26FE SYMBOL # Miscellaneous Symbols | |
| 0x2700..0x27BF SYMBOL # Dingbats | |
| 0x27F0..0x27FF SYMBOL # Supplemental Arrows A | |
| 0x27C0..0x27EF SYMBOL # Miscellaneous Mathematical Symbols-A | |
| 0x2800..0x28FF SYMBOL # Braille Patterns | |
| 0x2900..0x297F SYMBOL # Supplemental Arrows B | |
| 0x2B00..0x2BFF SYMBOL # Miscellaneous Symbols and Arrows | |
| 0x2A00..0x2AFF SYMBOL # Supplemental Mathematical Operators | |
| 0x3300..0x33FF SYMBOL # CJK Compatibility | |
| 0x3200..0x32FE SYMBOL # Enclosed CJK Letters and Months | |
| 0x3000..0x303F SYMBOL # CJK Symbols and Punctuation | |
| 0xFE30..0xFE4F SYMBOL # CJK Compatibility Forms | |
| 0xFE50..0xFE6B SYMBOL # Small Form Variants | |
| # added 2006/3/13 | |
| 0x3007 SYMBOL KANJINUMERIC | |
| # END OF TABLE | |