]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
*** empty log message ***
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6 ;; Copyright (C) 2001, 2002
7 ;; National Institute of Advanced Industrial Science and Technology (AIST)
8 ;; Registration Number H13PRO009
9
10 ;; Keywords: multibyte character, character set, syntax, category
11
12 ;; This file is part of GNU Emacs.
13
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 2, or (at your option)
17 ;; any later version.
18
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
23
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27 ;; Boston, MA 02111-1307, USA.
28
29 ;;; Commentary:
30
;; This file contains multibyte characters.  Always save it in the
;; coding system declared in the Local Variables section at the end of
;; the file (`utf-8-emacs').
33
34 ;; This file does not define the syntax for Latin-N character sets;
35 ;; those are defined by the files latin-N.el.
36
37 ;;; Code:
38
39 ;;; Predefined categories.
40
41 ;; For each character set.
42
;; One category per character set.  Each element is
;; (MNEMONIC . DOCSTRING); the categories are defined in list order.
(dolist (spec '((?a . "ASCII")
                (?l . "Latin")
                (?t . "Thai")
                (?g . "Greek")
                (?b . "Arabic")
                (?w . "Hebrew")
                (?y . "Cyrillic")
                (?k . "Japanese katakana")
                (?r . "Japanese roman")
                (?c . "Chinese")
                (?j . "Japanese")
                (?h . "Korean")
                (?e . "Ethiopic (Ge'ez)")
                (?v . "Vietnamese")
                (?i . "Indian")
                (?o . "Lao")
                (?q . "Tibetan")))
  (define-category (car spec) (cdr spec)))
60
61 ;; For each group (row) of 2-byte character sets.
62
;; One category per group (row) of the 2-byte character sets, given as
;; (MNEMONIC . DOCSTRING) and defined in list order.
(dolist (spec
         '((?A . "Alpha-numeric characters of 2-byte character sets")
           (?C . "Chinese (Han) characters of 2-byte character sets")
           (?G . "Greek characters of 2-byte character sets")
           (?H . "Japanese Hiragana characters of 2-byte character sets")
           (?K . "Japanese Katakana characters of 2-byte character sets")
           (?N . "Korean Hangul characters of 2-byte character sets")
           (?Y . "Cyrillic characters of 2-byte character sets")
           (?I . "Indian Glyphs")))
  (define-category (car spec) (cdr spec)))
71
72 ;; For phonetic classifications.
73
;; Phonetic classes use the digits ?0..?9 as mnemonics.  The docstrings
;; are listed in mnemonic order, so the mnemonic is derived from the
;; position in the list.
(let ((mnemonic ?0))
  (dolist (doc '("consonant"
                 "base (independent) vowel"
                 "upper diacritical mark (including upper vowel)"
                 "lower diacritical mark (including lower vowel)"
                 "tone mark"
                 "symbol"
                 "digit"
                 "vowel-modifying diacritical mark"
                 "vowel-signs"
                 "semivowel lower"))
    (define-category mnemonic doc)
    (setq mnemonic (1+ mnemonic))))
84
85 ;; For filling.
;; Category consulted by filling code.
(define-category ?| "While filling, we can break a line at this character.")

;; For indentation calculation.
;; The mnemonic is the space character.  It is written as `?\ ' and is
;; followed by more text on the same line: a bare `?' plus a trailing
;; space is lost as soon as trailing whitespace is stripped, and the
;; reader would then take the newline as the character.
(define-category
 ?\  "This character counts as a space for indentation purposes.")

;; Keep the following for `kinsoku' processing.  See comments in
;; kinsoku.el.
(define-category ?> "A character which can't be placed at beginning of line.")
(define-category ?< "A character which can't be placed at end of line.")

;; Combining
(define-category ?^ "Combining diacritic or mark")
99 \f
100 ;;; Setting syntax and category.
101
102 ;; ASCII
103
104 ;; All ASCII characters have the category `a' (ASCII) and `l' (Latin).
;; Printable ASCII belongs to both `a' (ASCII) and `l' (Latin).
(dolist (category '(?a ?l))
  (modify-category-entry '(32 . 127) category))

;; Deal with the CJK charsets first.  Since the syntax of blocks is
;; defined per charset, and the charsets may contain e.g. Latin
;; characters, we end up with the wrong syntax definitions if we're
;; not careful.

;; Chinese characters (Unicode): each range gets the categories
;; `C', `c' and `|'.
(dolist (range '((#x3400 . #x9FAF)
                 (#xF900 . #xFAFF)))
  (dolist (category '(?C ?c ?|))
    (modify-category-entry range category)))
120
121 ;; Chinese character set (GB2312)
122
;; Code-point ranges that get symbol syntax.
(dolist (range '((#x2121 . #x217E)
                 (#x2221 . #x227E)
                 (#x2921 . #x297E)))
  (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_"
                     (car range) (cdr range)))

;; Categories applied to the whole charset.
(dolist (category '(?c ?|))
  (map-charset-chars #'modify-category-entry 'chinese-gb2312 category))

;; Categories applied per code range: (CATEGORY FROM TO).
(dolist (spec '((?A #x2330 #x2339)
                (?A #x2341 #x235A)
                (?A #x2361 #x237A)
                (?H #x2421 #x247E)
                (?K #x2521 #x257E)
                (?G #x2621 #x267E)
                (?Y #x2721 #x277E)
                (?C #x3021 #x7E7E)))
  (map-charset-chars #'modify-category-entry 'chinese-gb2312
                     (car spec) (nth 1 spec) (nth 2 spec)))
137
138 ;; Chinese character set (BIG5)
139
;; Whole charset is `c'; the listed code ranges are additionally `C';
;; finally the whole charset is marked `|'.
(map-charset-chars #'modify-category-entry 'big5 ?c)
(dolist (range '((#xA259 . #xA25F)
                 (#xA440 . #xC67E)
                 (#xC940 . #xF9DF)))
  (map-charset-chars #'modify-category-entry 'big5 ?C
                     (car range) (cdr range)))
(map-charset-chars #'modify-category-entry 'big5 ?|)
145
146
147 ;; Chinese character set (CNS11643)
148
;; Every CNS plane gets `c' and `|'.  Category `C' covers the whole
;; plane, except for plane 1 where it is limited to #x4421..#x7E7E.
(dolist (plane '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
                 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
                 chinese-cns11643-7))
  (map-charset-chars #'modify-category-entry plane ?c)
  (apply #'map-charset-chars #'modify-category-entry plane ?C
         (if (eq plane 'chinese-cns11643-1)
             '(#x4421 #x7E7E)))
  (map-charset-chars #'modify-category-entry plane ?|))
157
158 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
159
(map-charset-chars #'modify-category-entry 'katakana-jisx0201 ?k)
(map-charset-chars #'modify-category-entry 'latin-jisx0201 ?r)

;; All three Japanese charsets are `j' and allow a line break (`|').
(dolist (charset '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212))
  (dolist (category '(?j ?\|))
    (map-charset-chars #'modify-category-entry charset category)))

;; Unicode equivalents of JISX0201-kana
(dolist (category '(?k ?j ?\|))
  (modify-category-entry '(#xff61 . #xff9f) category))

;; Katakana block
;; ?K is double width, ?k isn't specified
(dolist (category '(?K ?\|))
  (modify-category-entry '(#x30a0 . #x30ff) category))

;; Hiragana block (#x3040 . #x309f): nothing is set here.
;; ?H is actually defined to be double width
;;(modify-category-entry range ?H)
;;(modify-category-entry range ?\|)
186
187 ;; JISX0208
;; The symbol rows of JISX0208 get symbol syntax ...
(map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2121 #x227E)
(map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2821 #x287E)
;; ... and the characters listed here are then given word syntax.
;; The loop must use its iteration variable: the earlier form passed
;; `(car chars)' on every iteration, so only the first character of
;; the list was ever changed.
(dolist (elt '(?ー ?゛ ?゜ ?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇))
  (modify-syntax-entry elt "w"))
193
;; Row-based categories: (CATEGORY FROM TO).
(dolist (spec '((?A #x2321 #x237E)
                (?H #x2421 #x247E)
                (?K #x2521 #x257E)
                (?G #x2621 #x267E)
                (?Y #x2721 #x277E)
                (?C #x3021 #x7E7E)))
  (map-charset-chars #'modify-category-entry 'japanese-jisx0208
                     (car spec) (nth 1 spec) (nth 2 spec)))

;; Individual characters outside those rows.
(modify-category-entry ?ー ?K)
(dolist (mark '(?゛ ?゜))
  (modify-category-entry mark ?K)
  (modify-category-entry mark ?H))
(dolist (ch '(?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇))
  (modify-category-entry ch ?C))
210
211 ;; JISX0212
212
213 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0212 "_" #x2121 #x237E)
214
215 ;; JISX0201-Kana
216
;; NOTE(review): this section is headed "JISX0201-Kana", so the
;; characters below were presumably the halfwidth forms
;; (U+FF61..U+FF65) -- confirm against upstream.  They are kept here
;; exactly as they appear in this copy of the file.

;; Punctuation marks.
(dolist (ch '(?。 ?、 ?・))
  (modify-syntax-entry ch "."))

;; Corner brackets form an open/close pair.  The closing bracket takes
;; the close-parenthesis class `)'; it was previously given the open
;; class `(' by mistake.
(modify-syntax-entry ?\「 "(」")
(modify-syntax-entry ?\」 ")「")
224
225 ;; Korean character set (KSC5601)
226
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?h)

;; Code ranges with symbol syntax.
(dolist (range '((#x2121 . #x227E)
                 (#x2621 . #x277E)
                 (#x2830 . #x287E)
                 (#x2930 . #x297E)))
  (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_"
                     (car range) (cdr range)))

;; Per-range categories: (CATEGORY FROM TO).
(dolist (spec '((?A #x2330 #x2339)
                (?A #x2341 #x235A)
                (?A #x2361 #x237A)
                (?G #x2521 #x257E)
                (?H #x2A21 #x2A7E)
                (?K #x2B21 #x2B7E)
                (?Y #x2C21 #x2C7E)))
  (map-charset-chars #'modify-category-entry 'korean-ksc5601
                     (car spec) (nth 1 spec) (nth 2 spec)))
240
241 ;; These are in more than one charset.
;; Each element is (OPEN . CLOSE).  OPEN gets the syntax "(CLOSE" and
;; CLOSE gets the syntax ")OPEN".
(dolist (pair '((?\( . ?\))
                (?\[ . ?\])
                (?\{ . ?\})
                (?\「 . ?\」)
                (?\『 . ?\』)
                (?\〔 . ?\〕)
                (?\〈 . ?\〉)
                (?\《 . ?\》)
                (?\〖 . ?\〗)
                (?\【 . ?\】)
                (?\〚 . ?\〛)))
  (let ((open (car pair))
        (close (cdr pair)))
    (modify-syntax-entry open (string ?\( close))
    (modify-syntax-entry close (string ?\) open))))
265
266 ;; Arabic character set
267
;; All Arabic charsets, then the Arabic Unicode ranges, are `b'.
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  (map-charset-chars #'modify-category-entry charset ?b))
(dolist (range '((#x600 . #x6ff)
                 (#xfb50 . #xfdff)
                 (#xfe70 . #xfefe)))
  (modify-category-entry range ?b))
278
279 ;; Cyrillic character set (ISO-8859-5)
280
281 (modify-syntax-entry ?№ ".")
282
283 ;; Ethiopic character set
284
(modify-category-entry '(#x1200 . #x137b) ?e)
;; Ethiopic punctuation marks get punctuation syntax.
;; NOTE(review): in this copy the list ended with six entries that had
;; been reduced to a bare `?' each.  A bare `?' reads the following
;; space (or the closing parenthesis) as the character, which breaks
;; the form, so those entries are left out here.  Restore the original
;; characters from upstream if they are still wanted.
(dolist (ch '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨))
  (modify-syntax-entry ch "."))
(map-charset-chars #'modify-category-entry 'ethiopic ?e)
291
292 ;; Hebrew character set (ISO-8859-8)
293
;; Hebrew punctuation.
(dolist (ch '(#x5be                     ; MAQAF
              #x5c0                     ; PASEQ
              #x5c3                     ; SOF PASUQ
              #x5f3                     ; GERESH
              #x5f4))                   ; GERSHAYIM
  (modify-syntax-entry ch "."))

;; Indian character set (IS 13194 and other Emacs original Indian charsets)

(modify-category-entry '(#x901 . #x970) ?i)
(dolist (charset '(indian-is13194 indian-2-column))
  (map-charset-chars #'modify-category-entry charset ?i))

;; Lao character set

(modify-category-entry '(#xe80 . #xeff) ?o)
(map-charset-chars #'modify-category-entry 'lao ?o)
310
;; Each entry is (CHARS SYNTAX CATEGORY).  CHARS lists single
;; characters and ranges; "X-Y" continues from the character after X up
;; to Y.  Every covered character is put in CATEGORY, and its syntax is
;; set to SYNTAX unless SYNTAX is "w".
(let (ch)
  (dolist (spec '(("ກ-ຮ" "w" ?0)        ; consonant
                  ("ະາຳຽເ-ໄ" "w" ?1)    ; vowel base
                  ("ັິ-ືົໍ" "w" ?2)      ; vowel upper
                  ("ຸູ" "w" ?3)          ; vowel lower
                  ("່-໋" "w" ?4)         ; tone mark
                  ("ຼຽ" "w" ?9)         ; semivowel lower
                  ("໐-໙" "w" ?6)        ; digit
                  ("ຯໆ" "_" ?5)))       ; symbol
    (let* ((chars (car spec))
           (syntax (nth 1 spec))
           (category (nth 2 spec))
           (len (length chars))
           (idx 0)
           to)
      (while (< idx len)
        (if (/= (aref chars idx) ?-)
            ;; A plain character: a one-element range.
            (setq ch (aref chars idx)
                  to ch)
          ;; A hyphen: CH already points just past the range start;
          ;; the next character is the range end.
          (setq idx (1+ idx)
                to (aref chars idx)))
        (while (<= ch to)
          (or (string-equal syntax "w")
              (modify-syntax-entry ch syntax))
          (modify-category-entry ch category)
          (setq ch (1+ ch)))
        (setq idx (1+ idx))))))
341
342 ;; Thai character set (TIS620)
343
(modify-category-entry '(#xe00 . #xe7f) ?t)
(map-charset-chars #'modify-category-entry 'thai-tis620 ?t)

;; Each entry is (CHARS SYNTAX CATEGORY).  CHARS lists single
;; characters and ranges; "X-Y" continues from the character after X up
;; to Y.  Every covered character is put in CATEGORY, and its syntax is
;; set to SYNTAX unless SYNTAX is "w".
(let (ch)
  (dolist (spec '(("ก-รลว-ฮ" "w" ?0)    ; consonant
                  ("ฤฦะาำเ-ๅ" "w" ?1)   ; vowel base
                  ("ัิ-ื็๎" "w" ?2)      ; vowel upper
                  ("ุ-ฺ" "w" ?3)         ; vowel lower
                  ("่-ํ" "w" ?4)         ; tone mark
                  ("๐-๙" "w" ?6)        ; digit
                  ("ฯๆ฿๏๚๛" "_" ?5)))   ; symbol
    (let* ((chars (car spec))
           (syntax (nth 1 spec))
           (category (nth 2 spec))
           (len (length chars))
           (idx 0)
           to)
      (while (< idx len)
        (if (/= (aref chars idx) ?-)
            ;; A plain character: a one-element range.
            (setq ch (aref chars idx)
                  to ch)
          ;; A hyphen: CH already points just past the range start;
          ;; the next character is the range end.
          (setq idx (1+ idx)
                to (aref chars idx)))
        (while (<= ch to)
          (or (string-equal syntax "w")
              (modify-syntax-entry ch syntax))
          (modify-category-entry ch category)
          (setq ch (1+ ch)))
        (setq idx (1+ idx))))))
377
378 ;; Tibetan character set
379
(modify-category-entry '(#xf00 . #xfff) ?q)
(map-charset-chars #'modify-category-entry 'tibetan ?q)
(map-charset-chars #'modify-category-entry 'tibetan-1-column ?q)

;; Each entry is (CHARS SYNTAX CATEGORY).  CHARS lists single
;; characters and ranges; "X-Y" continues from the character after X up
;; to Y.  Every covered character is put in CATEGORY, and its syntax is
;; set to SYNTAX unless SYNTAX is "w".
;;
;; NOTE(review): this copy had two further consonant entries whose
;; string was a bare "-".  A range with no end point makes the parser
;; below index past the end of the string, so those two entries are
;; left out.  Restore the original ranges from upstream if they are
;; still wanted.
;; NOTE(review): precomposed Tibetan letters in the strings below may
;; have been decomposed by text normalisation, which would change the
;; range end points -- confirm against upstream.
(let (ch)
  (dolist (spec '(;; chars syntax category
                  ("ཀ-ཀྵཪ" "w" ?0)      ; consonant
                  ("ྐ-ྐྵྺྻྼ" "w" ?0)     ;
                  ("ིེཻོཽྀ" "w" ?2)       ; upper vowel
                  ("ཾྂྃ྆྇ྈྉྊྋ" "w" ?2)    ; upper modifier
                  ("྄ཱུ༙༵༷" "w" ?3)       ; lower vowel/modifier
                  ("༠-༩༪-༳" "w" ?6)     ; digit
                  ("་།-༒༔ཿ" "." ?|)     ; line-break char
                  ("་།༏༐༑༔ཿ" "." ?|)    ;
                  ("༈་།-༒༔ཿ༽༴" "." ?>)  ; prohibition
                  ("་།༏༐༑༔ཿ" "." ?>)    ;
                  ("ༀ-༊༼࿁࿂྅" "." ?<)    ; prohibition
                  ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q))) ; others
    (let* ((chars (car spec))
           (syntax (nth 1 spec))
           (category (nth 2 spec))
           (len (length chars))
           (idx 0)
           to)
      (while (< idx len)
        (if (/= (aref chars idx) ?-)
            ;; A plain character: a one-element range.
            (setq ch (aref chars idx)
                  to ch)
          ;; A hyphen: CH already points just past the range start;
          ;; the next character is the range end.
          (setq idx (1+ idx)
                to (aref chars idx)))
        (while (<= ch to)
          (or (string-equal syntax "w")
              (modify-syntax-entry ch syntax))
          (modify-category-entry ch category)
          (setq ch (1+ ch)))
        (setq idx (1+ idx))))))
421
422 ;; Vietnamese character set
423
424 ;; To make a word with Latin characters
;; Both VISCII halves are Latin (`l') as well as Vietnamese (`v').
(dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
  (dolist (category '(?l ?v))
    (map-charset-chars #'modify-category-entry charset category)))

;; For every code 32..127, pair the upper-case and lower-case VISCII
;; characters in the standard case table, and mark whatever
;; `encode-char' returns for them in `ucs' (if non-nil) as `v' too.
(let ((tbl (standard-case-table))
      (code 32))
  (while (< code 128)
    (let* ((upper (decode-char 'vietnamese-viscii-upper code))
           (lower (decode-char 'vietnamese-viscii-lower code))
           (upper-ucs (encode-char upper 'ucs))
           (lower-ucs (encode-char lower 'ucs)))
      (set-case-syntax-pair upper lower tbl)
      (when upper-ucs
        (modify-category-entry upper-ucs ?v))
      (when lower-ucs
        (modify-category-entry lower-ucs ?v)))
    (setq code (1+ code))))
443
444
445 ;; Latin
446
447 (modify-category-entry '(#x80 . #x024F) ?l)
448
449 (let ((tbl (standard-case-table)) c)
450
451 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
452 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
453 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
454 ;; See the Turkish language environment.
455
456 ;; Latin-1
457
458 ;; Fixme: Some of the non-word syntaxes here perhaps should be
459 ;; reviewed. (Note that the following all implicitly have word
460 ;; syntax: ¢£¤¥¨ª¯²³´¶¸¹º.) There should be a well-defined way of
461 ;; relating Unicode categories to Emacs syntax codes.
;; U+00A0 NO-BREAK SPACE is written numerically.  A literal NBSP after
;; `?' is easily turned into a plain space, and the entry would then
;; apply to SPC instead.
(set-case-syntax #xA0 " " tbl)          ; dubious
;; Punctuation.
(dolist (ch '(?¡ ?§ ?¿))
  (set-case-syntax ch "." tbl))
;; Symbols.  #xAD is U+00AD SOFT HYPHEN, written numerically because it
;; is invisible as a literal.
(dolist (ch '(?¦ ?© ?¬ #xAD ?® ?° ?± ?µ ?· ?¼ ?½ ?¾))
  (set-case-syntax ch "_" tbl))
(set-case-syntax-delims 171 187 tbl)    ; « »
;; Upper/lower pairs lie 32 apart.  This loop also pairs 215 (×) with
;; 247 (÷); the entries after the loop reset both of them, so they
;; must stay after it.
(let ((c 192))
  (while (<= c 222)
    (set-case-syntax-pair c (+ c 32) tbl)
    (setq c (1+ c))))
(set-case-syntax ?× "_" tbl)
(set-case-syntax ?ß "w" tbl)
(set-case-syntax ?÷ "_" tbl)
486 ;; See below for ÿ.
487
488 ;; Latin Extended-A, Latin Extended-B
(setq c #x0100)
(while (<= c #x0233)
  (when (zerop (% c 2))
    ;; Upper case on the even code point, lower case on the next one.
    (if (or (<= c #x012e)
            (and (>= c #x014a) (<= c #x0177)))
        (set-case-syntax-pair c (1+ c) tbl))
    ;; In #x0139..#x0148 the upper-case letter is the odd code point.
    (if (and (>= c #x013a) (<= c #x0148))
        (set-case-syntax-pair (1- c) c tbl)))
  (setq c (1+ c)))
;; U+0132/U+0133 (IJ ligature) are written numerically: when the
;; ligatures are expanded to the two letters "IJ"/"ij", `?IJ' is no
;; longer a character literal.
(set-case-syntax-pair #x0132 #x0133 tbl)
(set-case-syntax-pair ?Ĵ ?ĵ tbl)
(set-case-syntax-pair ?Ķ ?ķ tbl)
(set-case-syntax-pair ?Ÿ ?ÿ tbl)
(set-case-syntax-pair ?Ź ?ź tbl)
(set-case-syntax-pair ?Ż ?ż tbl)
(set-case-syntax-pair ?Ž ?ž tbl)
507
508 ;; Latin Extended-B
;; Explicit (UPPER . LOWER) case pairs, applied in list order.  The
;; digraph letters U+01C4..U+01CC and U+01F1..U+01F3 are written
;; numerically: once they are expanded to two ordinary letters
;; (e.g. "DŽ") they are no longer character literals.  The title-case
;; form of each digraph is paired with the same lower-case letter as
;; the upper-case form and is listed after it.
(dolist (pair '((?Ɓ . ?ɓ) (?Ƃ . ?ƃ) (?Ƅ . ?ƅ) (?Ɔ . ?ɔ)
                (?Ƈ . ?ƈ) (?Ɖ . ?ɖ) (?Ɗ . ?ɗ) (?Ƌ . ?ƌ)
                (?Ǝ . ?ǝ) (?Ə . ?ə) (?Ɛ . ?ɛ) (?Ƒ . ?ƒ)
                (?Ɠ . ?ɠ) (?Ɣ . ?ɣ) (?Ɩ . ?ɩ) (?Ɨ . ?ɨ)
                (?Ƙ . ?ƙ) (?Ɯ . ?ɯ) (?Ɲ . ?ɲ) (?Ɵ . ?ɵ)
                (?Ơ . ?ơ) (?Ƣ . ?ƣ) (?Ƥ . ?ƥ) (?Ʀ . ?ʀ)
                (?Ƨ . ?ƨ) (?Ʃ . ?ʃ) (?Ƭ . ?ƭ) (?Ʈ . ?ʈ)
                (?Ư . ?ư) (?Ʊ . ?ʊ) (?Ʋ . ?ʋ) (?Ƴ . ?ƴ)
                (?Ƶ . ?ƶ) (?Ʒ . ?ʒ) (?Ƹ . ?ƹ) (?Ƽ . ?ƽ)
                (#x01C4 . #x01C6)       ; DZ WITH CARON, upper
                (#x01C5 . #x01C6)       ; DZ WITH CARON, title
                (#x01C7 . #x01C9)       ; LJ, upper
                (#x01C8 . #x01C9)       ; LJ, title
                (#x01CA . #x01CC)       ; NJ, upper
                (#x01CB . #x01CC)       ; NJ, title
                (?Ǎ . ?ǎ) (?Ǐ . ?ǐ) (?Ǒ . ?ǒ) (?Ǔ . ?ǔ)
                (?Ǖ . ?ǖ) (?Ǘ . ?ǘ) (?Ǚ . ?ǚ) (?Ǜ . ?ǜ)
                (?Ǟ . ?ǟ) (?Ǡ . ?ǡ) (?Ǣ . ?ǣ) (?Ǥ . ?ǥ)
                (?Ǧ . ?ǧ) (?Ǩ . ?ǩ) (?Ǫ . ?ǫ) (?Ǭ . ?ǭ)
                (?Ǯ . ?ǯ)
                ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
                (#x01F1 . #x01F3)       ; DZ, upper
                (#x01F2 . #x01F3)       ; DZ, title
                (?Ǵ . ?ǵ) (?Ƕ . ?ƕ) (?Ƿ . ?ƿ) (?Ǹ . ?ǹ)
                (?Ǻ . ?ǻ) (?Ǽ . ?ǽ) (?Ǿ . ?ǿ) (?Ȁ . ?ȁ)
                (?Ȃ . ?ȃ) (?Ȅ . ?ȅ) (?Ȇ . ?ȇ) (?Ȉ . ?ȉ)
                (?Ȋ . ?ȋ) (?Ȍ . ?ȍ) (?Ȏ . ?ȏ) (?Ȑ . ?ȑ)
                (?Ȓ . ?ȓ) (?Ȕ . ?ȕ) (?Ȗ . ?ȗ) (?Ș . ?ș)
                (?Ț . ?ț) (?Ȝ . ?ȝ) (?Ȟ . ?ȟ) (?Ȣ . ?ȣ)
                (?Ȥ . ?ȥ) (?Ȧ . ?ȧ) (?Ȩ . ?ȩ) (?Ȫ . ?ȫ)
                (?Ȭ . ?ȭ) (?Ȯ . ?ȯ) (?Ȱ . ?ȱ) (?Ȳ . ?ȳ)))
  (set-case-syntax-pair (car pair) (cdr pair) tbl))
603
604 ;; Latin Extended Additional
(modify-category-entry '(#x1e00 . #x1ef9) ?l)
;; Even code points pair with the next one, except in the gap between
;; #x1e94 and #x1ea0.
(setq c #x1e00)
(while (<= c #x1ef9)
  (when (and (zerop (% c 2))
             (or (<= c #x1e94) (>= c #x1ea0)))
    (set-case-syntax-pair c (1+ c) tbl))
  (setq c (1+ c)))

;; Greek
(modify-category-entry '(#x0370 . #x03ff) ?g)
(setq c #x0370)
(while (<= c #x03ff)
  ;; The two capital ranges pair with the letter 32 positions later.
  (when (or (and (>= c #x0391) (<= c #x03a1))
            (and (>= c #x03a3) (<= c #x03ab)))
    (set-case-syntax-pair c (+ c 32) tbl))
  ;; In #x03da..#x03ee even code points pair with the next one.
  (when (and (>= c #x03da)
             (<= c #x03ee)
             (zerop (% c 2)))
    (set-case-syntax-pair c (1+ c) tbl))
  (setq c (1+ c)))
;; Accented capitals, as (UPPER . LOWER).
(dolist (pair '((?Ά . ?ά)
                (?Έ . ?έ)
                (?Ή . ?ή)
                (?Ί . ?ί)
                (?Ό . ?ό)
                (?Ύ . ?ύ)
                (?Ώ . ?ώ)))
  (set-case-syntax-pair (car pair) (cdr pair) tbl))

;; Armenian: lower case lies #x30 after upper case.
(setq c #x531)
(while (<= c #x556)
  (set-case-syntax-pair c (+ c #x30) tbl)
  (setq c (1+ c)))
638
639 ;; Greek Extended
(modify-category-entry '(#x1f00 . #x1fff) ?g)
;; Up to #x1fa7, a letter whose low nibble is 0..7 has its upper-case
;; form 8 positions later.
(setq c #x1f00)
(while (<= c #x1fff)
  (and (<= (logand c #x000f) 7)
       (<= c #x1fa7)
       (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
       ;; Skip the column #x1f70..#x1f7f, whose case pairs are listed
       ;; explicitly below.  The masked value is a multiple of 16, so
       ;; it has to be compared with #x70; comparing it with 7 never
       ;; excluded anything.
       (/= (logand c #x00f0) #x70)
       (set-case-syntax-pair (+ c 8) c tbl))
  (setq c (1+ c)))
;; Remaining pairs as (UPPER . LOWER).  They are written as code
;; points: NFC normalisation replaces the letters with oxia
;; (e.g. U+1FBB, U+1F71) by the basic Greek letters with tonos, which
;; would silently turn these entries into duplicates of the pairs in
;; the Greek block.
(dolist (pair '((#x1fb8 . #x1fb0)       ; alpha with vrachy
                (#x1fb9 . #x1fb1)       ; alpha with macron
                (#x1fba . #x1f70)       ; alpha with varia
                (#x1fbb . #x1f71)       ; alpha with oxia
                (#x1fbc . #x1fb3)       ; alpha with (pros/ypo)gegrammeni
                (#x1fc8 . #x1f72)       ; epsilon with varia
                (#x1fc9 . #x1f73)       ; epsilon with oxia
                (#x1fca . #x1f74)       ; eta with varia
                (#x1fcb . #x1f75)       ; eta with oxia
                (#x1fcc . #x1fc3)       ; eta with (pros/ypo)gegrammeni
                (#x1fd8 . #x1fd0)       ; iota with vrachy
                (#x1fd9 . #x1fd1)       ; iota with macron
                (#x1fda . #x1f76)       ; iota with varia
                (#x1fdb . #x1f77)       ; iota with oxia
                (#x1fe8 . #x1fe0)       ; upsilon with vrachy
                (#x1fe9 . #x1fe1)       ; upsilon with macron
                (#x1fea . #x1f7a)       ; upsilon with varia
                (#x1feb . #x1f7b)       ; upsilon with oxia
                (#x1fec . #x1fe5)       ; rho with dasia
                (#x1ff8 . #x1f78)       ; omicron with varia
                (#x1ff9 . #x1f79)       ; omicron with oxia
                (#x1ffa . #x1f7c)       ; omega with varia
                (#x1ffb . #x1f7d)       ; omega with oxia
                (#x1ffc . #x1ff3)))     ; omega with (pros/ypo)gegrammeni
  (set-case-syntax-pair (car pair) (cdr pair) tbl))
673
674 ;; cyrillic
(modify-category-entry '(#x0400 . #x04FF) ?y)
(setq c #x0400)
(while (<= c #x04ff)
  ;; The three cases cover disjoint code ranges.
  (cond
   ;; #x0400..#x040f: lower case is 80 positions later.
   ((and (>= c #x0400) (<= c #x040f))
    (set-case-syntax-pair c (+ c 80) tbl))
   ;; #x0410..#x042f: lower case is 32 positions later.
   ((and (>= c #x0410) (<= c #x042f))
    (set-case-syntax-pair c (+ c 32) tbl))
   ;; Elsewhere, even code points in these ranges pair with the next one.
   ((and (zerop (% c 2))
         (or (and (>= c #x0460) (<= c #x0480))
             (and (>= c #x048c) (<= c #x04be))
             (and (>= c #x04d0) (<= c #x04f4))))
    (set-case-syntax-pair c (1+ c) tbl)))
  (setq c (1+ c)))
;; Pairs not covered by the loop, as (UPPER . LOWER).
(dolist (pair '((?Ӂ . ?ӂ)
                (?Ӄ . ?ӄ)
                (?Ӈ . ?ӈ)
                (?Ӌ . ?ӌ)
                (?Ӹ . ?ӹ)))
  (set-case-syntax-pair (car pair) (cdr pair) tbl))
695
696 ;; general punctuation
;; Each step is (LAST . SYNTAX).  C keeps advancing from one step to
;; the next, so a step covers the code points after the previous
;; step's LAST up to its own LAST.
(setq c #x2000)
(dolist (step '((#x200b . " ")
                (#x200F . ".")
                ;; Fixme: These aren't all right:
                (#x2027 . "_")
                (#x206F . ".")))
  (while (<= c (car step))
    (set-case-syntax c (cdr step) tbl)
    (setq c (1+ c))))

;; Roman numerals
(setq c #x2160)
(while (<= c #x216f)
  (set-case-syntax-pair c (+ c #x10) tbl)
  (setq c (1+ c)))

;; Fixme: The following blocks might be better as symbol rather than
;; punctuation.
;; Same stepping scheme as above, starting at the Arrows block.
(setq c #x2190)
(dolist (step '((#x21FF . ".")          ; Arrows
                (#x22FF . ".")          ; Mathematical Operators
                (#x23FF . ".")          ; Miscellaneous Technical
                (#x243F . "_")))        ; Control Pictures
  (while (<= c (car step))
    (set-case-syntax c (cdr step) tbl)
    (setq c (1+ c))))
737
738 ;; Circled Latin
;; Each element is (FIRST LAST OFFSET): the upper-case letters
;; FIRST..LAST pair with the letter OFFSET positions later, and both
;; are put in category `l'.
(dolist (spec '((#x24b6 #x24cf 26)      ; Circled Latin
                (#xff21 #xff3a #x20)))  ; Fullwidth Latin
  (setq c (car spec))
  (while (<= c (nth 1 spec))
    (let ((lower (+ c (nth 2 spec))))
      (set-case-syntax-pair c lower tbl)
      (modify-category-entry c ?l)
      (modify-category-entry lower ?l))
    (setq c (1+ c))))

;; Combining diacritics, then combining marks.
(dolist (range '((#x300 . #x362)
                 (#x20d0 . #x20e3)))
  (modify-category-entry range ?^))
758
759 ;; Fixme: syntax for symbols &c
760 )
761 \f
762 ;; For each character set, put the information of the most proper
763 ;; coding system to encode it by `preferred-coding-system' property.
764
765 ;; Fixme: should this be junked?
;; Each element is (CHARSET . CODING-SYSTEM); CODING-SYSTEM is stored
;; in CHARSET's `preferred-coding-system' property.
(dolist (entry '((latin-iso8859-1 . iso-latin-1)
                 (latin-iso8859-2 . iso-latin-2)
                 (latin-iso8859-3 . iso-latin-3)
                 (latin-iso8859-4 . iso-latin-4)
                 (thai-tis620 . thai-tis620)
                 (greek-iso8859-7 . greek-iso-8bit)
                 (arabic-iso8859-6 . iso-2022-7bit)
                 (hebrew-iso8859-8 . hebrew-iso-8bit)
                 (katakana-jisx0201 . japanese-shift-jis)
                 (latin-jisx0201 . japanese-shift-jis)
                 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                 (latin-iso8859-9 . iso-latin-5)
                 (japanese-jisx0208-1978 . iso-2022-jp)
                 (chinese-gb2312 . cn-gb-2312)
                 (japanese-jisx0208 . iso-2022-jp)
                 (korean-ksc5601 . iso-2022-kr)
                 (japanese-jisx0212 . iso-2022-jp)
                 (chinese-cns11643-1 . iso-2022-cn)
                 (chinese-cns11643-2 . iso-2022-cn)
                 (chinese-big5-1 . chinese-big5)
                 (chinese-big5-2 . chinese-big5)
                 (chinese-sisheng . iso-2022-7bit)
                 (ipa . iso-2022-7bit)
                 (vietnamese-viscii-lower . vietnamese-viscii)
                 (vietnamese-viscii-upper . vietnamese-viscii)
                 (arabic-digit . iso-2022-7bit)
                 (arabic-1-column . iso-2022-7bit)
                 (lao . lao)
                 (arabic-2-column . iso-2022-7bit)
                 (indian-is13194 . devanagari)
                 (indian-glyph . devanagari)
                 (tibetan-1-column . tibetan)
                 (ethiopic . iso-2022-7bit)
                 (chinese-cns11643-3 . iso-2022-cn)
                 (chinese-cns11643-4 . iso-2022-cn)
                 (chinese-cns11643-5 . iso-2022-cn)
                 (chinese-cns11643-6 . iso-2022-cn)
                 (chinese-cns11643-7 . iso-2022-cn)
                 (indian-2-column . devanagari)
                 (tibetan . tibetan)
                 (latin-iso8859-14 . iso-latin-8)
                 (latin-iso8859-15 . iso-latin-9)))
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))
811
812 \f
813 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
814 ;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
815 ;; property on the charsets.
;; Mark the charsets whose words are written without spaces.
(dolist (charset '(katakana-jisx0201
                   japanese-jisx0208 japanese-jisx0212
                   chinese-gb2312 chinese-big5-1 chinese-big5-2))
  ;;(aset auto-fill-chars (make-char charset) t)
  (put-charset-property charset 'nospace-between-words t))
823
824 \f
825 ;; CJK double width characters.
;; Every (FROM . TO) range below is displayed two columns wide.
(dolist (range '((#x1100 . #x11FF)
                 (#x2E80 . #x9FAF)
                 (#xAC00 . #xD7AF)
                 (#xF900 . #xFAFF)
                 (#xFE30 . #xFE4F)
                 (#xFF00 . #xFF5F)
                 (#xFFE0 . #xFFEF)))
  (let ((from (car range))
        (to (cdr range)))
    (set-char-table-range char-width-table (cons from to) 2)))
837 ;; Fixme: Doing this affects non-CJK characters through unification,
838 ;; but presumably CJK users expect those characters to be
839 ;; double-width when using these charsets.
840 ;; (map-charset-chars
841 ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
842 ;; 'japanese-jisx0208)
843 ;; (map-charset-chars
844 ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
845 ;; 'japanese-jisx0212)
846 ;; (map-charset-chars
847 ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
848 ;; 'japanese-jisx0213-1)
849 ;; (map-charset-chars
850 ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
851 ;; 'japanese-jisx0213-2)
852 ;; (map-charset-chars
853 ;; (lambda (range ignore) (set-char-table-range char-width-table range 2))
854 ;; 'korean-ksc5601)
855
856 ;; Other double width
;; Characters of these charsets are two columns wide as well.
(dolist (charset '(ethiopic tibetan indian-2-column arabic-2-column))
  (map-charset-chars
   (lambda (range ignore) (set-char-table-range char-width-table range 2))
   charset))

;; Optimize the char-tables modified above.
(dolist (table (list (standard-case-table)
                     char-width-table
                     (standard-category-table)
                     (standard-syntax-table)))
  (optimize-char-table table))
874
875 ;; The Unicode blocks actually extend past some of these ranges with
876 ;; undefined codepoints.
;; Each element is (FROM TO SCRIPT).  The range FROM..TO is assigned
;; SCRIPT in `char-script-table', and the distinct script names, in
;; order of first appearance, are stored in extra slot 0 of that table.
(let (scripts)
  (dolist (entry
           '((#x0000 #x007F latin)
             (#x00A0 #x036F latin)
             (#x0370 #x03E1 greek)
             (#x03E2 #x03EF coptic)
             (#x03F0 #x03F3 greek)
             (#x0400 #x04FF cyrillic)
             (#x0530 #x058F armenian)
             (#x0590 #x05FF hebrew)
             (#x0600 #x06FF arabic)
             (#x0700 #x074F syriac)
             (#x0780 #x07BF thaana)
             (#x0900 #x097F devanagari)
             (#x0980 #x09FF bengali)
             (#x0A00 #x0A7F gurmukhi)
             (#x0A80 #x0AFF gujarati)
             (#x0B00 #x0B7F oriya)
             (#x0B80 #x0BFF tamil)
             (#x0C00 #x0C7F telugu)
             (#x0C80 #x0CFF kannada)
             (#x0D00 #x0D7F malayalam)
             (#x0D80 #x0DFF sinhala)
             (#x0E00 #x0E5F thai)
             (#x0E80 #x0EDF lao)
             (#x0F00 #x0FFF tibetan)
             (#x1000 #x105F myanmar)
             (#x10A0 #x10FF georgian)
             (#x1100 #x11FF hangul)
             (#x1200 #x137F ethiopic)
             (#x13A0 #x13FF cherokee)
             (#x1400 #x167F canadian-aboriginal)
             (#x1680 #x169F ogham)
             (#x16A0 #x16FF runic)
             (#x1780 #x17FF khmer)
             (#x1800 #x18AF mongolian)
             (#x1E00 #x1EFF latin)
             (#x1F00 #x1FFF greek)
             (#x20A0 #x20AF currency)
             (#x2800 #x28FF braille)
             (#x2E80 #x2FDF han)
             (#x2FF0 #x2FFF ideographic-description)
             (#x3000 #x303F cjk-misc)
             (#x3040 #x30FF kana)
             (#x3100 #x312F bopomofo)
             (#x3130 #x318F hangul)
             (#x3190 #x319F kanbun)
             (#x31A0 #x31BF bopomofo)
             (#x3400 #x9FAF han)
             (#xA000 #xA4CF yi)
             (#xAC00 #xD7AF hangul)
             (#xF900 #xFA5F han)
             (#xFB1D #xFB4F hebrew)
             (#xFB50 #xFDFF arabic)
             (#xFE70 #xFEFC arabic)
             (#xFF00 #xFF5F cjk-misc)
             (#xFF61 #xFF9F kana)
             (#xFFE0 #xFFE6 cjk-misc)
             (#x20000 #x2AFFF han)
             (#x2F800 #x2FFFF han)))
    (let ((script (nth 2 entry)))
      (set-char-table-range char-script-table
                            (cons (car entry) (nth 1 entry))
                            script)
      (unless (memq script scripts)
        (setq scripts (cons script scripts)))))
  (set-char-table-extra-slot char-script-table 0 (nreverse scripts)))

;; Every character of the `tibetan' charset is assigned the script
;; `tibetan' too.
(map-charset-chars
 #'(lambda (range ignore)
     (set-char-table-range char-script-table range 'tibetan))
 'tibetan)
948
949 \f
950 ;;; Setting word boundary.
951
(defun next-word-boundary-han (pos limit)
  "Return a word boundary position found from POS towards LIMIT.
If POS is not beyond LIMIT, search forward: starting at POS, skip a
run of characters of category `C', then a run of characters of
category `H', and return the position reached.  If neither run is
present, POS itself is returned.
Otherwise search backward: step POS back while it is greater than
LIMIT and the character before it has the script `han' in
`char-script-table', and return the resulting position."
  (if (<= pos limit)
      (save-excursion
        (goto-char pos)
        ;; Only move when the match succeeded.  After a failed
        ;; `looking-at', `match-end' still describes some earlier,
        ;; unrelated match.
        (if (looking-at "\\cC+")
            (goto-char (match-end 0)))
        (if (looking-at "\\cH+")
            (goto-char (match-end 0)))
        (point))
    (while (and (> pos limit)
                (eq (aref char-script-table (char-after (1- pos))) 'han))
      (setq pos (1- pos)))
    pos))
965
(defun next-word-boundary-kana (pos limit)
  "Return a word boundary position found from POS towards LIMIT.
If POS is not beyond LIMIT, search forward: starting at POS, skip an
optional run of category `K' characters, then an optional run of
category `H' characters, and return the position reached.
Otherwise search backward.  If the character at POS has category `K',
step back over the preceding `K' characters.  If not, step back over
the preceding `H' characters; then, if the last character examined
has category `C', `K' or `A' (checked in that order), step back over
it and over the preceding characters of that same category.  POS never
moves below LIMIT in the loops."
  (if (> pos limit)
      ;; Backward search.
      (let ((cats (char-category-set (char-after pos)))
            lead)
        (cond
         ((aref cats ?K)
          (while (and (> pos limit)
                      (aref (char-category-set (char-after (1- pos))) ?K))
            (setq pos (1- pos))))
         (t
          ;; CATS is updated as a side effect of the loop test, so
          ;; afterwards it describes the last character examined.
          (while (and (> pos limit)
                      (aref (setq cats
                                  (char-category-set (char-after (1- pos))))
                            ?H))
            (setq pos (1- pos)))
          (setq lead (cond ((aref cats ?C) ?C)
                           ((aref cats ?K) ?K)
                           ((aref cats ?A) ?A)))
          (when lead
            (setq pos (1- pos))
            (while (and (> pos limit)
                        (aref (char-category-set (char-after (1- pos)))
                              lead))
              (setq pos (1- pos))))))
        pos)
    ;; Forward search.
    (save-excursion
      (goto-char pos)
      (when (looking-at "\\cK+")
        (goto-char (match-end 0)))
      (when (looking-at "\\cH+")
        (goto-char (match-end 0)))
      (point))))
995
;; Register the boundary function for every `char-script-table' entry
;; whose script is `han' or `kana'; other scripts are left alone.
(map-char-table
 #'(lambda (char script)
     (let ((boundary-function
            (cond ((eq script 'han) #'next-word-boundary-han)
                  ((eq script 'kana) #'next-word-boundary-kana))))
       (if boundary-function
           (set-char-table-range next-word-boundary-function-table
                                 char boundary-function))))
 char-script-table)
1005
;; Category pairs consulted when finding word boundaries.  The
;; separating pairs all involve the 2-byte character set categories.
(setq word-combining-categories
      '((?l . ?l))

      word-separating-categories
      '((?A . ?K)                       ; Alpha numeric - Katakana
        (?A . ?C)                       ; Alpha numeric - Chinese
        (?H . ?A)                       ; Hiragana - Alpha numeric
        (?H . ?K)                       ; Hiragana - Katakana
        (?H . ?C)                       ; Hiragana - Chinese
        (?K . ?A)                       ; Katakana - Alpha numeric
        (?K . ?C)                       ; Katakana - Chinese
        (?C . ?A)                       ; Chinese - Alpha numeric
        (?C . ?K)                       ; Chinese - Katakana
        ))
1020
1021 ;;; Local Variables:
1022 ;;; coding: utf-8-emacs
1023 ;;; End:
1024
1025 ;;; characters.el ends here