]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
Delete syntax stuff for Latin-1 and Latin-2.
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995 Free Software Foundation, Inc.
4 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6 ;; Keywords: multibyte character, character set, syntax, category
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Commentary:
26
27 ;; This file contains multibyte characters. Save this file always in
28 ;; the coding system `iso-2022-7bit'.
29
30 ;; This file does not define the syntax for Latin-N character sets;
31 ;; those are defined by the files latin-N.el.
32
33 ;;; Predefined categories.
34
35 ;; For each character set.
36
;; One category mnemonic per character set (script).  Each table entry
;; is (MNEMONIC . DOCSTRING); the entries are registered in table order.
(let ((charset-categories
       '((?a . "ASCII")
         (?l . "Latin")
         (?t . "Thai")
         (?g . "Greek")
         (?b . "Arabic")
         (?w . "Hebrew")
         (?y . "Cyrillic")
         (?k . "Japanese katakana")
         (?r . "Japanese roman")
         (?c . "Chinese")
         (?j . "Japanese")
         (?h . "Korean")
         (?e . "Ethiopic (Ge'ez)")
         (?v . "Vietnamese")
         (?i . "Indian")
         (?o . "Lao")
         (?q . "Tibetan")))
      entry)
  (while charset-categories
    (setq entry (car charset-categories)
          charset-categories (cdr charset-categories))
    (define-category (car entry) (cdr entry))))
54
55 ;; For each group (row) of 2-byte character sets.
56
;; Categories for the groups (rows) of 2-byte character sets.  The
;; second argument of each call is the human-readable description
;; stored for the category, so it is kept grammatical and uniform.
(define-category ?A "Alpha numeric characters of 2-byte character sets")
(define-category ?C "Chinese (Han) characters of 2-byte character sets")
(define-category ?G "Greek characters of 2-byte character sets")
(define-category ?H "Japanese Hiragana characters of 2-byte character sets")
(define-category ?K "Japanese Katakana characters of 2-byte character sets")
(define-category ?N "Korean Hangul characters of 2-byte character sets")
(define-category ?Y "Cyrillic characters of 2-byte character sets")
(define-category ?I "Indian Glyphs")
65
66 ;; For phonetic classifications.
67
;; Phonetic classes, keyed by the digit mnemonics `0' through `9'.
;; Each element is (MNEMONIC . DOCSTRING), defined in list order.
(mapcar (function
         (lambda (phonetic-class)
           (define-category (car phonetic-class) (cdr phonetic-class))))
        '((?0 . "consonant")
          (?1 . "base vowel")
          (?2 . "upper diacritical mark (including upper vowel)")
          (?3 . "lower diacritical mark (including lower vowel)")
          (?4 . "tone mark")
          (?5 . "vowel")
          (?6 . "digit")
          (?7 . "vowel-modifying diacritical mark")
          (?8 . "vowel-signs")
          (?9 . "semivowel lower")))
78
79 ;; For filling.
;; Line-breaking categories.  `|' marks characters at which filling
;; may break a line.  `>' and `<' must be kept for `kinsoku'
;; processing; see the comments in kinsoku.el.
(let ((line-break-categories
       '((?| . "While filling, we can break a line at this character.")
         (?> . "A character which can't be placed at beginning of line.")
         (?< . "A character which can't be placed at end of line.")))
      entry)
  (while line-break-categories
    (setq entry (car line-break-categories)
          line-break-categories (cdr line-break-categories))
    (define-category (car entry) (cdr entry))))
86
87 \f
88 ;;; Setting syntax and category.
89
90 ;; ASCII
91
;; Every character code from 32 through 126 is put in both the
;; `a' (ASCII) and the `l' (Latin) category.
(let ((code 126))
  (while (>= code 32)
    (modify-category-entry code ?l)
    (modify-category-entry code ?a)
    (setq code (1- code))))
97
98 ;; Arabic character set
99
;; Each Arabic charset gets word syntax and the `b' (Arabic) category,
;; applied through the character `make-char' returns for the charset.
(mapcar (function
         (lambda (arabic-charset)
           (modify-syntax-entry (make-char arabic-charset) "w")
           (modify-category-entry (make-char arabic-charset) ?b)))
        '(arabic-iso8859-6
          arabic-digit
          arabic-1-column
          arabic-2-column))
108
109 ;; Chinese character set (GB2312)
110
;; Syntax: the whole charset is word-constituent, except that rows
;; 33, 34 and 41 are given symbol syntax.
(modify-syntax-entry (make-char 'chinese-gb2312) "w")
(modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
(modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
(modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
;; Bracket pairs: each opener names its closer, and vice versa below.
(modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
(modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
(modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
(modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
(modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
(modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
(modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
(modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
(modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
(modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
(modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
(modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
(modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
(modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")

;; Categories: the whole charset is `c' (Chinese) and breakable (`|').
(modify-category-entry (make-char 'chinese-gb2312) ?c)
(modify-category-entry (make-char 'chinese-gb2312) ?\|)
;; Per-row group categories.  Row 35 is entered exactly once; the
;; second, redundant row-35 `A' entry has been dropped.
(modify-category-entry (make-char 'chinese-gb2312 35) ?A)
(modify-category-entry (make-char 'chinese-gb2312 36) ?H)
(modify-category-entry (make-char 'chinese-gb2312 37) ?K)
(modify-category-entry (make-char 'chinese-gb2312 38) ?G)
(modify-category-entry (make-char 'chinese-gb2312 39) ?Y)
;; Rows 48 through 126 are `C' (Han).
(let ((row 48))
  (while (< row 127)
    (modify-category-entry (make-char 'chinese-gb2312 row) ?C)
    (setq row (1+ row))))
142
143 ;; Chinese character set (BIG5)
144
;; Both Big5 charsets are treated alike: word syntax, plus the
;; categories `c' (Chinese), `C' (Han) and `|' (line-breakable).
(let ((big5-charsets '(chinese-big5-1 chinese-big5-2))
      generic)
  (while big5-charsets
    (setq generic (make-char (car big5-charsets))
          big5-charsets (cdr big5-charsets))
    (modify-syntax-entry generic "w")
    (modify-category-entry generic ?c)
    (modify-category-entry generic ?C)
    (modify-category-entry generic ?\|)))
158
159
160 ;; Chinese character set (CNS11643)
161
;; Every CNS 11643 plane gets word syntax and the categories
;; `c' (Chinese), `C' (Han) and `|' (line-breakable).
(mapcar (function
         (lambda (cns-plane)
           (let ((generic (make-char cns-plane)))
             (modify-syntax-entry generic "w")
             (modify-category-entry generic ?c)
             (modify-category-entry generic ?C)
             (modify-category-entry generic ?|))))
        '(chinese-cns11643-1
          chinese-cns11643-2
          chinese-cns11643-3
          chinese-cns11643-4
          chinese-cns11643-5
          chinese-cns11643-6
          chinese-cns11643-7))
177
178 ;; Cyrillic character set (ISO-8859-5)
179
;; The whole charset belongs to the `y' (Cyrillic) category.
(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)

;; Codes 160 through 255 are word constituents ...
(let ((code 255))
  (while (>= code 160)
    (modify-syntax-entry (make-char 'cyrillic-iso8859-5 code) "w")
    (setq code (1- code))))
;; ... except for these three, which are then reset to punctuation.
(let ((punctuation '(?\e,L-\e(B ?\e,Lp\e(B ?\e,L}\e(B)))
  (while punctuation
    (modify-syntax-entry (car punctuation) ".")
    (setq punctuation (cdr punctuation))))
189
190 ;; Devanagari character set
191
;; Table-driven setup for Devanagari.  Each `deflist' entry is
;; (CHARS SYNTAX CATEGORY): every character named by CHARS receives
;; syntax descriptor SYNTAX and category CATEGORY.  Inside CHARS a `-'
;; denotes a range: the characters after the previously processed one,
;; up to and including the character that follows the `-'.
192 (let ((deflist '(;; chars syntax category
193 ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
194 ; chandrabindu, anuswar, visarga
195 ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?5) ; independent vowel
196 ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
197 ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
198 ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
199 ))
200 elm chars len syntax category to ch i)
201 (while deflist
202 (setq elm (car deflist))
203 (setq chars (car elm)
204 len (length chars)
205 syntax (nth 1 elm)
206 category (nth 2 elm)
207 i 0)
;; Walk CHARS.  `i' is the current index into the string, `ch' the next
;; character to set, and `to' the last character of the current run.
;; NOTE(review): the `-' test reads with `aref' while characters are
;; fetched with `sref'; presumably `i' is a byte index -- confirm.
208 (while (< i len)
209 (if (= (aref chars i) ?-)
;; Range: keep `ch' (already one past the previous character) and
;; take the character after the `-' as the upper bound.
210 (setq i (1+ i)
211 to (sref chars i))
;; Single character: a run of length one.
212 (setq ch (sref chars i)
213 to ch))
;; Apply the entry to every character of the run; this leaves `ch'
;; one past `to', which a following `-' relies on.
214 (while (<= ch to)
215 (modify-syntax-entry ch syntax)
216 (modify-category-entry ch category)
217 (setq ch (1+ ch)))
;; Step over the character just consumed.
218 (setq i (+ i (char-bytes to))))
219 (setq deflist (cdr deflist))))
220
221 ;; Ethiopic character set
222
;; The Ethiopic charset belongs to the `e' category.
(let ((generic-ethiopic (make-char 'ethiopic)))
  (modify-category-entry generic-ethiopic ?e))

;; European character set (Latin-1,2,3,4,5)

;; Only the `l' (Latin) category is assigned here, once per charset.
(let ((latin-charsets '(latin-iso8859-1
                        latin-iso8859-2
                        latin-iso8859-3
                        latin-iso8859-4
                        latin-iso8859-9)))
  (while latin-charsets
    (modify-category-entry (make-char (car latin-charsets)) ?l)
    (setq latin-charsets (cdr latin-charsets))))
232
233 ;; Greek character set (ISO-8859-7)
234
;; The whole charset belongs to the `g' (Greek) category.
(modify-category-entry (make-char 'greek-iso8859-7) ?g)

;; Codes 182 through 254 are word constituents, and so is NBSP (160).
(let ((code 254))
  (while (>= code 182)
    (modify-syntax-entry (make-char 'greek-iso8859-7 code) "w")
    (setq code (1- code))))
(modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
;; These three are then reset to punctuation; this must follow the
;; loop above, which would otherwise overwrite the setting.
(let ((punctuation '(?\e,F7\e(B ?\e,F;\e(B ?\e,F=\e(B)))
  (while punctuation
    (modify-syntax-entry (car punctuation) ".")
    (setq punctuation (cdr punctuation))))
245
246 ;; Hebrew character set (ISO-8859-8)
247
;; The whole charset belongs to the `w' (Hebrew) category.
(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)

;; Codes 224 through 250 are word constituents, and so is NBSP (160).
(let ((code 250))
  (while (>= code 224)
    (modify-syntax-entry (make-char 'hebrew-iso8859-8 code) "w")
    (setq code (1- code))))
(modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
255
256 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
257
;; (CHARSET . CATEGORY) pairs: IS 13194 is `i' (Indian), while both
;; glyph charsets are `I' (Indian Glyphs).
(let ((indian-categories '((indian-is13194 . ?i)
                           (indian-2-column . ?I)
                           (indian-1-column . ?I)))
      pair)
  (while indian-categories
    (setq pair (car indian-categories)
          indian-categories (cdr indian-categories))
    (modify-category-entry (make-char (car pair)) (cdr pair))))
261
262 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
263
;; Charset-wide categories for the Japanese charsets.  All but
;; JISX0201-roman are additionally marked `|' (line-breakable).
(let ((kana (make-char 'katakana-jisx0201))
      (roman (make-char 'latin-jisx0201))
      (jisx0208 (make-char 'japanese-jisx0208))
      (jisx0212 (make-char 'japanese-jisx0212)))
  (modify-category-entry kana ?k)
  (modify-category-entry roman ?r)
  (modify-category-entry jisx0208 ?j)
  (modify-category-entry jisx0212 ?j)
  (modify-category-entry kana ?\|)
  (modify-category-entry jisx0208 ?\|)
  (modify-category-entry jisx0212 ?\|))
271
272 ;; JISX0208
;; The whole charset is word-constituent, except that rows 33, 34 and
;; 40 are given symbol syntax.
(modify-syntax-entry (make-char 'japanese-jisx0208) "w")
(let ((symbol-rows '(33 34 40)))
  (while symbol-rows
    (modify-syntax-entry (make-char 'japanese-jisx0208 (car symbol-rows)) "_")
    (setq symbol-rows (cdr symbol-rows))))
;; These characters are put back to word syntax after the row
;; settings above.
(mapcar (function (lambda (word-char) (modify-syntax-entry word-char "w")))
        '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))
;; Bracket pairs: each opener names its closer, and vice versa below.
(modify-syntax-entry ?\\e$B!J\e(B "(\e$B!K\e(B")
(modify-syntax-entry ?\\e$B!N\e(B "(\e$B!O\e(B")
(modify-syntax-entry ?\\e$B!P\e(B "(\e$B!Q\e(B")
(modify-syntax-entry ?\\e$B!V\e(B "(\e$B!W\e(B")
(modify-syntax-entry ?\\e$B!X\e(B "(\e$B!Y\e(B")
(modify-syntax-entry ?\\e$B!K\e(B ")\e$B!J\e(B")
(modify-syntax-entry ?\\e$B!O\e(B ")\e$B!N\e(B")
(modify-syntax-entry ?\\e$B!Q\e(B ")\e$B!P\e(B")
(modify-syntax-entry ?\\e$B!W\e(B ")\e$B!V\e(B")
(modify-syntax-entry ?\\e$B!Y\e(B ")\e$B!X\e(B")
291
;; Per-row group categories, as (ROW . CATEGORY) pairs.
(let ((row-categories '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
      pair)
  (while row-categories
    (setq pair (car row-categories)
          row-categories (cdr row-categories))
    (modify-category-entry (make-char 'japanese-jisx0208 (car pair))
                           (cdr pair))))
;; Rows 48 through 126 are `C' (Han).
(let ((han-row 48))
  (while (<= han-row 126)
    (modify-category-entry (make-char 'japanese-jisx0208 han-row) ?C)
    (setq han-row (1+ han-row))))
;; Individual characters that also take a group category:
;; one is `K' only, two are both `K' and `H', and nine are `C'.
(modify-category-entry ?\e$B!<\e(B ?K)
(mapcar (function
         (lambda (kana-mark)
           (modify-category-entry kana-mark ?K)
           (modify-category-entry kana-mark ?H)))
        '(?\e$B!+\e(B ?\e$B!,\e(B))
(mapcar (function (lambda (han-mark) (modify-category-entry han-mark ?C)))
        '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))
311
312 ;; JISX0212
;; The whole charset is word-constituent and `C' (Han); rows 33, 34
;; and 35 are then given symbol syntax.
(let ((generic-jisx0212 (make-char 'japanese-jisx0212))
      (symbol-rows '(33 34 35)))
  (modify-syntax-entry generic-jisx0212 "w")
  (while symbol-rows
    (modify-syntax-entry (make-char 'japanese-jisx0212 (car symbol-rows)) "_")
    (setq symbol-rows (cdr symbol-rows)))

  (modify-category-entry generic-jisx0212 ?C))
319
320 ;; JISX0201-Kana
;; The whole charset is word-constituent; the five characters listed
;; below are then reset to punctuation.
(modify-syntax-entry (make-char 'katakana-jisx0201) "w")
(mapcar (function (lambda (punctuation) (modify-syntax-entry punctuation ".")))
        '(?\e(I!\e(B ?\e(I"\e(B ?\e(I#\e(B ?\e(I$\e(B ?\e(I%\e(B))
326
327 ;; Korean character set (KSC5601)
328
;; The whole charset is word-constituent, except that the rows listed
;; here are given symbol syntax.
(modify-syntax-entry (make-char 'korean-ksc5601) "w")
(let ((symbol-rows '(33 34 38 39 40 41)))
  (while symbol-rows
    (modify-syntax-entry (make-char 'korean-ksc5601 (car symbol-rows)) "_")
    (setq symbol-rows (cdr symbol-rows))))

;; The whole charset is `h' (Korean); selected rows additionally get
;; a group category, given as (ROW . CATEGORY) pairs.
(modify-category-entry (make-char 'korean-ksc5601) ?h)
(let ((row-categories '((35 . ?A) (37 . ?G) (42 . ?H) (43 . ?K) (44 . ?Y)))
      pair)
  (while row-categories
    (setq pair (car row-categories)
          row-categories (cdr row-categories))
    (modify-category-entry (make-char 'korean-ksc5601 (car pair))
                           (cdr pair))))
343
344 ;; Lao character set
345
;; The whole Lao charset belongs to the `o' category.
346 (modify-category-entry (make-char 'lao) ?o)
347
;; Table-driven setup for individual Lao characters.  Each `deflist'
;; entry is (CHARS SYNTAX CATEGORY): every character named by CHARS
;; receives syntax descriptor SYNTAX and category CATEGORY.  Inside
;; CHARS a `-' denotes a range: the characters after the previously
;; processed one, up to and including the character after the `-'.
348 (let ((deflist '(;; chars syntax category
349 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
350 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
351 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
352 ("\e(1XY\e(B" "w" ?3) ; vowel lower
353 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
354 ("\e(1\\e(B" "w" ?9) ; semivowel lower
355 ("\e(1p\e(B-\e(1y\e(B" "w" ?0) ; digit and misc
356 ("\e(1Of\e(B" "_" ?0) ; symbol
357 ))
358 elm chars len syntax category to ch i)
359 (while deflist
360 (setq elm (car deflist))
361 (setq chars (car elm)
362 len (length chars)
363 syntax (nth 1 elm)
364 category (nth 2 elm)
365 i 0)
;; Walk CHARS.  `i' is the current index into the string, `ch' the next
;; character to set, and `to' the last character of the current run.
;; NOTE(review): the `-' test reads with `aref' while characters are
;; fetched with `sref'; presumably `i' is a byte index -- confirm.
366 (while (< i len)
367 (if (= (aref chars i) ?-)
;; Range: keep `ch' (already one past the previous character) and
;; take the character after the `-' as the upper bound.
368 (setq i (1+ i)
369 to (sref chars i))
;; Single character: a run of length one.
370 (setq ch (sref chars i)
371 to ch))
;; Apply the entry to every character of the run; this leaves `ch'
;; one past `to', which a following `-' relies on.
372 (while (<= ch to)
373 (modify-syntax-entry ch syntax)
374 (modify-category-entry ch category)
375 (setq ch (1+ ch)))
;; Step over the character just consumed.
376 (setq i (+ i (char-bytes to))))
377 (setq deflist (cdr deflist))))
378
379 ;; Thai character set (TIS620)
380
;; The whole Thai charset belongs to the `t' category.
381 (modify-category-entry (make-char 'thai-tis620) ?t)
382
;; Table-driven setup for individual Thai characters.  Each `deflist'
;; entry is (CHARS SYNTAX CATEGORY): every character named by CHARS
;; receives syntax descriptor SYNTAX and category CATEGORY.  Inside
;; CHARS a `-' denotes a range: the characters after the previously
;; processed one, up to and including the character after the `-'.
383 (let ((deflist '(;; chars syntax category
384 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
385 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
386 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
387 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
388 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
389 ("\e,TOfp\e(B-\e,Ty\e(B" "w" ?0) ; digit and misc
390 ("\e,T_oz{\e(B" "_" ?0) ; symbol
391 ))
392 elm chars len syntax category to ch i)
393 (while deflist
394 (setq elm (car deflist))
395 (setq chars (car elm)
396 len (length chars)
397 syntax (nth 1 elm)
398 category (nth 2 elm)
399 i 0)
;; Walk CHARS.  `i' is the current index into the string, `ch' the next
;; character to set, and `to' the last character of the current run.
;; NOTE(review): the `-' test reads with `aref' while characters are
;; fetched with `sref'; presumably `i' is a byte index -- confirm.
400 (while (< i len)
401 (if (= (aref chars i) ?-)
;; Range: keep `ch' (already one past the previous character) and
;; take the character after the `-' as the upper bound.
402 (setq i (1+ i)
403 to (sref chars i))
;; Single character: a run of length one.
404 (setq ch (sref chars i)
405 to ch))
;; Apply the entry to every character of the run; this leaves `ch'
;; one past `to', which a following `-' relies on.
406 (while (<= ch to)
407 (modify-syntax-entry ch syntax)
408 (modify-category-entry ch category)
409 (setq ch (1+ ch)))
;; Step over the character just consumed.
410 (setq i (+ i (char-bytes to))))
411 (setq deflist (cdr deflist))))
412
413 ;; Tibetan character set
414
;; Rows 33 through 37 of `tibetan', and row 33 of `tibetan-1-column',
;; belong to the `q' (Tibetan) category.
(mapcar (function
         (lambda (row)
           (modify-category-entry (make-char 'tibetan row) ?q)))
        '(33 34 35 36 37))

(let ((one-column-row (make-char 'tibetan-1-column 33)))
  (modify-category-entry one-column-row ?q))
421
;; Table-driven setup for individual Tibetan characters.  Each
;; `deflist' entry is (CHARS SYNTAX CATEGORY): every character named
;; by CHARS receives syntax descriptor SYNTAX and category CATEGORY.
;; Inside CHARS a `-' denotes a range: the characters after the
;; previously processed one, up to and including the character after
;; the `-'.  Some characters occur in several entries; they keep
;; punctuation syntax and accumulate one category per entry.
422 (let ((deflist '(;; chars syntax category
423 ("\e$(7"!\e(B-\e$(7"J\e(B" "w" ?0) ; consonant
424 ("\e$(7#!\e(B-\e$(7#J#P#Q\e(B" "w" ?0) ;
425 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
426 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
427 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
428 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
429 ("\e$(7!I"Q"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
430 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
431 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
432 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
433 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
434 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
435 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
436 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o\e(B" "." ?q) ; others
437 ))
438 elm chars len syntax category to ch i)
439 (while deflist
440 (setq elm (car deflist))
441 (setq chars (car elm)
442 len (length chars)
443 syntax (nth 1 elm)
444 category (nth 2 elm)
445 i 0)
;; Walk CHARS.  `i' is the current index into the string, `ch' the next
;; character to set, and `to' the last character of the current run.
;; NOTE(review): the `-' test reads with `aref' while characters are
;; fetched with `sref'; presumably `i' is a byte index -- confirm.
446 (while (< i len)
447 (if (= (aref chars i) ?-)
;; Range: keep `ch' (already one past the previous character) and
;; take the character after the `-' as the upper bound.
448 (setq i (1+ i)
449 to (sref chars i))
;; Single character: a run of length one.
450 (setq ch (sref chars i)
451 to ch))
;; Apply the entry to every character of the run; this leaves `ch'
;; one past `to', which a following `-' relies on.
452 (while (<= ch to)
453 (modify-syntax-entry ch syntax)
454 (modify-category-entry ch category)
455 (setq ch (1+ ch)))
;; Step over the character just consumed.
456 (setq i (+ i (char-bytes to))))
457 (setq deflist (cdr deflist))))
458
459 ;; Vietnamese character set
460
;; Both VISCII charsets get word syntax and the `v' (Vietnamese)
;; category.  They are also put in `l' (Latin) so that they can form
;; a word together with Latin characters.
(mapcar (function
         (lambda (viscii-charset)
           (let ((generic (make-char viscii-charset)))
             (modify-syntax-entry generic "w")
             (modify-category-entry generic ?v)
             (modify-category-entry generic ?l))))
        '(vietnamese-viscii-lower vietnamese-viscii-upper))
470
471 \f
472 ;;; Setting word boundary.
473
;; Category pairs consulted for word boundaries.  Each element is
;; (CATEGORY-BEFORE . CATEGORY-AFTER).
(setq word-combining-categories
      ;; Latin followed by Latin.
      '((?l . ?l))

      word-separating-categories
      ;; All of these concern the groups of 2-byte character sets.
      '((?A . ?K)                       ; Alpha numeric, then Katakana
        (?A . ?C)                       ; Alpha numeric, then Chinese
        (?H . ?A)                       ; Hiragana, then Alpha numeric
        (?H . ?K)                       ; Hiragana, then Katakana
        (?H . ?C)                       ; Hiragana, then Chinese
        (?K . ?A)                       ; Katakana, then Alpha numeric
        (?K . ?C)                       ; Katakana, then Chinese
        (?C . ?A)                       ; Chinese, then Alpha numeric
        (?C . ?K)))                     ; Chinese, then Katakana