]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
Fix syntax/category setting of Tibetan characters.
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5
6 ;; Keywords: multibyte character, character set, syntax, category
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Commentary:
26
27 ;; This file contains multibyte characters. Save this file always in
28 ;; the coding system `iso-2022-7bit'.
29
30 ;; This file does not define the syntax for Latin-N character sets;
31 ;; those are defined by the files latin-N.el.
32
33 ;;; Predefined categories.
34
;; For each character set.
;; Every entry below is (MNEMONIC . DOCSTRING); the categories are
;; defined in the order listed.

(let ((charset-categories
       '((?a . "ASCII")
         (?l . "Latin")
         (?t . "Thai")
         (?g . "Greek")
         (?b . "Arabic")
         (?w . "Hebrew")
         (?y . "Cyrillic")
         (?k . "Japanese katakana")
         (?r . "Japanese roman")
         (?c . "Chinese")
         (?j . "Japanese")
         (?h . "Korean")
         (?e . "Ethiopic (Ge'ez)")
         (?v . "Vietnamese")
         (?i . "Indian")
         (?o . "Lao")
         (?q . "Tibetan"))))
  (while charset-categories
    (define-category (car (car charset-categories))
      (cdr (car charset-categories)))
    (setq charset-categories (cdr charset-categories))))
54
;; For each group (row) of 2-byte character sets.
;; Every entry below is (MNEMONIC DOCSTRING).

(mapcar (function
         (lambda (def)
           (define-category (car def) (car (cdr def)))))
        '((?A "Alpha-numeric characters of 2-byte character sets")
          (?C "Chinese (Han) characters of 2-byte character sets")
          (?G "Greek characters of 2-byte character sets")
          (?H "Japanese Hiragana characters of 2-byte character sets")
          (?K "Japanese Katakana characters of 2-byte character sets")
          (?N "Korean Hangul characters of 2-byte character sets")
          (?Y "Cyrillic characters of 2-byte character sets")
          (?I "Indian Glyphs")))
65
;; For phonetic classifications.
;; The mnemonics are the digits `0' through `9'; the docstring at
;; index N of the vector belongs to the category whose mnemonic is the
;; digit N.

(let ((phonetic-docs ["consonant"
                      "base (independent) vowel"
                      "upper diacritical mark (including upper vowel)"
                      "lower diacritical mark (including lower vowel)"
                      "tone mark"
                      "symbol"
                      "digit"
                      "vowel-modifying diacritical mark"
                      "vowel-signs"
                      "semivowel lower"])
      (digit 0))
  (while (< digit (length phonetic-docs))
    (define-category (+ ?0 digit) (aref phonetic-docs digit))
    (setq digit (1+ digit))))
78
;; For filling.
(define-category ?| "While filling, we can break a line at this character.")

;; For indentation calculation.
;; The mnemonic of this category is the space character.  It is
;; written as `?\ ' and kept in the middle of the line so that it does
;; not depend on trailing whitespace surviving: a bare `?' at the end
;; of a line reads as the newline character, which is outside the
;; range of characters that `define-category' accepts as a mnemonic.
(define-category ?\  "This character counts as a space for indentation purposes.")

;; Keep the following for `kinsoku' processing.  See comments in
;; kinsoku.el.
(define-category ?> "A character which can't be placed at beginning of line.")
(define-category ?< "A character which can't be placed at end of line.")
90
91 \f
92 ;;; Setting syntax and category.
93
;; ASCII
;; Every character from SPC (32) through `~' (126) is placed in both
;; the `a' (ASCII) and the `l' (Latin) category.

(let ((code ?\ ))
  (while (<= code ?~)
    (modify-category-entry code ?a)
    (modify-category-entry code ?l)
    (setq code (1+ code))))
101
;; Arabic character set
;; Each listed charset gets word syntax and the category `b' (Arabic).

(mapcar (function
         (lambda (charset)
           (modify-syntax-entry (make-char charset) "w")
           (modify-category-entry (make-char charset) ?b)))
        '(arabic-iso8859-6
          arabic-digit
          arabic-1-column
          arabic-2-column))
112
;; Chinese character set (GB2312)

;; Word syntax for the charset as a whole, then symbol syntax for
;; rows 33, 34 and 41.
(modify-syntax-entry (make-char 'chinese-gb2312) "w")
(let ((symbol-rows '(33 34 41)))
  (while symbol-rows
    (modify-syntax-entry (make-char 'chinese-gb2312 (car symbol-rows)) "_")
    (setq symbol-rows (cdr symbol-rows))))

;; Paired delimiters: first every opener with its matching closer,
;; then every closer with its matching opener.
(modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
(modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
(modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
(modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
(modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
(modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
(modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
(modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
(modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
(modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
(modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
(modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
(modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
(modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")

;; Charset-wide categories: `c' (Chinese) and `|' (line may be broken
;; here while filling).
(modify-category-entry (make-char 'chinese-gb2312) ?c)
(modify-category-entry (make-char 'chinese-gb2312) ?\|)

;; Per-row categories, given as (ROW . CATEGORY).
(let ((row-categories '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y))))
  (while row-categories
    (modify-category-entry
     (make-char 'chinese-gb2312 (car (car row-categories)))
     (cdr (car row-categories)))
    (setq row-categories (cdr row-categories))))

;; Rows 48 through 126 are put in category `C'.
(let ((han-row 48))
  (while (<= han-row 126)
    (modify-category-entry (make-char 'chinese-gb2312 han-row) ?C)
    (setq han-row (1+ han-row))))
145
;; Chinese character set (BIG5)
;; Both BIG5 charsets get word syntax and the categories `c', `C'
;; and `|', in that order.

(let ((big5-1 (make-char 'chinese-big5-1))
      (big5-2 (make-char 'chinese-big5-2))
      (big5-categories '(?c ?C ?\|)))
  (modify-syntax-entry big5-1 "w")
  (modify-syntax-entry big5-2 "w")
  (while big5-categories
    (modify-category-entry big5-1 (car big5-categories))
    (modify-category-entry big5-2 (car big5-categories))
    (setq big5-categories (cdr big5-categories))))
161
162
;; Chinese character set (CNS11643)
;; Each of the seven CNS charsets gets word syntax and the categories
;; `c', `C' and `|'.

(mapcar (function
         (lambda (charset)
           (let ((generic (make-char charset)))
             (modify-syntax-entry generic "w")
             (modify-category-entry generic ?c)
             (modify-category-entry generic ?C)
             (modify-category-entry generic ?|))))
        '(chinese-cns11643-1
          chinese-cns11643-2
          chinese-cns11643-3
          chinese-cns11643-4
          chinese-cns11643-5
          chinese-cns11643-6
          chinese-cns11643-7))
180
;; Cyrillic character set (ISO-8859-5)

(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)

;; Code 160 gets whitespace syntax; the three characters listed next
;; get punctuation syntax.
(modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
(let ((punct-chars '(?\e,L-\e(B ?\e,Lp\e(B ?\e,L}\e(B)))
  (while punct-chars
    (modify-syntax-entry (car punct-chars) ".")
    (setq punct-chars (cdr punct-chars))))

;; Register every (UPPERCASE LOWERCASE) pair below in the standard
;; case table, in the order listed.
(let ((case-table (standard-case-table))
      (letter-pairs
       '((?\e,L!\e(B ?\e,Lq\e(B)
         (?\e,L"\e(B ?\e,Lr\e(B)
         (?\e,L#\e(B ?\e,Ls\e(B)
         (?\e,L$\e(B ?\e,Lt\e(B)
         (?\e,L%\e(B ?\e,Lu\e(B)
         (?\e,L&\e(B ?\e,Lv\e(B)
         (?\e,L'\e(B ?\e,Lw\e(B)
         (?\e,L(\e(B ?\e,Lx\e(B)
         (?\e,L)\e(B ?\e,Ly\e(B)
         (?\e,L*\e(B ?\e,Lz\e(B)
         (?\e,L+\e(B ?\e,L{\e(B)
         (?\e,L,\e(B ?\e,L|\e(B)
         (?\e,L.\e(B ?\e,L~\e(B)
         (?\e,L/\e(B ?\e,L\7f\e(B)
         (?\e,L0\e(B ?\e,LP\e(B)
         (?\e,L1\e(B ?\e,LQ\e(B)
         (?\e,L2\e(B ?\e,LR\e(B)
         (?\e,L3\e(B ?\e,LS\e(B)
         (?\e,L4\e(B ?\e,LT\e(B)
         (?\e,L5\e(B ?\e,LU\e(B)
         (?\e,L6\e(B ?\e,LV\e(B)
         (?\e,L7\e(B ?\e,LW\e(B)
         (?\e,L8\e(B ?\e,LX\e(B)
         (?\e,L9\e(B ?\e,LY\e(B)
         (?\e,L:\e(B ?\e,LZ\e(B)
         (?\e,L;\e(B ?\e,L[\e(B)
         (?\e,L<\e(B ?\e,L\\e(B)
         (?\e,L=\e(B ?\e,L]\e(B)
         (?\e,L>\e(B ?\e,L^\e(B)
         (?\e,L?\e(B ?\e,L_\e(B)
         (?\e,L@\e(B ?\e,L`\e(B)
         (?\e,LA\e(B ?\e,La\e(B)
         (?\e,LB\e(B ?\e,Lb\e(B)
         (?\e,LC\e(B ?\e,Lc\e(B)
         (?\e,LD\e(B ?\e,Ld\e(B)
         (?\e,LE\e(B ?\e,Le\e(B)
         (?\e,LF\e(B ?\e,Lf\e(B)
         (?\e,LG\e(B ?\e,Lg\e(B)
         (?\e,LH\e(B ?\e,Lh\e(B)
         (?\e,LI\e(B ?\e,Li\e(B)
         (?\e,LJ\e(B ?\e,Lj\e(B)
         (?\e,LK\e(B ?\e,Lk\e(B)
         (?\e,LL\e(B ?\e,Ll\e(B)
         (?\e,LM\e(B ?\e,Lm\e(B)
         (?\e,LN\e(B ?\e,Ln\e(B)
         (?\e,LO\e(B ?\e,Lo\e(B))))
  (while letter-pairs
    (set-case-syntax-pair (car (car letter-pairs))
                          (nth 1 (car letter-pairs))
                          case-table)
    (setq letter-pairs (cdr letter-pairs))))
236
;; Devanagari character set
;;
;; Each spec is (CHARS SYNTAX CATEGORY).  CHARS is scanned left to
;; right: a plain character is processed by itself, while `-' followed
;; by a character extends processing from just after the previously
;; processed character up to and including that character.  Every
;; processed character receives SYNTAX and CATEGORY.

(let ((specs '(;; chars syntax category
               ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
                                        ; chandrabindu, anuswar, visarga
               ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
               ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
               ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
               ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
               ))
      spec text text-len syn cat last cur pos)
  (while specs
    (setq spec (car specs))
    (setq text (car spec)
          text-len (length text)
          syn (nth 1 spec)
          cat (nth 2 spec)
          pos 0)
    (while (< pos text-len)
      (cond ((= (aref text pos) ?-)
             ;; Range: CUR already points one past the previous character.
             (setq pos (1+ pos)
                   last (aref text pos)))
            (t
             (setq cur (aref text pos)
                   last cur)))
      (while (<= cur last)
        (modify-syntax-entry cur syn)
        (modify-category-entry cur cat)
        (setq cur (1+ cur)))
      (setq pos (1+ pos)))
    (setq specs (cdr specs))))
267
;; Ethiopic character set
;; The whole charset gets category `e' and word syntax; the characters
;; listed afterwards are then given punctuation syntax.

(let ((ethiopic-generic (make-char 'ethiopic)))
  (modify-category-entry ethiopic-generic ?e)
  (modify-syntax-entry ethiopic-generic "w"))
(mapcar (function
         (lambda (punct)
           (modify-syntax-entry punct ".")))
        '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B))
276
;; European character set (Latin-1,2,3,4,5,8,9)

(let ((latin-charsets '(latin-iso8859-1
                        latin-iso8859-2
                        latin-iso8859-3
                        latin-iso8859-4
                        latin-iso8859-9
                        latin-iso8859-14
                        latin-iso8859-15))
      remaining)
  ;; First pass: every charset as a whole goes into category `l' (Latin).
  (setq remaining latin-charsets)
  (while remaining
    (modify-category-entry (make-char (car remaining)) ?l)
    (setq remaining (cdr remaining)))
  ;; Second pass: code 160 of each charset additionally goes into the
  ;; space category used for indentation calculation.
  (setq remaining latin-charsets)
  (while remaining
    (modify-category-entry (make-char (car remaining) 160) ?\ )
    (setq remaining (cdr remaining))))
294
;; Greek character set (ISO-8859-7)

(modify-category-entry (make-char 'greek-iso8859-7) ?g)

;; Codes 182 through 254 get word syntax, as does code 160.
(let ((code 182))
  (while (<= code 254)
    (modify-syntax-entry (make-char 'greek-iso8859-7 code) "w")
    (setq code (1+ code))))
(modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
;; These three characters are then given punctuation syntax.
(mapcar (function
         (lambda (punct)
           (modify-syntax-entry punct ".")))
        '(?\e,F7\e(B ?\e,F;\e(B ?\e,F=\e(B))
307
;; Hebrew character set (ISO-8859-8)

(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)

;; Codes 224 through 250 get word syntax, as does code 160.
(let ((code 224))
  (while (<= code 250)
    (modify-syntax-entry (make-char 'hebrew-iso8859-8 code) "w")
    (setq code (1+ code))))
(modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
317
;; Indian character set (IS 13194 and other Emacs original Indian charsets)

(modify-category-entry (make-char 'indian-is13194) ?i)
(modify-category-entry (make-char 'indian-2-column) ?I)
(modify-category-entry (make-char 'indian-1-column) ?I)

;; Each spec is (CHARS SYNTAX CATEGORY).  CHARS is scanned left to
;; right: a plain character is processed by itself, while `-' followed
;; by a character extends processing from just after the previously
;; processed character up to and including that character.  Every
;; processed character receives SYNTAX and CATEGORY.
(let ((specs
       '(;; chars syntax category
         ("\e(5!"#\e(B" "w" ?7)         ; vowel-modifying diacritical mark
                                        ; chandrabindu, anuswar, visarga
         ("\e(5$\e(B-\e(52\e(B" "w" ?1) ; base (independent) vowel
         ("\e(53\e(B-\e(5X\e(B" "w" ?0) ; consonant
         ("\e(5Z\e(B-\e(5g\e(B" "w" ?8) ; matra
         ("\e(5q\e(B-\e(5z\e(B" "w" ?6) ; digit
         ))
      spec text text-len syn cat last cur pos)
  (while specs
    (setq spec (car specs))
    (setq text (car spec)
          text-len (length text)
          syn (nth 1 spec)
          cat (nth 2 spec)
          pos 0)
    (while (< pos text-len)
      (cond ((= (aref text pos) ?-)
             ;; Range: CUR already points one past the previous character.
             (setq pos (1+ pos)
                   last (aref text pos)))
            (t
             (setq cur (aref text pos)
                   last cur)))
      (while (<= cur last)
        (modify-syntax-entry cur syn)
        (modify-category-entry cur cat)
        (setq cur (1+ cur)))
      (setq pos (1+ pos)))
    (setq specs (cdr specs))))
353
354
;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)

;; Charset-wide categories, given as (CHARSET . CATEGORY) and applied
;; in the order listed.
(let ((charset-categories '((katakana-jisx0201 . ?k)
                            (latin-jisx0201 . ?r)
                            (japanese-jisx0208 . ?j)
                            (japanese-jisx0212 . ?j)
                            (katakana-jisx0201 . ?\|)
                            (japanese-jisx0208 . ?\|)
                            (japanese-jisx0212 . ?\|))))
  (while charset-categories
    (modify-category-entry (make-char (car (car charset-categories)))
                           (cdr (car charset-categories)))
    (setq charset-categories (cdr charset-categories))))
364
;; JISX0208
;; Word syntax for the charset as a whole, symbol syntax for rows 33,
;; 34 and 40, and then word syntax again for the individual characters
;; listed.
(modify-syntax-entry (make-char 'japanese-jisx0208) "w")
(let ((symbol-rows '(33 34 40)))
  (while symbol-rows
    (modify-syntax-entry (make-char 'japanese-jisx0208 (car symbol-rows)) "_")
    (setq symbol-rows (cdr symbol-rows))))
(mapcar (function
         (lambda (word-char)
           (modify-syntax-entry word-char "w")))
        '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))

;; Paired delimiters: first every opener with its matching closer,
;; then every closer with its matching opener.
(modify-syntax-entry ?\\e$B!J\e(B "(\e$B!K\e(B")
(modify-syntax-entry ?\\e$B!N\e(B "(\e$B!O\e(B")
(modify-syntax-entry ?\\e$B!P\e(B "(\e$B!Q\e(B")
(modify-syntax-entry ?\\e$B!V\e(B "(\e$B!W\e(B")
(modify-syntax-entry ?\\e$B!X\e(B "(\e$B!Y\e(B")
(modify-syntax-entry ?\\e$B!K\e(B ")\e$B!J\e(B")
(modify-syntax-entry ?\\e$B!O\e(B ")\e$B!N\e(B")
(modify-syntax-entry ?\\e$B!Q\e(B ")\e$B!P\e(B")
(modify-syntax-entry ?\\e$B!W\e(B ")\e$B!V\e(B")
(modify-syntax-entry ?\\e$B!Y\e(B ")\e$B!X\e(B")

;; Per-row categories, given as (ROW . CATEGORY).
(let ((row-categories '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y))))
  (while row-categories
    (modify-category-entry
     (make-char 'japanese-jisx0208 (car (car row-categories)))
     (cdr (car row-categories)))
    (setq row-categories (cdr row-categories))))

;; Rows 48 through 126 are put in category `C'.
(let ((han-row 48))
  (while (<= han-row 126)
    (modify-category-entry (make-char 'japanese-jisx0208 han-row) ?C)
    (setq han-row (1+ han-row))))

;; Individual characters: one gets `K' only, two get both `K' and `H',
;; and the remaining ones get `C'.
(modify-category-entry ?\e$B!<\e(B ?K)
(mapcar (function
         (lambda (kana-mark)
           (modify-category-entry kana-mark ?K)
           (modify-category-entry kana-mark ?H)))
        '(?\e$B!+\e(B ?\e$B!,\e(B))
(mapcar (function
         (lambda (han-mark)
           (modify-category-entry han-mark ?C)))
        '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))
404
;; JISX0212
;; Word syntax for the charset as a whole, symbol syntax for rows 33,
;; 34 and 35, and category `C' for the whole charset.
(modify-syntax-entry (make-char 'japanese-jisx0212) "w")
(let ((symbol-rows '(33 34 35)))
  (while symbol-rows
    (modify-syntax-entry (make-char 'japanese-jisx0212 (car symbol-rows)) "_")
    (setq symbol-rows (cdr symbol-rows))))

(modify-category-entry (make-char 'japanese-jisx0212) ?C)
412
;; JISX0201-Kana
;; Word syntax for the charset as a whole; the five characters listed
;; are then given punctuation syntax.
(modify-syntax-entry (make-char 'katakana-jisx0201) "w")
(mapcar (function
         (lambda (punct)
           (modify-syntax-entry punct ".")))
        '(?\e(I!\e(B ?\e(I"\e(B ?\e(I#\e(B ?\e(I$\e(B ?\e(I%\e(B))
419
;; Korean character set (KSC5601)

;; Word syntax for the charset as a whole, then symbol syntax for the
;; rows listed.
(modify-syntax-entry (make-char 'korean-ksc5601) "w")
(let ((symbol-rows '(33 34 38 39 40 41)))
  (while symbol-rows
    (modify-syntax-entry (make-char 'korean-ksc5601 (car symbol-rows)) "_")
    (setq symbol-rows (cdr symbol-rows))))

;; Category `h' for the whole charset, then per-row categories given
;; as (ROW . CATEGORY).
(modify-category-entry (make-char 'korean-ksc5601) ?h)
(let ((row-categories '((35 . ?A) (37 . ?G) (42 . ?H) (43 . ?K) (44 . ?Y))))
  (while row-categories
    (modify-category-entry
     (make-char 'korean-ksc5601 (car (car row-categories)))
     (cdr (car row-categories)))
    (setq row-categories (cdr row-categories))))
436
;; Lao character set

(modify-category-entry (make-char 'lao) ?o)

;; Each spec is (CHARS SYNTAX CATEGORY).  CHARS is scanned left to
;; right: a plain character is processed by itself, while `-' followed
;; by a character extends processing from just after the previously
;; processed character up to and including that character.  Every
;; processed character receives SYNTAX and CATEGORY.
(let ((specs '(;; chars syntax category
               ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
               ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
               ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
               ("\e(1XY\e(B" "w" ?3)    ; vowel lower
               ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
               ("\e(1\\e(B" "w" ?9)     ; semivowel lower
               ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
               ("\e(1Of\e(B" "_" ?5)    ; symbol
               ))
      spec text text-len syn cat last cur pos)
  (while specs
    (setq spec (car specs))
    (setq text (car spec)
          text-len (length text)
          syn (nth 1 spec)
          cat (nth 2 spec)
          pos 0)
    (while (< pos text-len)
      (cond ((= (aref text pos) ?-)
             ;; Range: CUR already points one past the previous character.
             (setq pos (1+ pos)
                   last (aref text pos)))
            (t
             (setq cur (aref text pos)
                   last cur)))
      (while (<= cur last)
        (modify-syntax-entry cur syn)
        (modify-category-entry cur cat)
        (setq cur (1+ cur)))
      (setq pos (1+ pos)))
    (setq specs (cdr specs))))
471
;; Thai character set (TIS620)

(modify-category-entry (make-char 'thai-tis620) ?t)

;; Each spec is (CHARS SYNTAX CATEGORY).  CHARS is scanned left to
;; right: a plain character is processed by itself, while `-' followed
;; by a character extends processing from just after the previously
;; processed character up to and including that character.  Every
;; processed character receives SYNTAX and CATEGORY.
(let ((specs '(;; chars syntax category
               ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
               ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
               ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
               ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
               ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
               ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
               ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
               ))
      spec text text-len syn cat last cur pos)
  (while specs
    (setq spec (car specs))
    (setq text (car spec)
          text-len (length text)
          syn (nth 1 spec)
          cat (nth 2 spec)
          pos 0)
    (while (< pos text-len)
      (cond ((= (aref text pos) ?-)
             ;; Range: CUR already points one past the previous character.
             (setq pos (1+ pos)
                   last (aref text pos)))
            (t
             (setq cur (aref text pos)
                   last cur)))
      (while (<= cur last)
        (modify-syntax-entry cur syn)
        (modify-category-entry cur cat)
        (setq cur (1+ cur)))
      (setq pos (1+ pos)))
    (setq specs (cdr specs))))
505
;; Tibetan character set

(modify-category-entry (make-char 'tibetan) ?q)
(modify-category-entry (make-char 'tibetan-1-column) ?q)

;; Each spec is (CHARS SYNTAX CATEGORY).  CHARS is scanned left to
;; right: a plain character is processed by itself, while `-' followed
;; by a character extends processing from just after the previously
;; processed character up to and including that character.  Every
;; processed character receives SYNTAX and CATEGORY.  Specs are
;; applied in order, so a character named by several specs ends up
;; with the syntax of the last one and the categories of all of them.
(let ((specs '(;; chars syntax category
               ("\e$(7"!\e(B-\e$(7"J"K\e(B" "w" ?0) ; consonant
               ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
               ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
               ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
               ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
               ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
               ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
               ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
               ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
               ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
               ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
               ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
               ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
               ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
               ))
      spec text text-len syn cat last cur pos)
  (while specs
    (setq spec (car specs))
    (setq text (car spec)
          text-len (length text)
          syn (nth 1 spec)
          cat (nth 2 spec)
          pos 0)
    (while (< pos text-len)
      (cond ((= (aref text pos) ?-)
             ;; Range: CUR already points one past the previous character.
             (setq pos (1+ pos)
                   last (aref text pos)))
            (t
             (setq cur (aref text pos)
                   last cur)))
      (while (<= cur last)
        (modify-syntax-entry cur syn)
        (modify-category-entry cur cat)
        (setq cur (1+ cur)))
      (setq pos (1+ pos)))
    (setq specs (cdr specs))))
547
;; Vietnamese character set

;; Both VISCII charsets get word syntax and the categories `v'
;; (Vietnamese) and `l' (Latin); the latter lets them form a word
;; together with latin characters.
(let ((lower-generic (make-char 'vietnamese-viscii-lower))
      (upper-generic (make-char 'vietnamese-viscii-upper))
      (viscii-categories '(?v ?l)))
  (modify-syntax-entry lower-generic "w")
  (modify-syntax-entry upper-generic "w")
  (while viscii-categories
    (modify-category-entry lower-generic (car viscii-categories))
    (modify-category-entry upper-generic (car viscii-categories))
    (setq viscii-categories (cdr viscii-categories))))

;; For every code from 32 through 127, pair the character at that code
;; in the upper charset with the one at the same code in the lower
;; charset in the standard case table.
(let ((case-table (standard-case-table))
      (code 32))
  (while (<= code 127)
    (set-case-syntax-pair (make-char 'vietnamese-viscii-upper code)
                          (make-char 'vietnamese-viscii-lower code)
                          case-table)
    (setq code (1+ code))))
567
568 \f
;;; Setting word boundary.

;; `word-combining-categories' pairs category `l' with itself.
;; `word-separating-categories' lists (CATEGORY-1 . CATEGORY-2) pairs
;; among the categories of 2-byte character sets.
(setq word-combining-categories
      '((?l . ?l))

      word-separating-categories
      '(;; Alpha numeric followed by Katakana or Chinese.
        (?A . ?K)
        (?A . ?C)
        ;; Hiragana followed by Alpha numeric, Katakana or Chinese.
        (?H . ?A)
        (?H . ?K)
        (?H . ?C)
        ;; Katakana followed by Alpha numeric or Chinese.
        (?K . ?A)
        (?K . ?C)
        ;; Chinese followed by Alpha numeric or Katakana.
        (?C . ?A)
        (?C . ?K)))
585
586 \f
;; For each character set, put the information of the most proper
;; coding system to encode it by `preferred-coding-system' property.
;; Every entry below is (CHARSET . CODING-SYSTEM).

(mapcar (function
         (lambda (entry)
           (put-charset-property (car entry)
                                 'preferred-coding-system
                                 (cdr entry))))
        '((latin-iso8859-1 . iso-latin-1)
          (latin-iso8859-2 . iso-latin-2)
          (latin-iso8859-3 . iso-latin-3)
          (latin-iso8859-4 . iso-latin-4)
          (thai-tis620 . thai-tis620)
          (greek-iso8859-7 . greek-iso-8bit)
          (arabic-iso8859-6 . iso-2022-7bit)
          (hebrew-iso8859-8 . hebrew-iso-8bit)
          (katakana-jisx0201 . japanese-shift-jis)
          (latin-jisx0201 . japanese-shift-jis)
          (cyrillic-iso8859-5 . cyrillic-iso-8bit)
          (latin-iso8859-9 . iso-latin-5)
          (japanese-jisx0208-1978 . iso-2022-jp)
          (chinese-gb2312 . cn-gb-2312)
          (japanese-jisx0208 . iso-2022-jp)
          (korean-ksc5601 . iso-2022-kr)
          (japanese-jisx0212 . iso-2022-jp)
          (chinese-cns11643-1 . iso-2022-cn)
          (chinese-cns11643-2 . iso-2022-cn)
          (chinese-big5-1 . chinese-big5)
          (chinese-big5-2 . chinese-big5)
          (chinese-sisheng . iso-2022-7bit)
          (ipa . iso-2022-7bit)
          (vietnamese-viscii-lower . vietnamese-viscii)
          (vietnamese-viscii-upper . vietnamese-viscii)
          (arabic-digit . iso-2022-7bit)
          (arabic-1-column . iso-2022-7bit)
          (ascii-right-to-left . iso-2022-7bit)
          (lao . lao)
          (arabic-2-column . iso-2022-7bit)
          (indian-is13194 . devanagari)
          (indian-1-column . devanagari)
          (tibetan-1-column . tibetan)
          (ethiopic . iso-2022-7bit)
          (chinese-cns11643-3 . iso-2022-cn)
          (chinese-cns11643-4 . iso-2022-cn)
          (chinese-cns11643-5 . iso-2022-cn)
          (chinese-cns11643-6 . iso-2022-cn)
          (chinese-cns11643-7 . iso-2022-cn)
          (indian-2-column . devanagari)
          (tibetan . tibetan)
          (latin-iso8859-14 . iso-latin-8)
          (latin-iso8859-15 . iso-latin-9)))
636
637 \f
;; Setup auto-fill-chars for charsets that should invoke auto-filling.
;; SPACE and NEWLINE are already set.  Also put `nospace-between-words'
;; property to the charsets.
(mapcar (function
         (lambda (charset)
           (aset auto-fill-chars (make-char charset) t)
           (put-charset-property charset 'nospace-between-words t)))
        '(katakana-jisx0201
          japanese-jisx0208 japanese-jisx0212
          chinese-gb2312 chinese-big5-1 chinese-big5-2))
648
649 ;;; Local Variables:
650 ;;; coding: iso-2022-7bit
651 ;;; End:
652
653 ;;; end of characters.el