]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
Set category `q' for all characters
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5
6 ;; Keywords: multibyte character, character set, syntax, category
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Commentary:
26
27 ;; This file contains multibyte characters. Save this file always in
28 ;; the coding system `iso-2022-7bit'.
29
30 ;; This file does not define the syntax for Latin-N character sets;
31 ;; those are defined by the files latin-N.el.
32
33 ;;; Predefined categories.
34
35 ;; For each character set.
36
;; One category per script or character set.  Each entry is
;; (MNEMONIC . DOCSTRING); the mnemonic is the character that the rest
;; of this file hands to `modify-category-entry'.
(let ((charset-categories
       '((?a . "ASCII")
         (?l . "Latin")
         (?t . "Thai")
         (?g . "Greek")
         (?b . "Arabic")
         (?w . "Hebrew")
         (?y . "Cyrillic")
         (?k . "Japanese katakana")
         (?r . "Japanese roman")
         (?c . "Chinese")
         (?j . "Japanese")
         (?h . "Korean")
         (?e . "Ethiopic (Ge'ez)")
         (?v . "Vietnamese")
         (?i . "Indian")
         (?o . "Lao")
         (?q . "Tibetan"))))
  (while charset-categories
    (define-category (car (car charset-categories))
      (cdr (car charset-categories)))
    (setq charset-categories (cdr charset-categories))))
54
55 ;; For each group (row) of 2-byte character sets.
56
;; Categories that label a group (row) inside a 2-byte character set.
;; Entries are (MNEMONIC . DOCSTRING) and are defined in list order.
(mapcar (function (lambda (entry)
                    (define-category (car entry) (cdr entry))))
        '((?A . "Alpha-numeric characters of 2-byte character sets")
          (?C . "Chinese (Han) characters of 2-byte character sets")
          (?G . "Greek characters of 2-byte character sets")
          (?H . "Japanese Hiragana characters of 2-byte character sets")
          (?K . "Japanese Katakana characters of 2-byte character sets")
          (?N . "Korean Hangul characters of 2-byte character sets")
          (?Y . "Cyrillic characters of 2-byte character sets")
          (?I . "Indian Glyphs")))
65
66 ;; For phonetic classifications.
67
;; Phonetic classes use the digit mnemonics `0' through `9'.  The
;; docstring at index N of the vector belongs to the category whose
;; mnemonic is the digit N.
(let ((phonetic-docs ["consonant"
                      "base (independent) vowel"
                      "upper diacritical mark (including upper vowel)"
                      "lower diacritical mark (including lower vowel)"
                      "tone mark"
                      "symbol"
                      "digit"
                      "vowel-modifying diacritical mark"
                      "vowel-signs"
                      "semivowel lower"])
      (idx 0))
  (while (< idx (length phonetic-docs))
    (define-category (+ ?0 idx) (aref phonetic-docs idx))
    (setq idx (1+ idx))))
78
79 ;; For filling.
;; For filling.
(define-category ?| "While filling, we can break a line at this character.")

;; For indentation calculation.
;; The mnemonic is the space character.  It is spelled `?\ ' (backslash
;; followed by a space) and kept in the middle of the line, so that the
;; space survives tools that strip trailing whitespace.  A bare `?' at
;; the end of a line would be read as the newline character instead,
;; which does not match the `?\ ' mnemonic used further down when NBSP
;; is placed in this category.
(define-category
  ?\  "This character counts as a space for indentation purposes.")

;; Keep the following for `kinsoku' processing.  See comments in
;; kinsoku.el.
(define-category ?> "A character which can't be placed at beginning of line.")
(define-category ?< "A character which can't be placed at end of line.")
90
91 \f
92 ;;; Setting syntax and category.
93
94 ;; ASCII
95
;; Codes 32 through 126 are put into two categories each:
;; `a' (ASCII) and `l' (Latin).
(let ((code 32)
      (limit 127))
  (while (/= code limit)
    (modify-category-entry code ?a)
    (modify-category-entry code ?l)
    (setq code (1+ code))))
101
102 ;; Arabic character set
103
;; Give each Arabic charset word syntax and the category `b' (Arabic).
;; The character returned by `make-char' is computed once per charset
;; and used for both table updates.
(let ((remaining '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
      generic)
  (while remaining
    (setq generic (make-char (car remaining))
          remaining (cdr remaining))
    (modify-syntax-entry generic "w")
    (modify-category-entry generic ?b)))
112
113 ;; Chinese character set (GB2312)
114
;; Syntax for GB2312.  The entries are applied in this order:
;;   1. the character made from the charset alone gets word syntax
;;      (presumably this stands for the whole charset -- confirm
;;      against the Mule `make-char' documentation);
;;   2. rows 33, 34 and 41 get symbol syntax;
;;   3. seven pairs of characters, all with first byte `!' (row 33),
;;      are declared as open/close delimiters that match each other.
;; Step 3 touches characters already covered by step 2, so the
;; statements must not be reordered.
115 (modify-syntax-entry (make-char 'chinese-gb2312) "w")
116 (modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
117 (modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
118 (modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
;; Opening delimiters: the descriptor is "(" followed by the closer.
119 (modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
120 (modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
121 (modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
122 (modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
123 (modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
124 (modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
125 (modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
;; Closing delimiters: the same pairs, reversed, with ")" descriptors.
126 (modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
127 (modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
128 (modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
129 (modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
130 (modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
131 (modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
132 (modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
133
;; Categories for GB2312.
;; The charset as a whole is Chinese (`c') and allows a line break
;; while filling (`|').
(modify-category-entry (make-char 'chinese-gb2312) ?c)
(modify-category-entry (make-char 'chinese-gb2312) ?\|)
;; Rows 35-39 hold non-Han scripts; each row gets the matching
;; 2-byte group category (alpha-numeric, Hiragana, Katakana, Greek,
;; Cyrillic).  Row 35 is registered once only -- the earlier second,
;; identical call for row 35 was redundant and has been dropped.
(modify-category-entry (make-char 'chinese-gb2312 35) ?A)
(modify-category-entry (make-char 'chinese-gb2312 36) ?H)
(modify-category-entry (make-char 'chinese-gb2312 37) ?K)
(modify-category-entry (make-char 'chinese-gb2312 38) ?G)
(modify-category-entry (make-char 'chinese-gb2312 39) ?Y)
;; Rows 48 through 126 are put in the Han (`C') category.
(let ((row 48))
  (while (< row 127)
    (modify-category-entry (make-char 'chinese-gb2312 row) ?C)
    (setq row (1+ row))))
146
147 ;; Chinese character set (BIG5)
148
;; Both BIG5 charsets receive the same treatment: word syntax, plus the
;; categories `c' (Chinese), `C' (Han, 2-byte) and `|' (line may be
;; broken here while filling).
(let ((big5-charsets '(chinese-big5-1 chinese-big5-2))
      generic)
  (while big5-charsets
    (setq generic (make-char (car big5-charsets)))
    (modify-syntax-entry generic "w")
    (modify-category-entry generic ?c)
    (modify-category-entry generic ?C)
    (modify-category-entry generic ?\|)
    (setq big5-charsets (cdr big5-charsets))))
162
163
164 ;; Chinese character set (CNS11643)
165
;; All seven CNS11643 planes get word syntax and the categories
;; `c' (Chinese), `C' (Han, 2-byte) and `|' (breakable when filling).
(mapcar (function
         (lambda (plane)
           (let ((generic (make-char plane)))
             (modify-syntax-entry generic "w")
             (modify-category-entry generic ?c)
             (modify-category-entry generic ?C)
             (modify-category-entry generic ?|))))
        '(chinese-cns11643-1
          chinese-cns11643-2
          chinese-cns11643-3
          chinese-cns11643-4
          chinese-cns11643-5
          chinese-cns11643-6
          chinese-cns11643-7))
181
182 ;; Cyrillic character set (ISO-8859-5)
183
;; The ISO-8859-5 Cyrillic charset is put in category `y', which is
;; defined earlier in this file with the docstring "Cyrillic".
184 (modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
185
;; Code 160 gets whitespace syntax; three individual characters get
;; punctuation syntax.
186 (modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
187 (modify-syntax-entry ?\e,L-\e(B ".")
188 (modify-syntax-entry ?\e,Lp\e(B ".")
189 (modify-syntax-entry ?\e,L}\e(B ".")
;; Register letter pairs in the standard case table.  Going by the
;; function name, each call pairs an uppercase letter (first argument)
;; with its lowercase form (second argument) -- confirm against the
;; `set-case-syntax-pair' documentation.  The second bytes run in three
;; groups: `!'..`/' with `q'..DEL (skipping `-', which was given
;; punctuation syntax above), `0'..`?' with `P'..`_', and `@'..`O'
;; with ``'..`o'.
190 (let ((tbl (standard-case-table)))
191 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
192 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
193 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
194 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
195 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
196 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
197 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
198 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
199 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
200 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
201 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
202 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
203 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
204 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
205 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
206 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
207 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
208 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
209 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
210 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
211 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
212 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
213 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
214 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
215 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
216 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
217 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
218 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
219 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
220 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
221 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
222 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
223 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
224 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
225 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
226 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
227 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
228 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
229 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
230 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
231 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
232 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
233 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
234 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
235 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
236 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl))
237
238 ;; Devanagari character set
239
;; Each spec is (CHARS SYNTAX CATEGORY).  CHARS is scanned left to
;; right: a lone character stands for itself, and `X-Y' stands for
;; every character from X through Y inclusive.  Every character so
;; named receives SYNTAX and is added to CATEGORY.
(let ((specs '(("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
                                        ; chandrabindu, anuswar, visarga
               ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
               ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
               ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
               ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
               ))
      spec text text-len syn cat pos cur last)
  (while specs
    (setq spec (car specs)
          specs (cdr specs))
    (setq text (nth 0 spec)
          text-len (length text)
          syn (nth 1 spec)
          cat (nth 2 spec)
          pos 0)
    (while (< pos text-len)
      (cond ((= (aref text pos) ?-)
             ;; Range marker.  CUR already sits one past the range's
             ;; first character (handled on the previous pass), so
             ;; only the upper bound has to be fetched.
             (setq pos (1+ pos))
             (setq last (aref text pos)))
            (t
             (setq cur (aref text pos))
             (setq last cur)))
      (while (<= cur last)
        (modify-syntax-entry cur syn)
        (modify-category-entry cur cat)
        (setq cur (1+ cur)))
      (setq pos (1+ pos)))))
268
269 ;; Ethiopic character set
270
;; The Ethiopic charset gets category `e' and word syntax ...
(let ((generic (make-char 'ethiopic)))
  (modify-category-entry generic ?e)
  (modify-syntax-entry generic "w"))
;; ... after which the individual characters listed here are switched
;; to punctuation syntax.
(mapcar (function (lambda (punct)
                    (modify-syntax-entry punct ".")))
        '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B))
277
278 ;; European character set (Latin-1,2,3,4,5)
279
;; For each Latin-N charset handled here: the charset gets category
;; `l' (Latin), and its code 160 is additionally put in the space
;; category (mnemonic `?\ '), which this file describes as counting
;; as a space for indentation purposes.
(let ((latin-charsets '(latin-iso8859-1
                        latin-iso8859-2
                        latin-iso8859-3
                        latin-iso8859-4
                        latin-iso8859-9)))
  (while latin-charsets
    (modify-category-entry (make-char (car latin-charsets)) ?l)
    (modify-category-entry (make-char (car latin-charsets) 160) ?\ )
    (setq latin-charsets (cdr latin-charsets))))
291
292 ;; Greek character set (ISO-8859-7)
293
(modify-category-entry (make-char 'greek-iso8859-7) ?g)

;; Codes 182 through 254 become word constituents, as does code 160
;; (NBSP).
(let ((code 182))
  (while (<= code 254)
    (modify-syntax-entry (make-char 'greek-iso8859-7 code) "w")
    (setq code (1+ code))))
(modify-syntax-entry (make-char 'greek-iso8859-7 160) "w")
;; These three must come after the loop above: their second bytes
;; (`7', `;', `=') place them inside the range it just marked as
;; word constituents, and they are punctuation instead.
(let ((punctuation '(?\e,F7\e(B ?\e,F;\e(B ?\e,F=\e(B)))
  (while punctuation
    (modify-syntax-entry (car punctuation) ".")
    (setq punctuation (cdr punctuation))))
304
305 ;; Hebrew character set (ISO-8859-8)
306
(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)

;; Codes 224 through 250, plus code 160 (NBSP), are word constituents.
(let ((code 224)
      (last-code 250))
  (while (<= code last-code)
    (modify-syntax-entry (make-char 'hebrew-iso8859-8 code) "w")
    (setq code (1+ code))))
(modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w")
314
315 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
316
;; Charset-wide categories: `i' for IS 13194 itself, `I' for the two
;; column-width glyph charsets.
317 (modify-category-entry (make-char 'indian-is13194) ?i)
318 (modify-category-entry (make-char 'indian-2-column) ?I)
319 (modify-category-entry (make-char 'indian-1-column) ?I)
320
;; Per-character syntax and phonetic category.  Each DEFLIST element
;; is (CHARS SYNTAX CATEGORY).  CHARS is walked character by character:
;; a character on its own is processed by itself, and `X-Y' processes
;; every character from X through Y inclusive.
321 (let ((deflist
322 '(;; chars syntax category
323 ("\e(5!"#\e(B" "w" ?7) ; vowel-modifying diacritical mark
324 ; chandrabindu, anuswar, visarga
325 ("\e(5$\e(B-\e(52\e(B" "w" ?1) ; base (independent) vowel
326 ("\e(53\e(B-\e(5X\e(B" "w" ?0) ; consonant
327 ("\e(5Z\e(B-\e(5g\e(B" "w" ?8) ; matra
328 ("\e(5q\e(B-\e(5z\e(B" "w" ?6) ; digit
329 ))
330 elm chars len syntax category to ch i)
331 (while deflist
332 (setq elm (car deflist))
333 (setq chars (car elm)
334 len (length chars)
335 syntax (nth 1 elm)
336 category (nth 2 elm)
337 i 0)
338 (while (< i len)
;; On `-', only the upper bound TO is read; CH is left as it was, i.e.
;; one past the character processed on the previous iteration, so the
;; inner loop continues from there up to TO.
339 (if (= (aref chars i) ?-)
340 (setq i (1+ i)
341 to (aref chars i))
342 (setq ch (aref chars i)
343 to ch))
;; Apply the element's syntax and category to every character CH..TO.
344 (while (<= ch to)
345 (modify-syntax-entry ch syntax)
346 (modify-category-entry ch category)
347 (setq ch (1+ ch)))
348 (setq i (1+ i)))
349 (setq deflist (cdr deflist))))
350
351
352 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
353
;; Charset-wide categories for the Japanese charsets.  Each entry is
;; (CHARSET CATEGORY...): `k' katakana, `r' Japanese roman, `j'
;; Japanese, and `|' where a line may be broken while filling.
(let ((assignments '((katakana-jisx0201 ?k ?\|)
                     (latin-jisx0201 ?r)
                     (japanese-jisx0208 ?j ?\|)
                     (japanese-jisx0212 ?j ?\|)))
      generic categories)
  (while assignments
    (setq generic (make-char (car (car assignments)))
          categories (cdr (car assignments)))
    (while categories
      (modify-category-entry generic (car categories))
      (setq categories (cdr categories)))
    (setq assignments (cdr assignments))))
361
362 ;; JISX0208
;; Syntax for JISX0208, applied in this order:
;;   1. the character made from the charset alone gets word syntax;
;;   2. rows 33, 34 and 40 get symbol syntax;
;;   3. twelve characters with first byte `!' (row 33) are switched
;;      back to word syntax;
;;   4. five pairs of row-33 characters become matching open/close
;;      delimiters.
;; Steps 3 and 4 override part of step 2, so the order is significant.
363 (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
364 (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
365 (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
366 (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
367 (let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
368 (while chars
369 (modify-syntax-entry (car chars) "w")
370 (setq chars (cdr chars))))
;; Opening delimiters: the descriptor is "(" followed by the closer.
371 (modify-syntax-entry ?\\e$B!J\e(B "(\e$B!K\e(B")
372 (modify-syntax-entry ?\\e$B!N\e(B "(\e$B!O\e(B")
373 (modify-syntax-entry ?\\e$B!P\e(B "(\e$B!Q\e(B")
374 (modify-syntax-entry ?\\e$B!V\e(B "(\e$B!W\e(B")
375 (modify-syntax-entry ?\\e$B!X\e(B "(\e$B!Y\e(B")
;; Closing delimiters: the same pairs, reversed, with ")" descriptors.
376 (modify-syntax-entry ?\\e$B!K\e(B ")\e$B!J\e(B")
377 (modify-syntax-entry ?\\e$B!O\e(B ")\e$B!N\e(B")
378 (modify-syntax-entry ?\\e$B!Q\e(B ")\e$B!P\e(B")
379 (modify-syntax-entry ?\\e$B!W\e(B ")\e$B!V\e(B")
380 (modify-syntax-entry ?\\e$B!Y\e(B ")\e$B!X\e(B")
381
;; Rows 35-39 each get a 2-byte group category: alpha-numeric,
;; Hiragana, Katakana, Greek and Cyrillic respectively.
(let ((row-categories '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y))))
  (while row-categories
    (modify-category-entry
     (make-char 'japanese-jisx0208 (car (car row-categories)))
     (cdr (car row-categories)))
    (setq row-categories (cdr row-categories))))
;; Rows 48 through 126 are Han (`C').
(let ((row 48))
  (while (<= row 126)
    (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
    (setq row (1+ row))))
;; A few individual row-33 characters are added to script categories
;; as well: one to Katakana only, two to both Katakana and Hiragana,
;; and nine to Han.
(modify-category-entry ?\e$B!<\e(B ?K)
(mapcar (function (lambda (ch)
                    (modify-category-entry ch ?K)
                    (modify-category-entry ch ?H)))
        '(?\e$B!+\e(B ?\e$B!,\e(B))
(mapcar (function (lambda (ch)
                    (modify-category-entry ch ?C)))
        '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))
401
402 ;; JISX0212
;; JISX0212: word syntax for the charset, then symbol syntax for rows
;; 33 through 35.  The charset as a whole is Han (`C').
(modify-syntax-entry (make-char 'japanese-jisx0212) "w")
(let ((row 33))
  (while (<= row 35)
    (modify-syntax-entry (make-char 'japanese-jisx0212 row) "_")
    (setq row (1+ row))))

(modify-category-entry (make-char 'japanese-jisx0212) ?C)
409
410 ;; JISX0201-Kana
;; The charset gets word syntax; the five characters listed afterwards
;; are then made punctuation.
(modify-syntax-entry (make-char 'katakana-jisx0201) "w")
(mapcar (function (lambda (punct)
                    (modify-syntax-entry punct ".")))
        '(?\e(I!\e(B ?\e(I"\e(B ?\e(I#\e(B ?\e(I$\e(B ?\e(I%\e(B))
416
417 ;; Korean character set (KSC5601)
418
;; Syntax: word for the charset as a whole, symbol for the listed rows.
(modify-syntax-entry (make-char 'korean-ksc5601) "w")
(let ((symbol-rows '(33 34 38 39 40 41)))
  (while symbol-rows
    (modify-syntax-entry (make-char 'korean-ksc5601 (car symbol-rows)) "_")
    (setq symbol-rows (cdr symbol-rows))))

;; Categories: `h' (Korean) for the charset, then one 2-byte group
;; category per (ROW . CATEGORY) entry.
(modify-category-entry (make-char 'korean-ksc5601) ?h)
(let ((row-categories '((35 . ?A) (37 . ?G) (42 . ?H) (43 . ?K) (44 . ?Y))))
  (while row-categories
    (modify-category-entry
     (make-char 'korean-ksc5601 (car (car row-categories)))
     (cdr (car row-categories)))
    (setq row-categories (cdr row-categories))))
433
434 ;; Lao character set
435
(modify-category-entry (make-char 'lao) ?o)

;; Per-character syntax and phonetic category for Lao.
;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  CHARS is walked
;; character by character: a character on its own is processed by
;; itself, and `X-Y' processes every character from X through Y
;; inclusive.
;;
;; The symbol row uses category `5', which this file defines as
;; "symbol" and which the Thai table uses for its symbol row.  It
;; previously used `0' (consonant), contradicting its own comment.
(let ((deflist '(;; chars syntax category
                 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
                 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
                 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
                 ("\e(1XY\e(B" "w" ?3) ; vowel lower
                 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
                 ("\e(1\\e(B" "w" ?9) ; semivowel lower
                 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
                 ("\e(1Of\e(B" "_" ?5) ; symbol
                 ))
      elm chars len syntax category to ch i)
  (while deflist
    (setq elm (car deflist))
    (setq chars (car elm)
          len (length chars)
          syntax (nth 1 elm)
          category (nth 2 elm)
          i 0)
    (while (< i len)
      (if (= (aref chars i) ?-)
          ;; Range marker: read only the upper bound.  CH is already
          ;; one past the range's first character, which was handled
          ;; on the previous iteration.
          (setq i (1+ i)
                to (aref chars i))
        (setq ch (aref chars i)
              to ch))
      ;; Apply the element's syntax and category to CH..TO.
      (while (<= ch to)
        (modify-syntax-entry ch syntax)
        (modify-category-entry ch category)
        (setq ch (1+ ch)))
      (setq i (1+ i)))
    (setq deflist (cdr deflist))))
468
469 ;; Thai character set (TIS620)
470
(modify-category-entry (make-char 'thai-tis620) ?t)

;; Each rule is (CHARS SYNTAX CATEGORY).  Within CHARS a single
;; character names itself and `X-Y' names every character from X
;; through Y inclusive; all named characters get SYNTAX and CATEGORY.
(let ((rules '(("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
               ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
               ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
               ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
               ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
               ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
               ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
               ))
      rule members count stx cat-mnemonic idx current upper)
  (while rules
    (setq rule (car rules)
          rules (cdr rules))
    (setq members (nth 0 rule)
          count (length members)
          stx (nth 1 rule)
          cat-mnemonic (nth 2 rule)
          idx 0)
    (while (< idx count)
      (cond ((= (aref members idx) ?-)
             ;; Range marker: CURRENT already points one past the
             ;; range's first character, so fetch only the upper bound.
             (setq idx (1+ idx))
             (setq upper (aref members idx)))
            (t
             (setq current (aref members idx))
             (setq upper current)))
      (while (<= current upper)
        (modify-syntax-entry current stx)
        (modify-category-entry current cat-mnemonic)
        (setq current (1+ current)))
      (setq idx (1+ idx)))))
502
503 ;; Tibetan character set
504
;; Both Tibetan charsets are put in category `q' (defined earlier in
;; this file with the docstring "Tibetan").
505 (modify-category-entry (make-char 'tibetan) ?q)
506 (modify-category-entry (make-char 'tibetan-1-column) ?q)
507
;; Per-character syntax and category.  Each DEFLIST element is
;; (CHARS SYNTAX CATEGORY).  CHARS is walked character by character: a
;; character on its own is processed by itself, and `X-Y' processes
;; every character from X through Y inclusive.  Several characters
;; appear in more than one element.  `modify-syntax-entry' is called
;; again for each occurrence, and the character is added to each listed
;; category.
508 (let ((deflist '(;; chars syntax category
509 ("\e$(7"!\e(B-\e$(7"J\e(B" "w" ?0) ; consonant
510 ("\e$(7#!\e(B-\e$(7#J#P#Q\e(B" "w" ?0) ;
511 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
512 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
513 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
514 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
515 ("\e$(7!I"Q"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
516 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
517 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
518 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
519 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
520 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
521 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
522 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o\e(B" "." ?q) ; others
523 ))
524 elm chars len syntax category to ch i)
525 (while deflist
526 (setq elm (car deflist))
527 (setq chars (car elm)
528 len (length chars)
529 syntax (nth 1 elm)
530 category (nth 2 elm)
531 i 0)
532 (while (< i len)
;; On `-', only the upper bound TO is read; CH is left as it was, i.e.
;; one past the character processed on the previous iteration.
533 (if (= (aref chars i) ?-)
534 (setq i (1+ i)
535 to (aref chars i))
536 (setq ch (aref chars i)
537 to ch))
;; Apply the element's syntax and category to every character CH..TO.
538 (while (<= ch to)
539 (modify-syntax-entry ch syntax)
540 (modify-category-entry ch category)
541 (setq ch (1+ ch)))
542 (setq i (1+ i)))
543 (setq deflist (cdr deflist))))
544
545 ;; Vietnamese character set
546
;; Both VISCII halves get word syntax and the categories `v'
;; (Vietnamese) and `l' (Latin).  The Latin category is what lets
;; these characters form a single word together with Latin letters.
(let ((viscii-charsets '(vietnamese-viscii-lower vietnamese-viscii-upper))
      generic)
  (while viscii-charsets
    (setq generic (make-char (car viscii-charsets)))
    (modify-syntax-entry generic "w")
    (modify-category-entry generic ?v)
    (modify-category-entry generic ?l)
    (setq viscii-charsets (cdr viscii-charsets))))
556
557 \f
558 ;;; Setting word boundary.
559
;; Latin next to Latin is listed as a word-combining pair.
(setq word-combining-categories (list (cons ?l ?l)))

;; Category pairs (for 2-byte character sets) listed as
;; word-separating.  A = alpha-numeric, H = Hiragana, K = Katakana,
;; C = Chinese (Han).
(setq word-separating-categories
      (list (cons ?A ?K)
            (cons ?A ?C)
            (cons ?H ?A)
            (cons ?H ?K)
            (cons ?H ?C)
            (cons ?K ?A)
            (cons ?K ?C)
            (cons ?C ?A)
            (cons ?C ?K)))
574
575 \f
576 ;; For each character set, record the most suitable coding system
577 ;; for encoding it in the charset's `prefered-coding-system' property.
578
;; Table of (CHARSET . CODING-SYSTEM).  Each pair is stored with
;; `put-charset-property' under the `prefered-coding-system' key
;; (the spelling is the property's actual name; do not correct it).
;;
;; `ethiopic' maps to `iso-2022-7bit', like the other charsets in this
;; table that have no dedicated coding system (ipa, chinese-sisheng,
;; arabic-*).  It previously named `iso-2022-jp', a Japanese coding
;; system.
(let ((l '((latin-iso8859-1 . iso-latin-1)
           (latin-iso8859-2 . iso-latin-2)
           (latin-iso8859-3 . iso-latin-3)
           (latin-iso8859-4 . iso-latin-4)
           (thai-tis620 . thai-tis620)
           (greek-iso8859-7 . greek-iso-8bit)
           (arabic-iso8859-6 . iso-2022-7bit)
           (hebrew-iso8859-8 . hebrew-iso-8bit)
           (katakana-jisx0201 . japanese-shift-jis)
           (latin-jisx0201 . japanese-shift-jis)
           (cyrillic-iso8859-5 . cyrillic-iso-8bit)
           (latin-iso8859-9 . iso-latin-5)
           (japanese-jisx0208-1978 . iso-2022-jp)
           (chinese-gb2312 . cn-gb-2312)
           (japanese-jisx0208 . iso-2022-jp)
           (korean-ksc5601 . iso-2022-kr)
           (japanese-jisx0212 . iso-2022-jp)
           (chinese-cns11643-1 . iso-2022-cn)
           (chinese-cns11643-2 . iso-2022-cn)
           (chinese-big5-1 . chinese-big5)
           (chinese-big5-2 . chinese-big5)
           (chinese-sisheng . iso-2022-7bit)
           (ipa . iso-2022-7bit)
           (vietnamese-viscii-lower . vietnamese-viscii)
           (vietnamese-viscii-upper . vietnamese-viscii)
           (arabic-digit . iso-2022-7bit)
           (arabic-1-column . iso-2022-7bit)
           (ascii-right-to-left . iso-2022-7bit)
           (lao . lao)
           (arabic-2-column . iso-2022-7bit)
           (indian-is13194 . devanagari)
           (indian-1-column . devanagari)
           (tibetan-1-column . tibetan)
           (ethiopic . iso-2022-7bit)
           (chinese-cns11643-3 . iso-2022-cn)
           (chinese-cns11643-4 . iso-2022-cn)
           (chinese-cns11643-5 . iso-2022-cn)
           (chinese-cns11643-6 . iso-2022-cn)
           (chinese-cns11643-7 . iso-2022-cn)
           (indian-2-column . devanagari)
           (tibetan . tibetan))))
  (while l
    (put-charset-property (car (car l)) 'prefered-coding-system (cdr (car l)))
    (setq l (cdr l))))
623
624 \f
625 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
626 ;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
627 ;; property to the charsets.
;; For each listed charset: mark it in `auto-fill-chars', and give the
;; charset the `nospace-between-words' property.
(mapcar (function
         (lambda (charset)
           (aset auto-fill-chars (make-char charset) t)
           (put-charset-property charset 'nospace-between-words t)))
        '(katakana-jisx0201
          japanese-jisx0208 japanese-jisx0212
          chinese-gb2312 chinese-big5-1 chinese-big5-2))
635
636 ;;; Local Variables:
637 ;;; coding: iso-2022-7bit
638 ;;; End:
639
640 ;;; end of characters.el