]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
Some fixes to follow coding conventions.
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5
6 ;; Keywords: multibyte character, character set, syntax, category
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Commentary:
26
27 ;; This file contains multibyte characters. Save this file always in
28 ;; the coding system `iso-2022-7bit'.
29
30 ;; This file does not define the syntax for Latin-N character sets;
31 ;; those are defined by the files latin-N.el.
32
33 ;;; Code:
34
35 ;;; Predefined categories.
36
37 ;; For each character set.
38
39 (define-category ?a "ASCII")
40 (define-category ?l "Latin")
41 (define-category ?t "Thai")
42 (define-category ?g "Greek")
43 (define-category ?b "Arabic")
44 (define-category ?w "Hebrew")
45 (define-category ?y "Cyrillic")
46 (define-category ?k "Japanese katakana")
47 (define-category ?r "Japanese roman")
48 (define-category ?c "Chinese")
49 (define-category ?j "Japanese")
50 (define-category ?h "Korean")
51 (define-category ?e "Ethiopic (Ge'ez)")
52 (define-category ?v "Vietnamese")
53 (define-category ?i "Indian")
54 (define-category ?o "Lao")
55 (define-category ?q "Tibetan")
56
57 ;; For each group (row) of 2-byte character sets.
58
59 (define-category ?A "Alpha-numeric characters of 2-byte character sets")
60 (define-category ?C "Chinese (Han) characters of 2-byte character sets")
61 (define-category ?G "Greek characters of 2-byte character sets")
62 (define-category ?H "Japanese Hiragana characters of 2-byte character sets")
63 (define-category ?K "Japanese Katakana characters of 2-byte character sets")
64 (define-category ?N "Korean Hangul characters of 2-byte character sets")
65 (define-category ?Y "Cyrillic characters of 2-byte character sets")
66 (define-category ?I "Indian Glyphs")
67
68 ;; For phonetic classifications.
69
70 (define-category ?0 "consonant")
71 (define-category ?1 "base (independent) vowel")
72 (define-category ?2 "upper diacritical mark (including upper vowel)")
73 (define-category ?3 "lower diacritical mark (including lower vowel)")
74 (define-category ?4 "tone mark")
75 (define-category ?5 "symbol")
76 (define-category ?6 "digit")
77 (define-category ?7 "vowel-modifying diacritical mark")
78 (define-category ?8 "vowel-signs")
79 (define-category ?9 "semivowel lower")
80
81 ;; For filling.
82 (define-category ?| "While filling, we can break a line at this character.")
83
84 ;; For indentation calculation.
85 (define-category ?
86 "This character counts as a space for indentation purposes.")
87
88 ;; Keep the following for `kinsoku' processing. See comments in
89 ;; kinsoku.el.
90 (define-category ?> "A character which can't be placed at beginning of line.")
91 (define-category ?< "A character which can't be placed at end of line.")
92
93 \f
94 ;;; Setting syntax and category.
95
96 ;; ASCII
97
98 (let ((ch 32))
99 (while (< ch 127) ; All ASCII characters have
100 (modify-category-entry ch ?a) ; the category `a' (ASCII)
101 (modify-category-entry ch ?l) ; and `l' (Latin).
102 (setq ch (1+ ch))))
103
104 ;; Arabic character set
105
106 (let ((charsets '(arabic-iso8859-6
107 arabic-digit
108 arabic-1-column
109 arabic-2-column)))
110 (while charsets
111 (modify-syntax-entry (make-char (car charsets)) "w")
112 (modify-category-entry (make-char (car charsets)) ?b)
113 (setq charsets (cdr charsets))))
114
115 ;; Chinese character set (GB2312)
116
117 (modify-syntax-entry (make-char 'chinese-gb2312) "w")
118 (modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
119 (modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
120 (modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
121 (modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
122 (modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
123 (modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
124 (modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
125 (modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
126 (modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
127 (modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
128 (modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
129 (modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
130 (modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
131 (modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
132 (modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
133 (modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
134 (modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
135
136 (modify-category-entry (make-char 'chinese-gb2312) ?c)
137 (modify-category-entry (make-char 'chinese-gb2312) ?\|)
138 (modify-category-entry (make-char 'chinese-gb2312 35) ?A)
139 (modify-category-entry (make-char 'chinese-gb2312 36) ?H)
140 (modify-category-entry (make-char 'chinese-gb2312 37) ?K)
141 (modify-category-entry (make-char 'chinese-gb2312 38) ?G)
142 (modify-category-entry (make-char 'chinese-gb2312 39) ?Y)
143 (let ((row 48))
144 (while (< row 127)
145 (modify-category-entry (make-char 'chinese-gb2312 row) ?C)
146 (setq row (1+ row))))
147
148 ;; Chinese character set (BIG5)
149
150 (let ((generic-big5-1-char (make-char 'chinese-big5-1))
151 (generic-big5-2-char (make-char 'chinese-big5-2)))
152 (modify-syntax-entry generic-big5-1-char "w")
153 (modify-syntax-entry generic-big5-2-char "w")
154
155 (modify-category-entry generic-big5-1-char ?c)
156 (modify-category-entry generic-big5-2-char ?c)
157
158 (modify-category-entry generic-big5-1-char ?C)
159 (modify-category-entry generic-big5-2-char ?C)
160
161 (modify-category-entry generic-big5-1-char ?\|)
162 (modify-category-entry generic-big5-2-char ?\|))
163
164
165 ;; Chinese character set (CNS11643)
166
167 (let ((cns-list '(chinese-cns11643-1
168 chinese-cns11643-2
169 chinese-cns11643-3
170 chinese-cns11643-4
171 chinese-cns11643-5
172 chinese-cns11643-6
173 chinese-cns11643-7))
174 generic-char)
175 (while cns-list
176 (setq generic-char (make-char (car cns-list)))
177 (modify-syntax-entry generic-char "w")
178 (modify-category-entry generic-char ?c)
179 (modify-category-entry generic-char ?C)
180 (modify-category-entry generic-char ?|)
181 (setq cns-list (cdr cns-list))))
182
183 ;; Cyrillic character set (ISO-8859-5)
184
185 (modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
186
187 (modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
188 (modify-syntax-entry ?\e,L-\e(B ".")
189 (modify-syntax-entry ?\e,Lp\e(B ".")
190 (modify-syntax-entry ?\e,L}\e(B ".")
191 (let ((tbl (standard-case-table)))
192 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
193 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
194 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
195 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
196 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
197 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
198 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
199 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
200 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
201 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
202 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
203 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
204 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
205 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
206 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
207 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
208 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
209 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
210 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
211 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
212 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
213 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
214 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
215 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
216 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
217 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
218 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
219 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
220 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
221 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
222 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
223 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
224 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
225 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
226 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
227 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
228 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
229 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
230 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
231 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
232 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
233 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
234 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
235 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
236 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
237 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl))
238
239 ;; Devanagari character set
240
241 (let ((deflist '(;; chars syntax category
242 ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
243 ; chandrabindu, anuswar, visarga
244 ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
245 ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
246 ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
247 ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
248 ))
249 elm chars len syntax category to ch i)
250 (while deflist
251 (setq elm (car deflist))
252 (setq chars (car elm)
253 len (length chars)
254 syntax (nth 1 elm)
255 category (nth 2 elm)
256 i 0)
257 (while (< i len)
258 (if (= (aref chars i) ?-)
259 (setq i (1+ i)
260 to (aref chars i))
261 (setq ch (aref chars i)
262 to ch))
263 (while (<= ch to)
264 (modify-syntax-entry ch syntax)
265 (modify-category-entry ch category)
266 (setq ch (1+ ch)))
267 (setq i (1+ i)))
268 (setq deflist (cdr deflist))))
269
270 ;; Ethiopic character set
271
272 (modify-category-entry (make-char 'ethiopic) ?e)
273 (modify-syntax-entry (make-char 'ethiopic) "w")
274 (let ((chars '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B)))
275 (while chars
276 (modify-syntax-entry (car chars) ".")
277 (setq chars (cdr chars))))
278
279 ;; Greek character set (ISO-8859-7)
280
281 (modify-category-entry (make-char 'greek-iso8859-7) ?g)
282
283 (let ((c 182))
284 (while (< c 255)
285 (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
286 (setq c (1+ c))))
287 (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
288 (modify-syntax-entry ?\e,F7\e(B ".")
289 (modify-syntax-entry ?\e,F;\e(B ".")
290 (modify-syntax-entry ?\e,F=\e(B ".")
291 (let ((tbl (standard-case-table)))
292 (set-case-syntax-pair ?\e,FA\e(B ?\e,Fa\e(B tbl)
293 (set-case-syntax-pair ?\e,FB\e(B ?\e,Fb\e(B tbl)
294 (set-case-syntax-pair ?\e,FC\e(B ?\e,Fc\e(B tbl)
295 (set-case-syntax-pair ?\e,FD\e(B ?\e,Fd\e(B tbl)
296 (set-case-syntax-pair ?\e,FE\e(B ?\e,Fe\e(B tbl)
297 (set-case-syntax-pair ?\e,FF\e(B ?\e,Ff\e(B tbl)
298 (set-case-syntax-pair ?\e,FG\e(B ?\e,Fg\e(B tbl)
299 (set-case-syntax-pair ?\e,FH\e(B ?\e,Fh\e(B tbl)
300 (set-case-syntax-pair ?\e,FI\e(B ?\e,Fi\e(B tbl)
301 (set-case-syntax-pair ?\e,FJ\e(B ?\e,Fj\e(B tbl)
302 (set-case-syntax-pair ?\e,FK\e(B ?\e,Fk\e(B tbl)
303 (set-case-syntax-pair ?\e,FL\e(B ?\e,Fl\e(B tbl)
304 (set-case-syntax-pair ?\e,FM\e(B ?\e,Fm\e(B tbl)
305 (set-case-syntax-pair ?\e,FN\e(B ?\e,Fn\e(B tbl)
306 (set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
307 (set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
308 (set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
309 (set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
310 (set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
311 (set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
312 (set-case-syntax-pair ?\e,FV\e(B ?\e,Fv\e(B tbl)
313 (set-case-syntax-pair ?\e,FW\e(B ?\e,Fw\e(B tbl)
314 (set-case-syntax-pair ?\e,FX\e(B ?\e,Fx\e(B tbl)
315 (set-case-syntax-pair ?\e,FY\e(B ?\e,Fy\e(B tbl)
316 (set-case-syntax-pair ?\e,FZ\e(B ?\e,Fz\e(B tbl)
317 (set-case-syntax-pair ?\e,F[\e(B ?\e,F{\e(B tbl)
318 (set-case-syntax-pair ?\e,F?\e(B ?\e,F~\e(B tbl)
319 (set-case-syntax-pair ?\e,F>\e(B ?\e,F}\e(B tbl)
320 (set-case-syntax-pair ?\e,F<\e(B ?\e,F|\e(B tbl)
321 (set-case-syntax-pair ?\e,F6\e(B ?\e,F\\e(B tbl)
322 (set-case-syntax-pair ?\e,F8\e(B ?\e,F]\e(B tbl)
323 (set-case-syntax-pair ?\e,F9\e(B ?\e,F^\e(B tbl)
324 (set-case-syntax-pair ?\e,F:\e(B ?\e,F_\e(B tbl))
325
326 ;; Hebrew character set (ISO-8859-8)
327
328 (modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
329
330 (let ((c 224))
331 (while (< c 251)
332 (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
333 (setq c (1+ c))))
334 (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
335
336 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
337
338 (modify-category-entry (make-char 'indian-is13194) ?i)
339 (modify-category-entry (make-char 'indian-2-column) ?I)
340 (modify-category-entry (make-char 'indian-1-column) ?I)
341
342 (let ((deflist
343 '(;; chars syntax category
344 ("\e(5!"#\e(B" "w" ?7) ; vowel-modifying diacritical mark
345 ; chandrabindu, anuswar, visarga
346 ("\e(5$\e(B-\e(52\e(B" "w" ?1) ; base (independent) vowel
347 ("\e(53\e(B-\e(5X\e(B" "w" ?0) ; consonant
348 ("\e(5Z\e(B-\e(5g\e(B" "w" ?8) ; matra
349 ("\e(5q\e(B-\e(5z\e(B" "w" ?6) ; digit
350 ))
351 elm chars len syntax category to ch i)
352 (while deflist
353 (setq elm (car deflist))
354 (setq chars (car elm)
355 len (length chars)
356 syntax (nth 1 elm)
357 category (nth 2 elm)
358 i 0)
359 (while (< i len)
360 (if (= (aref chars i) ?-)
361 (setq i (1+ i)
362 to (aref chars i))
363 (setq ch (aref chars i)
364 to ch))
365 (while (<= ch to)
366 (modify-syntax-entry ch syntax)
367 (modify-category-entry ch category)
368 (setq ch (1+ ch)))
369 (setq i (1+ i)))
370 (setq deflist (cdr deflist))))
371
372
373 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
374
375 (modify-category-entry (make-char 'katakana-jisx0201) ?k)
376 (modify-category-entry (make-char 'katakana-jisx0201) ?j)
377 (modify-category-entry (make-char 'latin-jisx0201) ?r)
378 (modify-category-entry (make-char 'japanese-jisx0208) ?j)
379 (modify-category-entry (make-char 'japanese-jisx0212) ?j)
380 (modify-category-entry (make-char 'katakana-jisx0201) ?\|)
381 (modify-category-entry (make-char 'japanese-jisx0208) ?\|)
382 (modify-category-entry (make-char 'japanese-jisx0212) ?\|)
383
384 ;; JISX0208
385 (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
386 (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
387 (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
388 (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
389 (let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
390 (while chars
391 (modify-syntax-entry (car chars) "w")
392 (setq chars (cdr chars))))
393 (modify-syntax-entry ?\\e$B!J\e(B "(\e$B!K\e(B")
394 (modify-syntax-entry ?\\e$B!N\e(B "(\e$B!O\e(B")
395 (modify-syntax-entry ?\\e$B!P\e(B "(\e$B!Q\e(B")
396 (modify-syntax-entry ?\\e$B!V\e(B "(\e$B!W\e(B")
397 (modify-syntax-entry ?\\e$B!X\e(B "(\e$B!Y\e(B")
398 (modify-syntax-entry ?\\e$B!K\e(B ")\e$B!J\e(B")
399 (modify-syntax-entry ?\\e$B!O\e(B ")\e$B!N\e(B")
400 (modify-syntax-entry ?\\e$B!Q\e(B ")\e$B!P\e(B")
401 (modify-syntax-entry ?\\e$B!W\e(B ")\e$B!V\e(B")
402 (modify-syntax-entry ?\\e$B!Y\e(B ")\e$B!X\e(B")
403
404 (modify-category-entry (make-char 'japanese-jisx0208 35) ?A)
405 (modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
406 (modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
407 (modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
408 (modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
409 (let ((row 48))
410 (while (< row 127)
411 (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
412 (setq row (1+ row))))
413 (modify-category-entry ?\e$B!<\e(B ?K)
414 (let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B)))
415 (while chars
416 (modify-category-entry (car chars) ?K)
417 (modify-category-entry (car chars) ?H)
418 (setq chars (cdr chars))))
419 (let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
420 (while chars
421 (modify-category-entry (car chars) ?C)
422 (setq chars (cdr chars))))
423
424 ;; JISX0212
425 (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
426 (modify-syntax-entry (make-char 'japanese-jisx0212 33) "_")
427 (modify-syntax-entry (make-char 'japanese-jisx0212 34) "_")
428 (modify-syntax-entry (make-char 'japanese-jisx0212 35) "_")
429
430 (modify-category-entry (make-char 'japanese-jisx0212 ) ?C)
431
432 ;; JISX0201-Kana
433 (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
434 (let ((chars '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B)))
435 (while chars
436 (modify-syntax-entry (car chars) ".")
437 (setq chars (cdr chars))))
438
439 (modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
440 (modify-syntax-entry ?\\e(I#\e(B "(\e(I"\e(B")
441
442 ;; Korean character set (KSC5601)
443
444 (modify-syntax-entry (make-char 'korean-ksc5601) "w")
445 (modify-syntax-entry (make-char 'korean-ksc5601 33) "_")
446 (modify-syntax-entry (make-char 'korean-ksc5601 34) "_")
447 (modify-syntax-entry (make-char 'korean-ksc5601 38) "_")
448 (modify-syntax-entry (make-char 'korean-ksc5601 39) "_")
449 (modify-syntax-entry (make-char 'korean-ksc5601 40) "_")
450 (modify-syntax-entry (make-char 'korean-ksc5601 41) "_")
451
452 (modify-category-entry (make-char 'korean-ksc5601) ?h)
453 (modify-category-entry (make-char 'korean-ksc5601 35) ?A)
454 (modify-category-entry (make-char 'korean-ksc5601 37) ?G)
455 (modify-category-entry (make-char 'korean-ksc5601 42) ?H)
456 (modify-category-entry (make-char 'korean-ksc5601 43) ?K)
457 (modify-category-entry (make-char 'korean-ksc5601 44) ?Y)
458
459 ;; Latin character set (latin-1,2,3,4,5,8,9)
460
461 (modify-category-entry (make-char 'latin-iso8859-1) ?l)
462 (modify-category-entry (make-char 'latin-iso8859-2) ?l)
463 (modify-category-entry (make-char 'latin-iso8859-3) ?l)
464 (modify-category-entry (make-char 'latin-iso8859-4) ?l)
465 (modify-category-entry (make-char 'latin-iso8859-9) ?l)
466 (modify-category-entry (make-char 'latin-iso8859-14) ?l)
467 (modify-category-entry (make-char 'latin-iso8859-15) ?l)
468
469 (modify-category-entry (make-char 'latin-iso8859-1 160) ?\ )
470 (modify-category-entry (make-char 'latin-iso8859-2 160) ?\ )
471 (modify-category-entry (make-char 'latin-iso8859-3 160) ?\ )
472 (modify-category-entry (make-char 'latin-iso8859-4 160) ?\ )
473 (modify-category-entry (make-char 'latin-iso8859-9 160) ?\ )
474 (modify-category-entry (make-char 'latin-iso8859-14 160) ?\ )
475 (modify-category-entry (make-char 'latin-iso8859-15 160) ?\ )
476
477 ;; Lao character set
478
479 (modify-category-entry (make-char 'lao) ?o)
480
481 (let ((deflist '(;; chars syntax category
482 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
483 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
484 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
485 ("\e(1XY\e(B" "w" ?3) ; vowel lower
486 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
487 ("\e(1\\e(B" "w" ?9) ; semivowel lower
488 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
489 ("\e(1Of\e(B" "_" ?5) ; symbol
490 ))
491 elm chars len syntax category to ch i)
492 (while deflist
493 (setq elm (car deflist))
494 (setq chars (car elm)
495 len (length chars)
496 syntax (nth 1 elm)
497 category (nth 2 elm)
498 i 0)
499 (while (< i len)
500 (if (= (aref chars i) ?-)
501 (setq i (1+ i)
502 to (aref chars i))
503 (setq ch (aref chars i)
504 to ch))
505 (while (<= ch to)
506 (modify-syntax-entry ch syntax)
507 (modify-category-entry ch category)
508 (setq ch (1+ ch)))
509 (setq i (1+ i)))
510 (setq deflist (cdr deflist))))
511
512 ;; Thai character set (TIS620)
513
514 (modify-category-entry (make-char 'thai-tis620) ?t)
515
516 (let ((deflist '(;; chars syntax category
517 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
518 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
519 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
520 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
521 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
522 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
523 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
524 ))
525 elm chars len syntax category to ch i)
526 (while deflist
527 (setq elm (car deflist))
528 (setq chars (car elm)
529 len (length chars)
530 syntax (nth 1 elm)
531 category (nth 2 elm)
532 i 0)
533 (while (< i len)
534 (if (= (aref chars i) ?-)
535 (setq i (1+ i)
536 to (aref chars i))
537 (setq ch (aref chars i)
538 to ch))
539 (while (<= ch to)
540 (modify-syntax-entry ch syntax)
541 (modify-category-entry ch category)
542 (setq ch (1+ ch)))
543 (setq i (1+ i)))
544 (setq deflist (cdr deflist))))
545
546 ;; Tibetan character set
547
548 (modify-category-entry (make-char 'tibetan) ?q)
549 (modify-category-entry (make-char 'tibetan-1-column) ?q)
550
551 (let ((deflist '(;; chars syntax category
552 ("\e$(7"!\e(B-\e$(7"J"K\e(B" "w" ?0) ; consonant
553 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
554 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
555 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
556 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
557 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
558 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lowel vowel/modifier
559 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
560 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
561 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
562 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
563 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
564 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
565 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
566 ))
567 elm chars len syntax category to ch i)
568 (while deflist
569 (setq elm (car deflist))
570 (setq chars (car elm)
571 len (length chars)
572 syntax (nth 1 elm)
573 category (nth 2 elm)
574 i 0)
575 (while (< i len)
576 (if (= (aref chars i) ?-)
577 (setq i (1+ i)
578 to (aref chars i))
579 (setq ch (aref chars i)
580 to ch))
581 (while (<= ch to)
582 (modify-syntax-entry ch syntax)
583 (modify-category-entry ch category)
584 (setq ch (1+ ch)))
585 (setq i (1+ i)))
586 (setq deflist (cdr deflist))))
587
588 ;; Vietnamese character set
589
590 (let ((lower (make-char 'vietnamese-viscii-lower))
591 (upper (make-char 'vietnamese-viscii-upper)))
592 (modify-syntax-entry lower "w")
593 (modify-syntax-entry upper "w")
594 (modify-category-entry lower ?v)
595 (modify-category-entry upper ?v)
596 (modify-category-entry lower ?l) ; To make a word with
597 (modify-category-entry upper ?l) ; latin characters.
598 )
599
600 (let ((tbl (standard-case-table))
601 (i 32))
602 (while (< i 128)
603 (set-case-syntax-pair (make-char 'vietnamese-viscii-upper i)
604 (make-char 'vietnamese-viscii-lower i)
605 tbl)
606 (setq i (1+ i))))
607
608 ;; Unicode (mule-unicode-0100-24ff)
609
610 (let ((tbl (standard-case-table)) c)
611
612 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
613 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
614 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
615 ;; Thus we have to check language-environment to handle casing
616 ;; correctly. Currently only I<->i is available.
617
618 ;; case-syntax-pair's are not yet given for Latin Extendet-B
619
620 ;; Latin Extended-A, Latin Extended-B
621 (setq c #x0100)
622 (while (<= c #x0233)
623 (modify-category-entry (decode-char 'ucs c) ?l)
624 (and (or (<= c #x012e)
625 (and (>= c #x014a) (<= c #x0177)))
626 (zerop (% c 2))
627 (set-case-syntax-pair
628 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
629 (and (>= c #x013a)
630 (<= c #x0148)
631 (zerop (% c 2))
632 (set-case-syntax-pair
633 (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))
634 (setq c (1+ c)))
635 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
636 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
637 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
638 ; (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl) ; these two have different length!
639 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
640 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
641 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
642
643 ;; Latin Extended Additional
644 (setq c #x1e00)
645 (while (<= c #x1ef9)
646 (modify-category-entry (decode-char 'ucs c) ?l)
647 (and (zerop (% c 2))
648 (or (<= c #x1e94) (>= c #x1ea0))
649 (set-case-syntax-pair
650 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
651 (setq c (1+ c)))
652
653 ;; Greek
654 (setq c #x0370)
655 (while (<= c #x03ff)
656 (modify-category-entry (decode-char 'ucs c) ?g)
657 (if (or (and (>= c #x0391) (<= c #x03a1))
658 (and (>= c #x03a3) (<= c #x03ab)))
659 (set-case-syntax-pair
660 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
661 (and (>= c #x03da)
662 (<= c #x03ee)
663 (zerop (% c 2))
664 (set-case-syntax-pair
665 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
666 (setq c (1+ c)))
667 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
668 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
669 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
670 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
671 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
672 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
673 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
674
675 ;; Greek Extended
676 (setq c #x1f00)
677 (while (<= c #x1fff)
678 (modify-category-entry (decode-char 'ucs c) ?g)
679 (and (<= (logand c #x000f) 7)
680 (<= c #x1fa7)
681 (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
682 (/= (logand c #x00f0) 7)
683 (set-case-syntax-pair
684 (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
685 (setq c (1+ c)))
686 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
687 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
688 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
689 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
690 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
691 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
692 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
693 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
694 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
695 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
696 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
697 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
698 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
699 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
700 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
701 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
702 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
703 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
704 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
705 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
706 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
707 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
708 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
709 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
710
711 ;; cyrillic
712 (setq c #x0400)
713 (while (<= c #x04ff)
714 (modify-category-entry (decode-char 'ucs c) ?y)
715 (and (>= c #x0400)
716 (<= c #x040f)
717 (set-case-syntax-pair
718 (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
719 (and (>= c #x0410)
720 (<= c #x042f)
721 (set-case-syntax-pair
722 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
723 (and (zerop (% c 2))
724 (or (and (>= c #x0460) (<= c #x0480))
725 (and (>= c #x048c) (<= c #x04be))
726 (and (>= c #x04d0) (<= c #x04f4)))
727 (set-case-syntax-pair
728 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
729 (setq c (1+ c)))
730 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
731 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
732 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
733 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
734 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
735
736 ;; general punctuation
737 (setq c #x2000)
738 (while (<= c #x200b)
739 (set-case-syntax c " " tbl)
740 (setq c (1+ c)))
741 (setq c #x2010)
742 (while (<= c #x2027)
743 (set-case-syntax c "_" tbl)
744 (setq c (1+ c)))
745 )
746
747 \f
748 ;;; Setting word boundary.
749
750 (setq word-combining-categories
751 '((?l . ?l)))
752
753 (setq word-separating-categories ; (2-byte character sets)
754 '((?A . ?K) ; Alpha numeric - Katakana
755 (?A . ?C) ; Alpha numeric - Chinese
756 (?H . ?A) ; Hiragana - Alpha numeric
757 (?H . ?K) ; Hiragana - Katakana
758 (?H . ?C) ; Hiragana - Chinese
759 (?K . ?A) ; Katakana - Alpha numeric
760 (?K . ?C) ; Katakana - Chinese
761 (?C . ?A) ; Chinese - Alpha numeric
762 (?C . ?K) ; Chinese - Katakana
763 ))
764
765 \f
766 ;; For each character set, put the information of the most proper
767 ;; coding system to encode it by `preferred-coding-system' property.
768
769 (let ((l '((latin-iso8859-1 . iso-latin-1)
770 (latin-iso8859-2 . iso-latin-2)
771 (latin-iso8859-3 . iso-latin-3)
772 (latin-iso8859-4 . iso-latin-4)
773 (thai-tis620 . thai-tis620)
774 (greek-iso8859-7 . greek-iso-8bit)
775 (arabic-iso8859-6 . iso-2022-7bit)
776 (hebrew-iso8859-8 . hebrew-iso-8bit)
777 (katakana-jisx0201 . japanese-shift-jis)
778 (latin-jisx0201 . japanese-shift-jis)
779 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
780 (latin-iso8859-9 . iso-latin-5)
781 (japanese-jisx0208-1978 . iso-2022-jp)
782 (chinese-gb2312 . cn-gb-2312)
783 (japanese-jisx0208 . iso-2022-jp)
784 (korean-ksc5601 . iso-2022-kr)
785 (japanese-jisx0212 . iso-2022-jp)
786 (chinese-cns11643-1 . iso-2022-cn)
787 (chinese-cns11643-2 . iso-2022-cn)
788 (chinese-big5-1 . chinese-big5)
789 (chinese-big5-2 . chinese-big5)
790 (chinese-sisheng . iso-2022-7bit)
791 (ipa . iso-2022-7bit)
792 (vietnamese-viscii-lower . vietnamese-viscii)
793 (vietnamese-viscii-upper . vietnamese-viscii)
794 (arabic-digit . iso-2022-7bit)
795 (arabic-1-column . iso-2022-7bit)
796 (ascii-right-to-left . iso-2022-7bit)
797 (lao . lao)
798 (arabic-2-column . iso-2022-7bit)
799 (indian-is13194 . devanagari)
800 (indian-1-column . devanagari)
801 (tibetan-1-column . tibetan)
802 (ethiopic . iso-2022-7bit)
803 (chinese-cns11643-3 . iso-2022-cn)
804 (chinese-cns11643-4 . iso-2022-cn)
805 (chinese-cns11643-5 . iso-2022-cn)
806 (chinese-cns11643-6 . iso-2022-cn)
807 (chinese-cns11643-7 . iso-2022-cn)
808 (indian-2-column . devanagari)
809 (tibetan . tibetan)
810 (latin-iso8859-14 . iso-latin-8)
811 (latin-iso8859-15 . iso-latin-9))))
812 (while l
813 (put-charset-property (car (car l)) 'preferred-coding-system (cdr (car l)))
814 (setq l (cdr l))))
815
816 \f
817 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
818 ;; SPACE and NEWLIE are already set. Also put `nospace-between-words'
819 ;; property to the charsets.
820 (let ((l '(katakana-jisx0201
821 japanese-jisx0208 japanese-jisx0212
822 chinese-gb2312 chinese-big5-1 chinese-big5-2)))
823 (while l
824 (aset auto-fill-chars (make-char (car l)) t)
825 (put-charset-property (car l) 'nospace-between-words t)
826 (setq l (cdr l))))
827
828 ;;; Local Variables:
829 ;;; coding: iso-2022-7bit
830 ;;; End:
831
832 ;;; characters.el ends here