]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
0b7c223c258e1011c7f7dd5cb368fa3a9b51fbef
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1997, 2000, 2001, 2002, 2003, 2004
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H14PRO021
8
9 ;; Keywords: multibyte character, character set, syntax, category
10
11 ;; This file is part of GNU Emacs.
12
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; any later version.
17
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
22
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26 ;; Boston, MA 02110-1301, USA.
27
28 ;;; Commentary:
29
30 ;; This file contains multibyte characters. Always save this file in
31 ;; the coding system `iso-2022-7bit'.
32
33 ;; This file does not define the syntax for Latin-N character sets;
34 ;; those are defined by the files latin-N.el.
35
36 ;;; Code:
37
38 ;; We must set utf-translate-cjk-mode to nil while loading this file
39 ;; to avoid translating CJK characters in decode-char.
40 (defvar saved-utf-translate-cjk-mode utf-translate-cjk-mode) ; snapshot of the value in effect before this file overrides it
41 (setq utf-translate-cjk-mode nil) ; off while loading; presumably restored from the snapshot later in the file (not visible in this part)
42
43 ;;; Predefined categories.
44
45 ;; For each character set.
46
47 (define-category ?a "ASCII") ; each call binds a one-character mnemonic to a description; this group uses lowercase letters
48 (define-category ?l "Latin")
49 (define-category ?t "Thai")
50 (define-category ?g "Greek")
51 (define-category ?b "Arabic") ; mnemonic is not the initial letter (?a is ASCII)
52 (define-category ?w "Hebrew") ; mnemonic is not the initial letter (?h is Korean)
53 (define-category ?y "Cyrillic") ; mnemonic is not the initial letter (?c is Chinese)
54 (define-category ?k "Japanese katakana")
55 (define-category ?r "Japanese roman")
56 (define-category ?c "Chinese")
57 (define-category ?j "Japanese")
58 (define-category ?h "Korean")
59 (define-category ?e "Ethiopic (Ge'ez)")
60 (define-category ?v "Vietnamese")
61 (define-category ?i "Indian")
62 (define-category ?o "Lao")
63 (define-category ?q "Tibetan")
64
65 ;; For each group (row) of 2-byte character sets.
66
67 (define-category ?A "Alpha-numeric characters of 2-byte character sets") ; uppercase mnemonics: assigned per row of the 2-byte charsets further down
68 (define-category ?C "Chinese (Han) characters of 2-byte character sets")
69 (define-category ?G "Greek characters of 2-byte character sets")
70 (define-category ?H "Japanese Hiragana characters of 2-byte character sets")
71 (define-category ?K "Japanese Katakana characters of 2-byte character sets")
72 (define-category ?N "Korean Hangul characters of 2-byte character sets")
73 (define-category ?Y "Cyrillic characters of 2-byte character sets")
74 (define-category ?I "Indian Glyphs") ; given to the indian-2-column and indian-glyph charsets below
75
76 ;; For phonetic classifications.
77
78 (define-category ?0 "consonant") ; digit mnemonics ?0..?9 hold the phonetic classes used by the Indic and Lao tables below
79 (define-category ?1 "base (independent) vowel")
80 (define-category ?2 "upper diacritical mark (including upper vowel)")
81 (define-category ?3 "lower diacritical mark (including lower vowel)")
82 (define-category ?4 "tone mark")
83 (define-category ?5 "symbol")
84 (define-category ?6 "digit")
85 (define-category ?7 "vowel-modifying diacritical mark")
86 (define-category ?8 "vowel-signs")
87 (define-category ?9 "semivowel lower")
88
89 ;; For filling.
90 (define-category ?| "While filling, we can break a line at this character.") ; added to the Chinese, Japanese and kana characters below
91
92 ;; For indentation calculation.
93 (define-category ?\s ; the mnemonic is the space character itself; given to code 160 of the Latin charsets below
94 "This character counts as a space for indentation purposes.")
95
96 ;; Keep the following for `kinsoku' processing. See comments in
97 ;; kinsoku.el.
98 (define-category ?> "A character which can't be placed at beginning of line.") ; only defined here; no character in this part of the file is given it
99 (define-category ?< "A character which can't be placed at end of line.") ; likewise only defined here
100
101 ;; Combining
102 (define-category ?^ "Combining diacritic or mark")
103 \f
104 ;;; Setting syntax and category.
105
106 ;; ASCII
107
108 (let ((ch 32)) ; start at code 32 (space)
109 (while (< ch 127) ; Codes 32..126 (127 itself excluded) get
110 (modify-category-entry ch ?a) ; the category `a' (ASCII)
111 (modify-category-entry ch ?l) ; and `l' (Latin).
112 (setq ch (1+ ch))))
113
114 ;; Arabic character set
115
116 (let ((charsets '(arabic-iso8859-6 ; the four Arabic charsets that receive category `b'
117 arabic-digit
118 arabic-1-column
119 arabic-2-column)))
120 (while charsets
121 ;; (modify-syntax-entry (make-char (car charsets)) "w")
122 (modify-category-entry (make-char (car charsets)) ?b) ; make-char is given only the charset (cf. the `generic-...-char' names used for Big5 below)
123 (setq charsets (cdr charsets))))
124 (let ((ch #x600)) ; three inclusive UCS ranges get category `b'; `ch' is reused for each
125 (while (<= ch #x6ff) ; range 1: #x600..#x6ff
126 (modify-category-entry (decode-char 'ucs ch) ?b)
127 (setq ch (1+ ch)))
128 (setq ch #xfb50)
129 (while (<= ch #xfdff) ; range 2: #xfb50..#xfdff (presumably the Arabic presentation forms)
130 (modify-category-entry (decode-char 'ucs ch) ?b)
131 (setq ch (1+ ch)))
132 (setq ch #xfe70)
133 (while (<= ch #xfefe) ; range 3: #xfe70..#xfefe; note it stops one short of #xfeff
134 (modify-category-entry (decode-char 'ucs ch) ?b)
135 (setq ch (1+ ch))))
136
137 ;; Chinese character set (GB2312)
138
139 ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
140 (modify-syntax-entry (make-char 'chinese-gb2312 33) "_") ; rows 33, 34 and 41 of GB2312 get symbol syntax as a whole
141 (modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
142 (modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
143 (modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B") ; next ten lines: opening brackets, class "(" followed by the matching closer
144 (modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
145 (modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
146 (modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
147 (modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
148 (modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
149 (modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
150 (modify-syntax-entry ?\\e$A#(\e(B "(\e$A#)\e(B")
151 (modify-syntax-entry ?\\e$A#{\e(B "(\e$A#}\e(B")
152 (modify-syntax-entry ?\\e$A#[\e(B "(\e$A#]\e(B")
153 (modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B") ; last ten lines: the same pairs reversed, class ")" followed by the matching opener
154 (modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
155 (modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
156 (modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
157 (modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
158 (modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
159 (modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
160 (modify-syntax-entry ?\\e$A#)\e(B ")\e$A#(\e(B")
161 (modify-syntax-entry ?\\e$A#}\e(B ")\e$A#{\e(B")
162 (modify-syntax-entry ?\\e$A#]\e(B ")\e$A#[\e(B")
163
164 (let ((chars "\e$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d\e(B"))
165 (dotimes (i (length chars)) ; walk the string one character at a time
166 (modify-syntax-entry (aref chars i) "."))) ; every listed GB2312 character gets punctuation syntax
167
168 (modify-category-entry (make-char 'chinese-gb2312) ?c) ; whole charset: Chinese
169 (modify-category-entry (make-char 'chinese-gb2312) ?\|) ; whole charset: line may be broken here when filling
170 (modify-category-entry (make-char 'chinese-gb2312 35) ?A) ; rows 35..39: alpha-numeric, hiragana, katakana, Greek, Cyrillic
171 (modify-category-entry (make-char 'chinese-gb2312 36) ?H)
172 (modify-category-entry (make-char 'chinese-gb2312 37) ?K)
173 (modify-category-entry (make-char 'chinese-gb2312 38) ?G)
174 (modify-category-entry (make-char 'chinese-gb2312 39) ?Y)
175 (let ((row 48))
176 (while (< row 127) ; rows 48..126 are marked as Han characters
177 (modify-category-entry (make-char 'chinese-gb2312 row) ?C)
178 (setq row (1+ row))))
179
180 (let ((tbl (standard-case-table))) ; all pairs are registered in the standard case table
181 (dotimes (i 26) ; row #x23 (category ?A above): 26 pairs, upper #x41+i with lower #x61+i
182 (set-case-syntax-pair (make-char 'chinese-gb2312 #x23 (+ #x41 i))
183 (make-char 'chinese-gb2312 #x23 (+ #x61 i)) tbl))
184 (dotimes (i 24) ; row #x26 (category ?G above): 24 pairs, upper #x21+i with lower #x41+i
185 (set-case-syntax-pair (make-char 'chinese-gb2312 #x26 (+ #x21 i))
186 (make-char 'chinese-gb2312 #x26 (+ #x41 i)) tbl))
187 (dotimes (i 33) ; row #x27 (category ?Y above): 33 pairs, upper #x21+i with lower #x51+i
188 (set-case-syntax-pair (make-char 'chinese-gb2312 #x27 (+ #x21 i))
189 (make-char 'chinese-gb2312 #x27 (+ #x51 i)) tbl)))
190
191 ;; Chinese character set (BIG5)
192
193 (let ((from (decode-big5-char #xA141)) ; range 1: Big5 #xA141 up to, but not including, #xA15D
194 (to (decode-big5-char #xA15D)))
195 (while (< from to) ; `to' itself is excluded; the loop steps through decoded character codes
196 (modify-syntax-entry from ".") ; punctuation
197 (setq from (1+ from))))
198 (let ((from (decode-big5-char #xA1A5)) ; range 2: #xA1A5 up to, but not including, #xA1AD
199 (to (decode-big5-char #xA1AD)))
200 (while (< from to)
201 (modify-syntax-entry from ".") ; punctuation
202 (setq from (1+ from))))
203 (let ((from (decode-big5-char #xA1AD)) ; range 3 starts where range 2 stopped and runs up to, not including, #xA2AF
204 (to (decode-big5-char #xA2AF)))
205 (while (< from to)
206 (modify-syntax-entry from "_") ; symbol
207 (setq from (1+ from))))
208
209 (let ((parens "\e$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
210 open close)
211 (dotimes (i (/ (length parens) 2)) ; the string is read as consecutive (opener closer) pairs
212 (setq open (aref parens (* i 2)) ; even index: opener
213 close (aref parens (1+ (* i 2)))) ; odd index: its closer
214 (modify-syntax-entry open (format "(%c" close)) ; opener: class "(" plus matching closer
215 (modify-syntax-entry close (format ")%c" open)))) ; closer: class ")" plus matching opener
216
217 (let ((generic-big5-1-char (make-char 'chinese-big5-1)) ; one generic character per Big5 charset half
218 (generic-big5-2-char (make-char 'chinese-big5-2)))
219 ;; (modify-syntax-entry generic-big5-1-char "w")
220 ;; (modify-syntax-entry generic-big5-2-char "w")
221
222 (modify-category-entry generic-big5-1-char ?c) ; Chinese
223 (modify-category-entry generic-big5-2-char ?c)
224
225 (modify-category-entry generic-big5-1-char ?C) ; Han characters of 2-byte charsets
226 (modify-category-entry generic-big5-2-char ?C)
227
228 (modify-category-entry generic-big5-1-char ?\|) ; line may be broken here when filling
229 (modify-category-entry generic-big5-2-char ?\|))
230
231 (let ((tbl (standard-case-table))) ; case pairs for the Big5 letter runs, registered in the standard case table
232 (dotimes (i 22) ; first 22 pairs: upper #xA2CF+i (up to #xA2E4) with lower #xA2CF+26+i (up to #xA2FE)
233 (set-case-syntax-pair (decode-big5-char (+ #xA2CF i))
234 (decode-big5-char (+ #xA2CF i 26)) tbl))
235 (dotimes (i 4) ; last 4 pairs: uppercase continues at #xA2CF+22 = #xA2E5 (not #xA2E4, already paired above); lowercase continues at #xA340
236 (set-case-syntax-pair (decode-big5-char (+ #xA2E5 i))
237 (decode-big5-char (+ #xA340 i)) tbl))
238 (dotimes (i 24) ; second alphabet: 24 pairs, upper #xA344+i with lower #xA344+24+i
239 (set-case-syntax-pair (decode-big5-char (+ #xA344 i))
240 (decode-big5-char (+ #xA344 i 24)) tbl)))
241
242
243 ;; Chinese character set (CNS11643)
244
245 (let ((cns-list '(chinese-cns11643-1 ; the seven CNS 11643 plane charsets, all treated alike
246 chinese-cns11643-2
247 chinese-cns11643-3
248 chinese-cns11643-4
249 chinese-cns11643-5
250 chinese-cns11643-6
251 chinese-cns11643-7))
252 generic-char)
253 (while cns-list
254 (setq generic-char (make-char (car cns-list))) ; generic character for the current charset
255 ;; (modify-syntax-entry generic-char "w")
256 (modify-category-entry generic-char ?c) ; Chinese
257 (modify-category-entry generic-char ?C) ; Han characters of 2-byte charsets
258 (modify-category-entry generic-char ?|) ; line may be broken here when filling
259 (setq cns-list (cdr cns-list))))
260
261 (let ((parens "\e$(G!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
262 open close)
263 (dotimes (i (/ (length parens) 2)) ; same scheme as the Big5 bracket loop: the string holds (opener closer) pairs
264 (setq open (aref parens (* i 2)) ; even index: opener
265 close (aref parens (1+ (* i 2)))) ; odd index: its closer
266 (modify-syntax-entry open (format "(%c" close)) ; opener: class "(" plus matching closer
267 (modify-syntax-entry close (format ")%c" open)))) ; closer: class ")" plus matching opener
268
269 ;; Cyrillic character set (ISO-8859-5)
270
271 (modify-category-entry (make-char 'cyrillic-iso8859-5) ?y) ; whole charset: Cyrillic
272
273 (modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ") ; code 160 gets whitespace syntax
274 (modify-syntax-entry ?\e,L-\e(B ".") ; three non-letters of the charset get punctuation syntax
275 (modify-syntax-entry ?\e,Lp\e(B ".")
276 (modify-syntax-entry ?\e,L}\e(B ".")
277 (let ((tbl (standard-case-table))) ; upper/lower pairs: first the ISO-8859-5 characters, then the mule-unicode ones
278 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
279 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
280 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
281 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
282 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
283 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
284 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
285 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
286 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
287 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
288 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
289 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
290 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
291 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
292 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
293 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
294 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
295 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
296 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
297 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
298 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
299 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
300 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
301 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
302 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
303 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
304 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
305 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
306 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
307 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
308 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
309 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
310 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
311 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
312 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
313 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
314 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
315 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
316 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
317 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
318 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
319 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
320 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
321 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
322 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
323 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl)
324 (set-case-syntax-pair ?\e$,1(!\e(B ?\e$,1(q\e(B tbl) ; from here on: the same list again with mule-unicode characters (presumably the Unicode equivalents)
325 (set-case-syntax-pair ?\e$,1("\e(B ?\e$,1(r\e(B tbl)
326 (set-case-syntax-pair ?\e$,1(#\e(B ?\e$,1(s\e(B tbl)
327 (set-case-syntax-pair ?\e$,1($\e(B ?\e$,1(t\e(B tbl)
328 (set-case-syntax-pair ?\e$,1(%\e(B ?\e$,1(u\e(B tbl)
329 (set-case-syntax-pair ?\e$,1(&\e(B ?\e$,1(v\e(B tbl)
330 (set-case-syntax-pair ?\e$,1('\e(B ?\e$,1(w\e(B tbl)
331 (set-case-syntax-pair ?\e$,1((\e(B ?\e$,1(x\e(B tbl)
332 (set-case-syntax-pair ?\e$,1()\e(B ?\e$,1(y\e(B tbl)
333 (set-case-syntax-pair ?\e$,1(*\e(B ?\e$,1(z\e(B tbl)
334 (set-case-syntax-pair ?\e$,1(+\e(B ?\e$,1({\e(B tbl)
335 (set-case-syntax-pair ?\e$,1(,\e(B ?\e$,1(|\e(B tbl)
336 (set-case-syntax-pair ?\e$,1(.\e(B ?\e$,1(~\e(B tbl)
337 (set-case-syntax-pair ?\e$,1(/\e(B ?\e$,1(\7f\e(B tbl)
338 (set-case-syntax-pair ?\e$,1(0\e(B ?\e$,1(P\e(B tbl)
339 (set-case-syntax-pair ?\e$,1(1\e(B ?\e$,1(Q\e(B tbl)
340 (set-case-syntax-pair ?\e$,1(2\e(B ?\e$,1(R\e(B tbl)
341 (set-case-syntax-pair ?\e$,1(3\e(B ?\e$,1(S\e(B tbl)
342 (set-case-syntax-pair ?\e$,1(4\e(B ?\e$,1(T\e(B tbl)
343 (set-case-syntax-pair ?\e$,1(5\e(B ?\e$,1(U\e(B tbl)
344 (set-case-syntax-pair ?\e$,1(6\e(B ?\e$,1(V\e(B tbl)
345 (set-case-syntax-pair ?\e$,1(7\e(B ?\e$,1(W\e(B tbl)
346 (set-case-syntax-pair ?\e$,1(8\e(B ?\e$,1(X\e(B tbl)
347 (set-case-syntax-pair ?\e$,1(9\e(B ?\e$,1(Y\e(B tbl)
348 (set-case-syntax-pair ?\e$,1(:\e(B ?\e$,1(Z\e(B tbl)
349 (set-case-syntax-pair ?\e$,1(;\e(B ?\e$,1([\e(B tbl)
350 (set-case-syntax-pair ?\e$,1(<\e(B ?\e$,1(\\e(B tbl)
351 (set-case-syntax-pair ?\e$,1(=\e(B ?\e$,1(]\e(B tbl)
352 (set-case-syntax-pair ?\e$,1(>\e(B ?\e$,1(^\e(B tbl)
353 (set-case-syntax-pair ?\e$,1(?\e(B ?\e$,1(_\e(B tbl)
354 (set-case-syntax-pair ?\e$,1(@\e(B ?\e$,1(`\e(B tbl)
355 (set-case-syntax-pair ?\e$,1(A\e(B ?\e$,1(a\e(B tbl)
356 (set-case-syntax-pair ?\e$,1(B\e(B ?\e$,1(b\e(B tbl)
357 (set-case-syntax-pair ?\e$,1(C\e(B ?\e$,1(c\e(B tbl)
358 (set-case-syntax-pair ?\e$,1(D\e(B ?\e$,1(d\e(B tbl)
359 (set-case-syntax-pair ?\e$,1(E\e(B ?\e$,1(e\e(B tbl)
360 (set-case-syntax-pair ?\e$,1(F\e(B ?\e$,1(f\e(B tbl)
361 (set-case-syntax-pair ?\e$,1(G\e(B ?\e$,1(g\e(B tbl)
362 (set-case-syntax-pair ?\e$,1(H\e(B ?\e$,1(h\e(B tbl)
363 (set-case-syntax-pair ?\e$,1(I\e(B ?\e$,1(i\e(B tbl)
364 (set-case-syntax-pair ?\e$,1(J\e(B ?\e$,1(j\e(B tbl)
365 (set-case-syntax-pair ?\e$,1(K\e(B ?\e$,1(k\e(B tbl)
366 (set-case-syntax-pair ?\e$,1(L\e(B ?\e$,1(l\e(B tbl)
367 (set-case-syntax-pair ?\e$,1(M\e(B ?\e$,1(m\e(B tbl)
368 (set-case-syntax-pair ?\e$,1(N\e(B ?\e$,1(n\e(B tbl)
369 (set-case-syntax-pair ?\e$,1(O\e(B ?\e$,1(o\e(B tbl))
370
371 ;; Devanagari character set
372
373 ;;; Commented out since the categories appear not to be used anywhere
374 ;;; and word syntax is the default.
375 ;; (let ((deflist '(;; chars syntax category
376 ;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
377 ;; ; chandrabindu, anuswar, visarga
378 ;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
379 ;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
380 ;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
381 ;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
382 ;; ;; Unicode equivalents
383 ;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
384 ;; ; chandrabindu, anuswar, visarga
385 ;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
386 ;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
387 ;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
388 ;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
389 ;; ))
390 ;; elm chars len syntax category to ch i)
391 ;; (while deflist
392 ;; (setq elm (car deflist))
393 ;; (setq chars (car elm)
394 ;; len (length chars)
395 ;; syntax (nth 1 elm)
396 ;; category (nth 2 elm)
397 ;; i 0)
398 ;; (while (< i len)
399 ;; (if (= (aref chars i) ?-)
400 ;; (setq i (1+ i)
401 ;; to (aref chars i))
402 ;; (setq ch (aref chars i)
403 ;; to ch))
404 ;; (while (<= ch to)
405 ;; (modify-syntax-entry ch syntax)
406 ;; (modify-category-entry ch category)
407 ;; (setq ch (1+ ch)))
408 ;; (setq i (1+ i)))
409 ;; (setq deflist (cdr deflist))))
410
411 ;; Ethiopic character set
412
413 (modify-category-entry (make-char 'ethiopic) ?e) ; whole `ethiopic' charset: category `e'
414 ;; (modify-syntax-entry (make-char 'ethiopic) "w")
415 (dotimes (i (1+ (- #x137c #x1200))) ; UCS #x1200..#x137c inclusive
416 (modify-category-entry (decode-char 'ucs (+ #x1200 i)) ?e))
417 (let ((chars '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
418 ;; Unicode equivalents of the above:
419 ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B)))
420 (while chars
421 (modify-syntax-entry (car chars) ".") ; every listed character gets punctuation syntax
422 (setq chars (cdr chars))))
423
424 ;; Greek character set (ISO-8859-7)
425
426 (modify-category-entry (make-char 'greek-iso8859-7) ?g) ; whole charset: Greek
427 (let ((c #x370))
428 (while (<= c #x3ff) ; UCS #x370..#x3ff inclusive get the same category
429 (modify-category-entry (decode-char 'ucs c) ?g)
430 (setq c (1+ c))))
431
432 ;; (let ((c 182))
433 ;; (while (< c 255)
434 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
435 ;; (setq c (1+ c))))
436 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
437 (modify-syntax-entry ?\e,F7\e(B ".") ; three ISO-8859-7 characters get punctuation syntax
438 (modify-syntax-entry ?\e,F;\e(B ".")
439 (modify-syntax-entry ?\e,F=\e(B ".")
440 (let ((tbl (standard-case-table))) ; first syntax for caseless non-letters, then upper/lower pairs
441 ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
442 ;; in several cases.
443 (set-case-syntax ?\e,F!\e(B "." tbl)
444 (set-case-syntax ?\e,F"\e(B "." tbl)
445 (set-case-syntax ?\e,F&\e(B "." tbl) ; NOTE(review): same character as the next line, so this "." is presumably overridden by the "_" below -- confirm which was intended
446 (set-case-syntax ?\e,F&\e(B "_" tbl)
447 (set-case-syntax ?\e,F'\e(B "." tbl)
448 (set-case-syntax ?\e,F)\e(B "_" tbl)
449 (set-case-syntax ?\e,F+\e(B "." tbl)
450 (set-case-syntax ?\e,F,\e(B "_" tbl)
451 (set-case-syntax ?\e,F-\e(B "_" tbl)
452 (set-case-syntax ?\e,F/\e(B "." tbl)
453 (set-case-syntax ?\e,F0\e(B "_" tbl)
454 (set-case-syntax ?\e,F1\e(B "_" tbl)
455 ;; (set-case-syntax ?\e,F7\e(B "_" tbl)
456 ;; (set-case-syntax ?\e,F=\e(B "_" tbl)
457 (set-case-syntax-pair ?\e,FA\e(B ?\e,Fa\e(B tbl) ; ISO-8859-7 upper/lower pairs start here
458 (set-case-syntax-pair ?\e,FB\e(B ?\e,Fb\e(B tbl)
459 (set-case-syntax-pair ?\e,FC\e(B ?\e,Fc\e(B tbl)
460 (set-case-syntax-pair ?\e,FD\e(B ?\e,Fd\e(B tbl)
461 (set-case-syntax-pair ?\e,FE\e(B ?\e,Fe\e(B tbl)
462 (set-case-syntax-pair ?\e,FF\e(B ?\e,Ff\e(B tbl)
463 (set-case-syntax-pair ?\e,FG\e(B ?\e,Fg\e(B tbl)
464 (set-case-syntax-pair ?\e,FH\e(B ?\e,Fh\e(B tbl)
465 (set-case-syntax-pair ?\e,FI\e(B ?\e,Fi\e(B tbl)
466 (set-case-syntax-pair ?\e,FJ\e(B ?\e,Fj\e(B tbl)
467 (set-case-syntax-pair ?\e,FK\e(B ?\e,Fk\e(B tbl)
468 (set-case-syntax-pair ?\e,FL\e(B ?\e,Fl\e(B tbl)
469 (set-case-syntax-pair ?\e,FM\e(B ?\e,Fm\e(B tbl)
470 (set-case-syntax-pair ?\e,FN\e(B ?\e,Fn\e(B tbl)
471 (set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
472 (set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
473 (set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
474 (set-upcase-syntax ?\e,FS\e(B ?\e,Fr\e(B tbl) ; a second lowercase form for the capital paired on the next line; presumably one-way (upcase only) -- TODO confirm against set-upcase-syntax
475 (set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
476 (set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
477 (set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
478 (set-case-syntax-pair ?\e,FV\e(B ?\e,Fv\e(B tbl)
479 (set-case-syntax-pair ?\e,FW\e(B ?\e,Fw\e(B tbl)
480 (set-case-syntax-pair ?\e,FX\e(B ?\e,Fx\e(B tbl)
481 (set-case-syntax-pair ?\e,FY\e(B ?\e,Fy\e(B tbl)
482 (set-case-syntax-pair ?\e,FZ\e(B ?\e,Fz\e(B tbl)
483 (set-case-syntax-pair ?\e,F[\e(B ?\e,F{\e(B tbl)
484 (set-case-syntax-pair ?\e,F?\e(B ?\e,F~\e(B tbl)
485 (set-case-syntax-pair ?\e,F>\e(B ?\e,F}\e(B tbl)
486 (set-case-syntax-pair ?\e,F<\e(B ?\e,F|\e(B tbl)
487 (set-case-syntax-pair ?\e,F6\e(B ?\e,F\\e(B tbl)
488 (set-case-syntax-pair ?\e,F8\e(B ?\e,F]\e(B tbl)
489 (set-case-syntax-pair ?\e,F9\e(B ?\e,F^\e(B tbl)
490 (set-case-syntax-pair ?\e,F:\e(B ?\e,F_\e(B tbl)
491 ;; Unicode equivalents
492 (set-case-syntax-pair ?\e$,1&q\e(B ?\e$,1'1\e(B tbl)
493 (set-case-syntax-pair ?\e$,1&r\e(B ?\e$,1'2\e(B tbl)
494 (set-case-syntax-pair ?\e$,1&s\e(B ?\e$,1'3\e(B tbl)
495 (set-case-syntax-pair ?\e$,1&t\e(B ?\e$,1'4\e(B tbl)
496 (set-case-syntax-pair ?\e$,1&u\e(B ?\e$,1'5\e(B tbl)
497 (set-case-syntax-pair ?\e$,1&v\e(B ?\e$,1'6\e(B tbl)
498 (set-case-syntax-pair ?\e$,1&w\e(B ?\e$,1'7\e(B tbl)
499 (set-case-syntax-pair ?\e$,1&x\e(B ?\e$,1'8\e(B tbl)
500 (set-case-syntax-pair ?\e$,1&y\e(B ?\e$,1'9\e(B tbl)
501 (set-case-syntax-pair ?\e$,1&z\e(B ?\e$,1':\e(B tbl)
502 (set-case-syntax-pair ?\e$,1&{\e(B ?\e$,1';\e(B tbl)
503 (set-case-syntax-pair ?\e$,1&|\e(B ?\e$,1'<\e(B tbl)
504 (set-case-syntax-pair ?\e$,1&}\e(B ?\e$,1'=\e(B tbl)
505 (set-case-syntax-pair ?\e$,1&~\e(B ?\e$,1'>\e(B tbl)
506 (set-case-syntax-pair ?\e$,1&\7f\e(B ?\e$,1'?\e(B tbl)
507 (set-case-syntax-pair ?\e$,1' \e(B ?\e$,1'@\e(B tbl)
508 (set-case-syntax-pair ?\e$,1'!\e(B ?\e$,1'A\e(B tbl)
509 (set-upcase-syntax ?\e$,1'#\e(B ?\e$,1'B\e(B tbl) ; same special case as in the ISO-8859-7 list above
510 (set-case-syntax-pair ?\e$,1'#\e(B ?\e$,1'C\e(B tbl)
511 (set-case-syntax-pair ?\e$,1'$\e(B ?\e$,1'D\e(B tbl)
512 (set-case-syntax-pair ?\e$,1'%\e(B ?\e$,1'E\e(B tbl)
513 (set-case-syntax-pair ?\e$,1'&\e(B ?\e$,1'F\e(B tbl)
514 (set-case-syntax-pair ?\e$,1''\e(B ?\e$,1'G\e(B tbl)
515 (set-case-syntax-pair ?\e$,1'(\e(B ?\e$,1'H\e(B tbl)
516 (set-case-syntax-pair ?\e$,1')\e(B ?\e$,1'I\e(B tbl)
517 (set-case-syntax-pair ?\e$,1'*\e(B ?\e$,1'J\e(B tbl)
518 (set-case-syntax-pair ?\e$,1'+\e(B ?\e$,1'K\e(B tbl)
519 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
520 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
521 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
522 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
523 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
524 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
525 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl))
526
527 ;; Hebrew character set (ISO-8859-8)
528
529 (modify-category-entry (make-char 'hebrew-iso8859-8) ?w) ; whole charset: category `w' (Hebrew)
530 (let ((c #x591))
531 (while (<= c #x5f4) ; UCS #x591..#x5f4 inclusive get the same category
532 (modify-category-entry (decode-char 'ucs c) ?w)
533 (setq c (1+ c))))
534
535 (modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ
536 (modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ
537 (modify-syntax-entry (decode-char 'ucs #x5be) ".") ; MAQAF
538 (modify-syntax-entry (decode-char 'ucs #x5c0) ".") ; PASEQ
539 (modify-syntax-entry (decode-char 'ucs #x5c3) ".") ; SOF PASUQ
540 (modify-syntax-entry (decode-char 'ucs #x5f3) ".") ; GERESH
541 (modify-syntax-entry (decode-char 'ucs #x5f4) ".") ; GERSHAYIM
542
543 ;; (let ((c 224))
544 ;; (while (< c 251)
545 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
546 ;; (setq c (1+ c))))
547 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
548
549 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
550
551 (modify-category-entry (make-char 'indian-is13194) ?i) ; category `i' (Indian)
552 (modify-category-entry (make-char 'indian-2-column) ?I) ; category `I' (Indian Glyphs)
553 (modify-category-entry (make-char 'indian-glyph) ?I)
554 ;; Unicode Devanagari block
555 (let ((c #x901))
556 (while (<= c #x970) ; UCS #x901..#x970 inclusive
557 (modify-category-entry (decode-char 'ucs c) ?i)
558 (setq c (1+ c))))
559
560 (let ((l '(;; RANGE CATEGORY MEANINGS
561 (#x01 #x03 ?7) ; vowel modifier
562 (#x05 #x14 ?1) ; base vowel
563 (#x15 #x39 ?0) ; consonants
564 (#x3e #x4d ?8) ; vowel signs
565 (#x51 #x54 ?4) ; stress/tone mark
566 (#x58 #x5f ?0) ; consonants
567 (#x60 #x61 ?1) ; base vowel
568 (#x62 #x63 ?8) ; vowel signs
569 (#x66 #x6f ?6) ; digits
570 )))
571 (dolist (elt1 '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00)) ; nine block bases; the same offset table is applied to each
572 (dolist (elt2 l) ; each entry is (FIRST-OFFSET LAST-OFFSET CATEGORY)
573 (let* ((from (car elt2))
574 (counts (1+ (- (nth 1 elt2) from))) ; number of code points in the inclusive range
575 (category (nth 2 elt2)))
576 (dotimes (i counts)
577 (modify-category-entry (decode-char 'ucs (+ elt1 from i)) ; code point = block base + offset
578 category))))))
579
580 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
581
582 (modify-category-entry (make-char 'katakana-jisx0201) ?k) ; JISX0201 kana: katakana and Japanese
583 (modify-category-entry (make-char 'katakana-jisx0201) ?j)
584 (modify-category-entry (make-char 'latin-jisx0201) ?r) ; JISX0201 roman: Japanese roman only
585 (modify-category-entry (make-char 'japanese-jisx0208) ?j)
586 (modify-category-entry (make-char 'japanese-jisx0212) ?j)
587 (modify-category-entry (make-char 'katakana-jisx0201) ?\|) ; all three charsets below allow a line break when filling
588 (modify-category-entry (make-char 'japanese-jisx0208) ?\|)
589 (modify-category-entry (make-char 'japanese-jisx0212) ?\|)
590
591 ;; Unicode equivalents of JISX0201-kana
592 (let ((c #xff61))
593 (while (<= c #xff9f) ; UCS #xff61..#xff9f inclusive: same three categories as katakana-jisx0201 above
594 (modify-category-entry (decode-char 'ucs c) ?k)
595 (modify-category-entry (decode-char 'ucs c) ?j)
596 (modify-category-entry (decode-char 'ucs c) ?\|)
597 (setq c (1+ c))))
598
599 ;; Katakana block
600 (let ((c #x30a0))
601 (while (<= c #x30ff) ; UCS #x30a0..#x30ff inclusive
602 ;; ?K is double width, ?k isn't specified
603 (modify-category-entry (decode-char 'ucs c) ?k)
604 (modify-category-entry (decode-char 'ucs c) ?j)
605 (modify-category-entry (decode-char 'ucs c) ?\|)
606 (setq c (1+ c))))
607
608 ;; Hiragana block
609 (let ((c #x3040))
610 (while (<= c #x309f) ; UCS #x3040..#x309f inclusive; unlike katakana, ?j is left commented out here
611 ;; ?H is actually defined to be double width
612 (modify-category-entry (decode-char 'ucs c) ?H)
613 ;;(modify-category-entry (decode-char 'ucs c) ?j)
614 (modify-category-entry (decode-char 'ucs c) ?\|)
615 (setq c (1+ c))))
616
617 ;; JISX0208
618 ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
619 (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_") ; rows 33, 34 and 40 get symbol syntax as a whole
620 (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
621 (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
622 (let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
623 (while chars
624 (modify-syntax-entry (car chars) "w") ; these listed characters are switched to word syntax individually, after the row-wide "_" above
625 (setq chars (cdr chars))))
626 (let ((parens "\e$B!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![\e(B" )
627 open close)
628 (dotimes (i (/ (length parens) 2)) ; the string is read as consecutive (opener closer) pairs
629 (setq open (aref parens (* i 2)) ; even index: opener
630 close (aref parens (1+ (* i 2)))) ; odd index: its closer
631 (modify-syntax-entry open (format "(%c" close)) ; opener: class "(" plus matching closer
632 (modify-syntax-entry close (format ")%c" open)))) ; closer: class ")" plus matching opener
633
634 (modify-category-entry (make-char 'japanese-jisx0208 35) ?A) ; rows 35..39: alpha-numeric, hiragana, katakana, Greek, Cyrillic
635 (modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
636 (modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
637 (modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
638 (modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
639 (let ((row 48))
640 (while (< row 127) ; rows 48..126 are marked as Han characters
641 (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
642 (setq row (1+ row))))
643 (modify-category-entry ?\e$B!<\e(B ?K) ; this one symbol-row character counts as katakana only
644 (let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B)))
645 (while chars
646 (modify-category-entry (car chars) ?K) ; these two count as both katakana
647 (modify-category-entry (car chars) ?H) ; and hiragana
648 (setq chars (cdr chars))))
649 (let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
650 (while chars
651 (modify-category-entry (car chars) ?C) ; these listed characters are additionally marked as Han
652 (setq chars (cdr chars))))
653
654 (let ((tbl (standard-case-table))) ; all pairs are registered in the standard case table
655 (dotimes (i 26) ; row #x23 (category ?A above): 26 pairs, upper #x41+i with lower #x61+i
656 (set-case-syntax-pair (make-char 'japanese-jisx0208 #x23 (+ #x41 i))
657 (make-char 'japanese-jisx0208 #x23 (+ #x61 i)) tbl))
658 (dotimes (i 24) ; row #x26 (category ?G above): 24 pairs, upper #x21+i with lower #x41+i
659 (set-case-syntax-pair (make-char 'japanese-jisx0208 #x26 (+ #x21 i))
660 (make-char 'japanese-jisx0208 #x26 (+ #x41 i)) tbl))
661 (dotimes (i 33) ; row #x27 (category ?Y above): 33 pairs, upper #x21+i with lower #x51+i
662 (set-case-syntax-pair (make-char 'japanese-jisx0208 #x27 (+ #x21 i))
663 (make-char 'japanese-jisx0208 #x27 (+ #x51 i)) tbl)))
664
665 ;; JISX0212
666 ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
667 (modify-syntax-entry (make-char 'japanese-jisx0212 33) "_") ; rows 33..35 get symbol syntax as a whole
668 (modify-syntax-entry (make-char 'japanese-jisx0212 34) "_")
669 (modify-syntax-entry (make-char 'japanese-jisx0212 35) "_")
670
671 (modify-category-entry (make-char 'japanese-jisx0212 ) ?C) ; the whole charset is marked as Han, with no per-row split
672
673 ;; JISX0201-Kana
674 ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
675 (let ((chars '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
676 ;; Unicode:
677 ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B)))
678 (while chars
679 (modify-syntax-entry (car chars) ".") ; three JISX0201 kana characters and their Unicode forms get punctuation syntax
680 (setq chars (cdr chars))))
681
682 (modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B") ; opening corner bracket: class "(" plus its matching closer
683 (modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B") ; closing corner bracket: class ")" plus its matching opener (was wrongly given the open class "(")
684
685 ;; Korean character set (KSC5601)
686
687 ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
688 (modify-syntax-entry (make-char 'korean-ksc5601 33) "_") ; rows 33, 34 and 38..41 get symbol syntax as a whole
689 (modify-syntax-entry (make-char 'korean-ksc5601 34) "_")
690 (modify-syntax-entry (make-char 'korean-ksc5601 38) "_")
691 (modify-syntax-entry (make-char 'korean-ksc5601 39) "_")
692 (modify-syntax-entry (make-char 'korean-ksc5601 40) "_")
693 (modify-syntax-entry (make-char 'korean-ksc5601 41) "_")
694
695 (modify-category-entry (make-char 'korean-ksc5601) ?h) ; whole charset: Korean
696 (modify-category-entry (make-char 'korean-ksc5601 35) ?A) ; row 35: alpha-numeric
697 (modify-category-entry (make-char 'korean-ksc5601 37) ?G) ; row 37: Greek
698 (modify-category-entry (make-char 'korean-ksc5601 42) ?H) ; row 42: hiragana
699 (modify-category-entry (make-char 'korean-ksc5601 43) ?K) ; row 43: katakana
700 (modify-category-entry (make-char 'korean-ksc5601 44) ?Y) ; row 44: Cyrillic
701
702 (let ((parens "\e$(C!2!3!4!5!6!7!8!9!:!;!<!=#(#)#[#]#{#}\e(B" )
703 open close)
704 (dotimes (i (/ (length parens) 2)) ; the string is read as consecutive (opener closer) pairs
705 (setq open (aref parens (* i 2)) ; even index: opener
706 close (aref parens (1+ (* i 2)))) ; odd index: its closer
707 (modify-syntax-entry open (format "(%c" close)) ; opener: class "(" plus matching closer
708 (modify-syntax-entry close (format ")%c" open)))) ; closer: class ")" plus matching opener
709
710 (let ((tbl (standard-case-table))) ; case pairs for KS C 5601 rows #x23, #x25 and #x2C, registered in the standard case table
711 (dotimes (i 26) ; row #x23: 26 pairs, upper #x41+i with lower #x61+i
712 (set-case-syntax-pair (make-char 'korean-ksc5601 #x23 (+ #x41 i))
713 (make-char 'korean-ksc5601 #x23 (+ #x61 i)) tbl))
714 (dotimes (i 10) ; row #x25 Roman numerals: capitals are at #x30.. and small forms at #x21.., so the #x30 run is the UC argument
715 (set-case-syntax-pair (make-char 'korean-ksc5601 #x25 (+ #x30 i))
716 (make-char 'korean-ksc5601 #x25 (+ #x21 i)) tbl))
717 (dotimes (i 24) ; row #x25 (category ?G): 24 pairs, upper #x41+i with lower #x61+i
718 (set-case-syntax-pair (make-char 'korean-ksc5601 #x25 (+ #x41 i))
719 (make-char 'korean-ksc5601 #x25 (+ #x61 i)) tbl))
720 (dotimes (i 33) ; row #x2C (category ?Y): 33 pairs, upper #x21+i with lower #x51+i
721 (set-case-syntax-pair (make-char 'korean-ksc5601 #x2C (+ #x21 i))
722 (make-char 'korean-ksc5601 #x2C (+ #x51 i)) tbl)))
723
724 ;; Latin character set (latin-1,2,3,4,5,8,9)
725
726 (modify-category-entry (make-char 'latin-iso8859-1) ?l) ; each of the seven Latin charsets gets category `l' as a whole
727 (modify-category-entry (make-char 'latin-iso8859-2) ?l)
728 (modify-category-entry (make-char 'latin-iso8859-3) ?l)
729 (modify-category-entry (make-char 'latin-iso8859-4) ?l)
730 (modify-category-entry (make-char 'latin-iso8859-9) ?l)
731 (modify-category-entry (make-char 'latin-iso8859-14) ?l)
732 (modify-category-entry (make-char 'latin-iso8859-15) ?l)
733
734 (modify-category-entry (make-char 'latin-iso8859-1 160) ?\ ) ; code 160 of each charset counts as a space for indentation
735 (modify-category-entry (make-char 'latin-iso8859-2 160) ?\ )
736 (modify-category-entry (make-char 'latin-iso8859-3 160) ?\ )
737 (modify-category-entry (make-char 'latin-iso8859-4 160) ?\ )
738 (modify-category-entry (make-char 'latin-iso8859-9 160) ?\ )
739 (modify-category-entry (make-char 'latin-iso8859-14 160) ?\ )
740 (modify-category-entry (make-char 'latin-iso8859-15 160) ?\ )
741
;; Lao character set
;;
;; Category ?o goes on the `lao' charset and on every code point from
;; U+0E80 through U+0EFF.
(modify-category-entry (make-char 'lao) ?o)
(let ((ucs #xe80))
  (while (<= ucs #xeff)
    (modify-category-entry (decode-char 'ucs ucs) ?o)
    (setq ucs (1+ ucs))))
747
;; Per-character syntax and category assignments for Lao.
;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  CHARS is a string of
;; characters; `X-Y' inside it stands for the inclusive range X..Y.
;; Every character named is added to CATEGORY, and is given syntax SYNTAX
;; unless SYNTAX is "w", in which case the syntax table is left untouched.
748 (let ((deflist '(;; chars syntax category
749 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
750 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
751 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
752 ("\e(1XY\e(B" "w" ?3) ; vowel lower
753 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
754 ("\e(1\\e(B" "w" ?9) ; semivowel lower
755 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
756 ("\e(1Of\e(B" "_" ?5) ; symbol
757 ;; Unicode equivalents
758 ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
759 ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
760 ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
761 ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
762 ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark
763 ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower
764 ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
765 ("\e$,1DODf\e(B" "_" ?5) ; symbol
766 ))
767 elm chars len syntax category to ch i)
;; Consume DEFLIST one element at a time.
768 (while deflist
769 (setq elm (car deflist))
770 (setq chars (car elm)
771 len (length chars)
772 syntax (nth 1 elm)
773 category (nth 2 elm)
774 i 0)
;; Scan CHARS.  A plain character starts a one-element range (CH = TO).
;; A `-' makes the character after it the new upper bound TO; CH is not
;; reset there, so it still holds the value left by the inner loop,
;; i.e. one past the character that preceded the `-'.
775 (while (< i len)
776 (if (= (aref chars i) ?-)
777 (setq i (1+ i)
778 to (aref chars i))
779 (setq ch (aref chars i)
780 to ch))
;; Apply CATEGORY (and SYNTAX when it is not "w") to CH through TO.
781 (while (<= ch to)
782 (unless (string-equal syntax "w")
783 (modify-syntax-entry ch syntax))
784 (modify-category-entry ch category)
785 (setq ch (1+ ch)))
786 (setq i (1+ i)))
787 (setq deflist (cdr deflist))))
788
;; Thai character set (TIS620)
;;
;; Category ?t goes on the `thai-tis620' charset and on every code point
;; from U+0E00 through U+0E7F.
(modify-category-entry (make-char 'thai-tis620) ?t)
(let ((ucs #xe00))
  (while (<= ucs #xe7f)
    (modify-category-entry (decode-char 'ucs ucs) ?t)
    (setq ucs (1+ ucs))))
794
;; Per-character syntax and category assignments for Thai.
;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  CHARS is a string of
;; characters; `X-Y' inside it stands for the inclusive range X..Y.
;; Every character named is added to CATEGORY, and is given syntax SYNTAX
;; unless SYNTAX is "w", in which case the syntax table is left untouched.
795 (let ((deflist '(;; chars syntax category
796 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
797 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
798 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
799 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
800 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
801 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
802 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
803 ;; Unicode equivalents
804 ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
805 ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
806 ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
807 ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
808 ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
809 ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
810 ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
811 ))
812 elm chars len syntax category to ch i)
;; Consume DEFLIST one element at a time.
813 (while deflist
814 (setq elm (car deflist))
815 (setq chars (car elm)
816 len (length chars)
817 syntax (nth 1 elm)
818 category (nth 2 elm)
819 i 0)
;; Scan CHARS.  A plain character starts a one-element range (CH = TO).
;; A `-' makes the character after it the new upper bound TO; CH is not
;; reset there, so it still holds the value left by the inner loop,
;; i.e. one past the character that preceded the `-'.
820 (while (< i len)
821 (if (= (aref chars i) ?-)
822 (setq i (1+ i)
823 to (aref chars i))
824 (setq ch (aref chars i)
825 to ch))
;; Apply CATEGORY (and SYNTAX when it is not "w") to CH through TO.
826 (while (<= ch to)
827 (unless (string-equal syntax "w")
828 (modify-syntax-entry ch syntax))
829 (modify-category-entry ch category)
830 (setq ch (1+ ch)))
831 (setq i (1+ i)))
832 (setq deflist (cdr deflist))))
833
;; Tibetan character set
;;
;; Category ?q goes on both Tibetan charsets and on every code point from
;; U+0F00 through U+0FFF.
(dolist (cs '(tibetan tibetan-1-column))
  (modify-category-entry (make-char cs) ?q))
(let ((ucs #xf00))
  (while (<= ucs #xfff)
    (modify-category-entry (decode-char 'ucs ucs) ?q)
    (setq ucs (1+ ucs))))
840
;; Per-character syntax and category assignments for Tibetan.
;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  CHARS is a string of
;; characters; `X-Y' inside it stands for the inclusive range X..Y.
;; Every character named is added to CATEGORY, and is given syntax SYNTAX
;; unless SYNTAX is "w", in which case the syntax table is left untouched.
;; Some characters appear in more than one element and so end up in
;; several categories (e.g. ?| and ?>).
841 (let ((deflist '(;; chars syntax category
842 ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
843 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
844 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
845 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
846 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
847 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
848 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
849 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
850 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
851 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
852 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
853 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
854 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
855 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
856
857 ;; Unicode version (not complete)
858 ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
859 ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ;
860 ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
861 ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
862 ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lower vowel/modifier
863 ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
864 ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
865 ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
866 ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
867 ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
868 ))
869 elm chars len syntax category to ch i)
;; Consume DEFLIST one element at a time.
870 (while deflist
871 (setq elm (car deflist))
872 (setq chars (car elm)
873 len (length chars)
874 syntax (nth 1 elm)
875 category (nth 2 elm)
876 i 0)
;; Scan CHARS.  A plain character starts a one-element range (CH = TO).
;; A `-' makes the character after it the new upper bound TO; CH is not
;; reset there, so it still holds the value left by the inner loop,
;; i.e. one past the character that preceded the `-'.
877 (while (< i len)
878 (if (= (aref chars i) ?-)
879 (setq i (1+ i)
880 to (aref chars i))
881 (setq ch (aref chars i)
882 to ch))
;; Apply CATEGORY (and SYNTAX when it is not "w") to CH through TO.
883 (while (<= ch to)
884 (unless (string-equal syntax "w")
885 (modify-syntax-entry ch syntax))
886 (modify-category-entry ch category)
887 (setq ch (1+ ch)))
888 (setq i (1+ i)))
889 (setq deflist (cdr deflist))))
890
;; Vietnamese character set
;;
;; Both VISCII charsets get category ?v, and then category ?l as well so
;; that they can form a word together with Latin characters.  Syntax is
;; deliberately left unchanged.
(dolist (cat '(?v ?l))
  (dolist (cs '(vietnamese-viscii-lower vietnamese-viscii-upper))
    (modify-category-entry (make-char cs) cat)))
902
;; Pair every `vietnamese-viscii-upper' character with the
;; `vietnamese-viscii-lower' character at the same code, for codes 32
;; through 127.
(let ((tbl (standard-case-table)))
  (dotimes (offset (- 128 32))
    (let ((code (+ 32 offset)))
      (set-case-syntax-pair (make-char 'vietnamese-viscii-upper code)
                            (make-char 'vietnamese-viscii-lower code)
                            tbl))))
910
911 ;; Unicode (mule-unicode-0100-24ff)
912
913 (let ((tbl (standard-case-table)) c)
914
915 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
916 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
917 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
918 ;; Thus we have to check language-environment to handle casing
919 ;; correctly. Currently only I<->i is available.
920
;; Latin Extended-A, Latin Extended-B
;; U+0100..U+0233 all get category ?l.  Only even code points start a
;; case pair, and the two regular layouts below never overlap.
(setq c #x0100)
(while (<= c #x0233)
  (modify-category-entry (decode-char 'ucs c) ?l)
  (when (zerop (% c 2))
    (cond
     ;; Capital on the even code point, small letter right after it.
     ((or (<= c #x012e)
          (and (>= c #x014a) (<= c #x0177)))
      (set-case-syntax-pair
       (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
     ;; U+0139..U+0148: capital on the odd code point just before.
     ((and (>= c #x013a) (<= c #x0148))
      (set-case-syntax-pair
       (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))))
  (setq c (1+ c)))
;; Characters that do not fit either layout are listed one by one.
(set-downcase-syntax ?\e$,1 P\e(B ?i tbl)
(set-upcase-syntax ?I ?\e$,1 Q\e(B tbl)
(set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
(set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
(set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
(set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl)
(set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
(set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
(set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
945
946 ;; Latin Extended-B
;; Explicit case pairs, one call per pair; in each call the first
;; character argument is the uppercase form and the second the lowercase
;; form.  These are listed individually rather than generated by a loop.
947 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
948 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
949 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
950 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
951 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
952 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
953 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
954 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
955 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
956 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
957 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
958 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
959 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
960 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
961 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
962 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
963 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
964 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
965 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
966 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
967 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
968 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
969 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
970 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
971 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
972 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
973 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
974 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
975 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
976 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
977 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
978 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
979 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
980 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
981 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
982 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
;; NOTE(review): in each of the next three couples of calls, two
;; different first arguments share the same second (lowercase) argument
;; -- presumably the capital and titlecase forms of a digraph; confirm.
983 (set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
984 (set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
985 (set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
986 (set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
987 (set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
988 (set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
989 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
990 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
991 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
992 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
993 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
994 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
995 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
996 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
997 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
998 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
999 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
1000 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
1001 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
1002 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
1003 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
1004 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
1005 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
;; U+01F0 is skipped: per the case-folding entry quoted below it has no
;; single-character counterpart.
1006 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
;; Same shared-lowercase pattern as noted above for the next two calls.
1007 (set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
1008 (set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
1009 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
1010 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
1011 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
1012 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
1013 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
1014 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
1015 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
1016 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
1017 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
1018 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
1019 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
1020 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
1021 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
1022 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
1023 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
1024 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
1025 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
1026 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
1027 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
1028 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
1029 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
1030 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
1031 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
1032 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
1033 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
1034 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
1035 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
1036 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
1037 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
1038 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
1039 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
1040 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
1041
;; Latin Extended Additional
;; U+1E00..U+1EF9 all get category ?l.  Capitals sit on even code points
;; with the small letter right after; even code points strictly between
;; U+1E94 and U+1EA0 are not paired.
(setq c #x1e00)
(while (<= c #x1ef9)
  (modify-category-entry (decode-char 'ucs c) ?l)
  (if (and (zerop (% c 2))
           (not (and (> c #x1e94) (< c #x1ea0))))
      (set-case-syntax-pair
       (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
  (setq c (1+ c)))
1051
;; Greek
;; U+0370..U+03FF all get category ?g.
(setq c #x0370)
(while (<= c #x03ff)
  (modify-category-entry (decode-char 'ucs c) ?g)
  ;; U+0391..U+03A1 and U+03A3..U+03AB: small letter is 32 further on.
  (when (or (and (>= c #x0391) (<= c #x03a1))
            (and (>= c #x03a3) (<= c #x03ab)))
    (set-case-syntax-pair
     (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
  ;; U+03DA..U+03EE: capital on the even code point, small one after it.
  (when (and (zerop (% c 2))
             (>= c #x03da)
             (<= c #x03ee))
    (set-case-syntax-pair
     (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
  (setq c (1+ c)))
;; Remaining pairs are given explicitly.
(set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
(set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
(set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
(set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
(set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
(set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
(set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
1073
;; Armenian
;; U+0531..U+0556 are paired with the character #x30 further on.
(setq c #x531)
(while (< c #x557)
  (let ((upper (decode-char 'ucs c))
        (lower (decode-char 'ucs (+ c #x30))))
    (set-case-syntax-pair upper lower tbl))
  (setq c (1+ c)))
1080
;; Greek Extended
;; U+1F00..U+1FFF all get category ?g.  Up to U+1FA7 the regular rows
;; hold the small letters at offsets 0..7 and their capitals 8 further
;; on, so each qualifying C is registered as the lowercase of C + 8.
(setq c #x1f00)
(while (<= c #x1fff)
  (modify-category-entry (decode-char 'ucs c) ?g)
  (and (<= (logand c #x000f) 7)
       (<= c #x1fa7)
       ;; These four are individually excluded from the +8 rule.
       (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
       ;; Exclude the whole U+1F70..U+1F7F row, which does not follow the
       ;; +8 layout.  The masked value is always a multiple of 16, so it
       ;; has to be compared with #x70; the former comparison with 7
       ;; could never be false and the row was paired by mistake.
       (/= (logand c #x00f0) #x70)
       (set-case-syntax-pair
        (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
  (setq c (1+ c)))
;; Explicit Greek Extended case pairs (uppercase first, lowercase
;; second), given one by one instead of by the +8 rule of the loop above.
1092 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
1093 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1094 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1095 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1096 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1097 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1098 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1099 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1100 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1101 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1102 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1103 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1104 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1105 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1106 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1107 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1108 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1109 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1110 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1111 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1112 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1113 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1114 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1115 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1116
;; Cyrillic
;; U+0400..U+04FF all get category ?y.  The three pairing rules cover
;; disjoint ranges, so at most one of them applies to any code point.
(setq c #x0400)
(while (<= c #x04ff)
  (modify-category-entry (decode-char 'ucs c) ?y)
  (cond
   ;; U+0400..U+040F: small letter is 80 further on.
   ((<= c #x040f)
    (set-case-syntax-pair
     (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
   ;; U+0410..U+042F: small letter is 32 further on.
   ((<= c #x042f)
    (set-case-syntax-pair
     (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
   ;; Alternating ranges: capital on the even code point.
   ((and (zerop (% c 2))
         (or (and (>= c #x0460) (<= c #x0480))
             (and (>= c #x048c) (<= c #x04be))
             (and (>= c #x04d0) (<= c #x04f4))))
    (set-case-syntax-pair
     (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)))
  (setq c (1+ c)))
;; Remaining pairs are given explicitly.
(set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
(set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
(set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
(set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
(set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1141
;; General punctuation
;; Each entry is (FIRST LAST SYNTAX); every code point from FIRST through
;; LAST receives SYNTAX.  The third range holds the quotation marks,
;; which get punctuation syntax (like `).
(dolist (range '((#x2000 #x200b " ")
                 (#x2010 #x2016 "_")
                 (#x2017 #x201f ".")
                 (#x2020 #x2027 "_")))
  (setq c (car range))
  (while (<= c (nth 1 range))
    (set-case-syntax (decode-char 'ucs c) (nth 2 range) tbl)
    (setq c (1+ c))))
1158
;; Roman numerals
;; U+2160..U+216F are paired with the character #x10 further on.
(setq c #x2160)
(while (< c #x2170)
  (let ((upper (decode-char 'ucs c))
        (lower (decode-char 'ucs (+ c #x10))))
    (set-case-syntax-pair upper lower tbl))
  (setq c (1+ c)))
1165
;; Circled Latin
;; U+24B6..U+24CF are paired with the character 26 further on; both
;; members of each pair also get category ?l.
(setq c #x24b6)
(while (< c #x24d0)
  (let ((upper (decode-char 'ucs c))
        (lower (decode-char 'ucs (+ c 26))))
    (set-case-syntax-pair upper lower tbl)
    (modify-category-entry (decode-char 'ucs c) ?l)
    (modify-category-entry (decode-char 'ucs (+ c 26)) ?l))
  (setq c (1+ c)))
1174
;; Fullwidth Latin
;; U+FF21..U+FF3A are paired with the character #x20 further on; both
;; members of each pair also get category ?l.
(setq c #xff21)
(while (< c #xff3b)
  (let ((upper (decode-char 'ucs c))
        (lower (decode-char 'ucs (+ c #x20))))
    (set-case-syntax-pair upper lower tbl)
    (modify-category-entry (decode-char 'ucs c) ?l)
    (modify-category-entry (decode-char 'ucs (+ c #x20)) ?l))
  (setq c (1+ c)))
1183
;; Combining diacritics (U+0300..U+0362) and combining marks
;; (U+20D0..U+20E3): every code point in both ranges gets category ?^.
(dolist (range '((#x300 . #x362)
                 (#x20d0 . #x20e3)))
  (setq c (car range))
  (while (<= c (cdr range))
    (modify-category-entry (decode-char 'ucs c) ?^)
    (setq c (1+ c))))
1195
1196 ;; Fixme: syntax for symbols &c
1197 )
1198
;; Paired Unicode brackets.  Each string holds exactly two characters,
;; the opener followed by its closer; both receive paren syntax that
;; names the other one as the matching character.
(let ((bracket-pairs
       '("\e$,1sEsF\e(B"                   ; U+2045 U+2046
         "\e$,1s}s~\e(B"                   ; U+207D U+207E
         "\e$,1t-t.\e(B"                   ; U+208D U+208E
         "\e$,1{){*\e(B"                   ; U+2329 U+232A
         "\e$,1|T|U\e(B"                   ; U+23B4 U+23B5
         "\e$,2&H&I\e(B"                   ; U+2768 U+2769
         "\e$,2&J&K\e(B"                   ; U+276A U+276B
         "\e$,2&L&M\e(B"                   ; U+276C U+276D
         "\e$,2&P&Q\e(B"                   ; U+2770 U+2771
         "\e$,2&R&S\e(B"                   ; U+2772 U+2773
         "\e$,2&T&U\e(B"                   ; U+2774 U+2775
         "\e$,2'f'g\e(B"                   ; U+27E6 U+27E7
         "\e$,2'h'i\e(B"                   ; U+27E8 U+27E9
         "\e$,2'j'k\e(B"                   ; U+27EA U+27EB
         "\e$,2,#,$\e(B"                   ; U+2983 U+2984
         "\e$,2,%,&\e(B"                   ; U+2985 U+2986
         "\e$,2,',(\e(B"                   ; U+2987 U+2988
         "\e$,2,),*\e(B"                   ; U+2989 U+298A
         "\e$,2,+,,\e(B"                   ; U+298B U+298C
         "\e$,2,-,.\e(B"                   ; U+298D U+298E
         "\e$,2,/,0\e(B"                   ; U+298F U+2990
         "\e$,2,1,2\e(B"                   ; U+2991 U+2992
         "\e$,2,3,4\e(B"                   ; U+2993 U+2994
         "\e$,2,5,6\e(B"                   ; U+2995 U+2996
         "\e$,2,7,8\e(B"                   ; U+2997 U+2998
         "\e$,2-<-=\e(B"                   ; U+29FC U+29FD
         "\e$,2=H=I\e(B"                   ; U+3008 U+3009
         "\e$,2=J=K\e(B"                   ; U+300A U+300B
         "\e$,2=L=M\e(B"                   ; U+300C U+300D
         "\e$,2=N=O\e(B"                   ; U+300E U+300F
         "\e$,2=P=Q\e(B"                   ; U+3010 U+3011
         "\e$,2=T=U\e(B"                   ; U+3014 U+3015
         "\e$,2=V=W\e(B"                   ; U+3016 U+3017
         "\e$,2=X=Y\e(B"                   ; U+3018 U+3019
         "\e$,2=Z=[\e(B"                   ; U+301A U+301B
         "\e$,3m~m\7f\e(B"                 ; U+FD3E U+FD3F
         "\e$,3pUpV\e(B"                   ; U+FE35 U+FE36
         "\e$,3pWpX\e(B"                   ; U+FE37 U+FE38
         "\e$,3pYpZ\e(B"                   ; U+FE39 U+FE3A
         "\e$,3p[p\\e(B"                   ; U+FE3B U+FE3C
         "\e$,3p]p^\e(B"                   ; U+FE3D U+FE3E
         "\e$,3p_p`\e(B"                   ; U+FE3F U+FE40
         "\e$,3papb\e(B"                   ; U+FE41 U+FE42
         "\e$,3pcpd\e(B"                   ; U+FE43 U+FE44
         "\e$,3pypz\e(B"                   ; U+FE59 U+FE5A
         "\e$,3p{p|\e(B"                   ; U+FE5B U+FE5C
         "\e$,3p}p~\e(B"                   ; U+FE5D U+FE5E
         "\e$,3rhri\e(B"                   ; U+FF08 U+FF09
         "\e$,3s;s=\e(B"                   ; U+FF3B U+FF3D
         "\e$,3s[s]\e(B"                   ; U+FF5B U+FF5D
         "\e$,3s_s`\e(B"                   ; U+FF5F U+FF60
         "\e$,3sbsc\e(B"                   ; U+FF62 U+FF63
         )))
  (while bracket-pairs
    (let* ((couple (car bracket-pairs))
           (lhs (aref couple 0))
           (rhs (aref couple 1)))
      (modify-syntax-entry lhs (string ?\( rhs))
      (modify-syntax-entry rhs (string ?\) lhs)))
    (setq bracket-pairs (cdr bracket-pairs))))
1256
1257 \f
;;; Setting word boundary.

;; Both category-pair lists are installed in one go.  Each element is
;; (CATEGORY-BEFORE . CATEGORY-AFTER).
(setq word-combining-categories
      '((?l . ?l) (?C . ?C) (?C . ?H) (?C . ?K))

      ;; Pairs for the 2-byte character sets.
      word-separating-categories
      '((?A . ?K)                       ; Alpha numeric - Katakana
        (?A . ?C)                       ; Alpha numeric - Chinese
        (?H . ?A)                       ; Hiragana - Alpha numeric
        (?H . ?K)                       ; Hiragana - Katakana
        (?H . ?C)                       ; Hiragana - Chinese
        (?K . ?A)                       ; Katakana - Alpha numeric
        (?K . ?C)                       ; Katakana - Chinese
        (?C . ?A)                       ; Chinese - Alpha numeric
        (?C . ?K)))                     ; Chinese - Katakana
1277
1278 \f
;; Record, for each character set, the coding system best suited to
;; encoding it, as the charset property `preferred-coding-system'.
;; Entries are (CHARSET . CODING-SYSTEM).
(dolist (entry '((latin-iso8859-1 . iso-latin-1)
                 (latin-iso8859-2 . iso-latin-2)
                 (latin-iso8859-3 . iso-latin-3)
                 (latin-iso8859-4 . iso-latin-4)
                 (thai-tis620 . thai-tis620)
                 (greek-iso8859-7 . greek-iso-8bit)
                 (arabic-iso8859-6 . iso-2022-7bit)
                 (hebrew-iso8859-8 . hebrew-iso-8bit)
                 (katakana-jisx0201 . japanese-shift-jis)
                 (latin-jisx0201 . japanese-shift-jis)
                 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                 (latin-iso8859-9 . iso-latin-5)
                 (japanese-jisx0208-1978 . iso-2022-jp)
                 (chinese-gb2312 . cn-gb-2312)
                 (japanese-jisx0208 . iso-2022-jp)
                 (korean-ksc5601 . iso-2022-kr)
                 (japanese-jisx0212 . iso-2022-jp)
                 (chinese-cns11643-1 . iso-2022-cn)
                 (chinese-cns11643-2 . iso-2022-cn)
                 (chinese-big5-1 . chinese-big5)
                 (chinese-big5-2 . chinese-big5)
                 (chinese-sisheng . iso-2022-7bit)
                 (ipa . iso-2022-7bit)
                 (vietnamese-viscii-lower . vietnamese-viscii)
                 (vietnamese-viscii-upper . vietnamese-viscii)
                 (arabic-digit . iso-2022-7bit)
                 (arabic-1-column . iso-2022-7bit)
                 (ascii-right-to-left . iso-2022-7bit)
                 (lao . lao)
                 (arabic-2-column . iso-2022-7bit)
                 (indian-is13194 . devanagari)
                 (indian-glyph . devanagari)
                 (tibetan-1-column . tibetan)
                 (ethiopic . iso-2022-7bit)
                 (chinese-cns11643-3 . iso-2022-cn)
                 (chinese-cns11643-4 . iso-2022-cn)
                 (chinese-cns11643-5 . iso-2022-cn)
                 (chinese-cns11643-6 . iso-2022-cn)
                 (chinese-cns11643-7 . iso-2022-cn)
                 (indian-2-column . devanagari)
                 (tibetan . tibetan)
                 (latin-iso8859-14 . iso-latin-8)
                 (latin-iso8859-15 . iso-latin-9)))
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))
1328
1329 \f
;; Enable auto-filling for the charsets listed here by marking each
;; charset's character in `auto-fill-chars', and give each of them the
;; `nospace-between-words' property.  SPACE and NEWLINE are already set.
(dolist (cs '(katakana-jisx0201
              japanese-jisx0208 japanese-jisx0212
              chinese-gb2312 chinese-big5-1 chinese-big5-2))
  (aset auto-fill-chars (make-char cs) t)
  (put-charset-property cs 'nospace-between-words t))
1340
1341 \f
;; Put `utf-translate-cjk-mode' back to the value saved at the top of
;; this file (it was forced to nil while loading), then remove the
;; temporary variable that held the saved value.
1342 (setq utf-translate-cjk-mode saved-utf-translate-cjk-mode)
1343 (makunbound 'saved-utf-translate-cjk-mode)
1344
1345 ;;; Local Variables:
1346 ;;; coding: iso-2022-7bit
1347 ;;; End:
1348
1349 ;;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
1350 ;;; characters.el ends here