]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
Update FSF's address.
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1997, 2000, 2001, 2002, 2003, 2004
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H14PRO021
8
9 ;; Keywords: multibyte character, character set, syntax, category
10
11 ;; This file is part of GNU Emacs.
12
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; any later version.
17
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
22
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26 ;; Boston, MA 02110-1301, USA.
27
28 ;;; Commentary:
29
30 ;; This file contains multibyte characters. Save this file always in
31 ;; the coding system `iso-2022-7bit'.
32
33 ;; This file does not define the syntax for Latin-N character sets;
34 ;; those are defined by the files latin-N.el.
35
36 ;;; Code:
37
38 ;; We must set utf-translate-cjk-mode to nil while loading this file
39 ;; to avoid translating CJK characters in decode-char.
;; Remember the current value of `utf-translate-cjk-mode' and then
;; switch the mode off for the rest of the load.  Note that `defvar'
;; only initializes the variable when it is not already bound.
;; Presumably the saved value is restored near the end of the file;
;; that part is not visible here -- confirm.
40 (defvar saved-utf-translate-cjk-mode utf-translate-cjk-mode)
41 (setq utf-translate-cjk-mode nil)
42
43 ;;; Predefined categories.
44
45 ;; For each character set.
46
;; Each `define-category' form registers one character category: the
;; first argument is the one-character mnemonic, the second is its
;; documentation string.  The mnemonics defined here are the ones
;; passed to `modify-category-entry' throughout the rest of the file.
47 (define-category ?a "ASCII")
48 (define-category ?l "Latin")
49 (define-category ?t "Thai")
50 (define-category ?g "Greek")
51 (define-category ?b "Arabic")
52 (define-category ?w "Hebrew")
53 (define-category ?y "Cyrillic")
54 (define-category ?k "Japanese katakana")
55 (define-category ?r "Japanese roman")
56 (define-category ?c "Chinese")
57 (define-category ?j "Japanese")
58 (define-category ?h "Korean")
59 (define-category ?e "Ethiopic (Ge'ez)")
60 (define-category ?v "Vietnamese")
61 (define-category ?i "Indian")
62 (define-category ?o "Lao")
63 (define-category ?q "Tibetan")
64
65 ;; For each group (row) of 2-byte character sets.
66
67 (define-category ?A "Alpha-numeric characters of 2-byte character sets")
68 (define-category ?C "Chinese (Han) characters of 2-byte character sets")
69 (define-category ?G "Greek characters of 2-byte character sets")
70 (define-category ?H "Japanese Hiragana characters of 2-byte character sets")
71 (define-category ?K "Japanese Katakana characters of 2-byte character sets")
72 (define-category ?N "Korean Hangul characters of 2-byte character sets")
73 (define-category ?Y "Cyrillic characters of 2-byte character sets")
74 (define-category ?I "Indian Glyphs")
75
76 ;; For phonetic classifications.
77
;; The digit mnemonics ?0..?9 are attached to Indian, Lao and Thai
;; characters by the table-driven loops further down in this file.
78 (define-category ?0 "consonant")
79 (define-category ?1 "base (independent) vowel")
80 (define-category ?2 "upper diacritical mark (including upper vowel)")
81 (define-category ?3 "lower diacritical mark (including lower vowel)")
82 (define-category ?4 "tone mark")
83 (define-category ?5 "symbol")
84 (define-category ?6 "digit")
85 (define-category ?7 "vowel-modifying diacritical mark")
86 (define-category ?8 "vowel-signs")
87 (define-category ?9 "semivowel lower")
88
89 ;; For filling.
90 (define-category ?| "While filling, we can break a line at this character.")
91
92 ;; For indentation calculation.
;; The mnemonic here is the space character, written as `?\s'.
93 (define-category ?\s
94 "This character counts as a space for indentation purposes.")
95
96 ;; Keep the following for `kinsoku' processing. See comments in
97 ;; kinsoku.el.
98 (define-category ?> "A character which can't be placed at beginning of line.")
99 (define-category ?< "A character which can't be placed at end of line.")
100
101 ;; Combining
102 (define-category ?^ "Combining diacritic or mark")
103 \f
104 ;;; Setting syntax and category.
105
106 ;; ASCII
107
;; Walk the character codes 32 through 126 inclusive and add each one
;; to both the `a' (ASCII) and the `l' (Latin) category.  Codes below
;; 32 and code 127 are not touched by this loop.
108 (let ((ch 32))
109 (while (< ch 127) ; All ASCII characters have
110 (modify-category-entry ch ?a) ; the category `a' (ASCII)
111 (modify-category-entry ch ?l) ; and `l' (Latin).
112 (setq ch (1+ ch))))
113
114 ;; Arabic character set
115
;; Put the four Arabic charsets into category `b' (Arabic).  `make-char'
;; is called with the charset name only, so one entry is made per
;; charset rather than per character.
116 (let ((charsets '(arabic-iso8859-6
117 arabic-digit
118 arabic-1-column
119 arabic-2-column)))
120 (while charsets
121 ;; (modify-syntax-entry (make-char (car charsets)) "w")
122 (modify-category-entry (make-char (car charsets)) ?b)
123 (setq charsets (cdr charsets))))
;; Do the same, one code point at a time, for three inclusive Unicode
;; ranges: #x600-#x6ff, #xfb50-#xfdff and #xfe70-#xfefe.  The single
;; variable `ch' is reused as the cursor for all three loops.
124 (let ((ch #x600))
125 (while (<= ch #x6ff)
126 (modify-category-entry (decode-char 'ucs ch) ?b)
127 (setq ch (1+ ch)))
128 (setq ch #xfb50)
129 (while (<= ch #xfdff)
130 (modify-category-entry (decode-char 'ucs ch) ?b)
131 (setq ch (1+ ch)))
132 (setq ch #xfe70)
133 (while (<= ch #xfefe)
134 (modify-category-entry (decode-char 'ucs ch) ?b)
135 (setq ch (1+ ch))))
136
137 ;; Chinese character set (GB2312)
138
139 ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
;; Rows 33, 34 and 41 of GB2312 get symbol syntax ("_").  `make-char'
;; with only a row code presumably yields a generic character that
;; stands for the whole row -- confirm against the Emacs version in use.
140 (modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
141 (modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
142 (modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
;; Bracket pairs.  The first ten forms give each opening character the
;; open-paren class "(" followed by its partner; the next ten give each
;; partner the close-paren class ")" followed by its opener.  These
;; later, per-character entries refine the row-wide "_" set above.
143 (modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
144 (modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
145 (modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
146 (modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
147 (modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
148 (modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
149 (modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
150 (modify-syntax-entry ?\\e$A#(\e(B "(\e$A#)\e(B")
151 (modify-syntax-entry ?\\e$A#{\e(B "(\e$A#}\e(B")
152 (modify-syntax-entry ?\\e$A#[\e(B "(\e$A#]\e(B")
153 (modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
154 (modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
155 (modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
156 (modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
157 (modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
158 (modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
159 (modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
160 (modify-syntax-entry ?\\e$A#)\e(B ")\e$A#(\e(B")
161 (modify-syntax-entry ?\\e$A#}\e(B ")\e$A#{\e(B")
162 (modify-syntax-entry ?\\e$A#]\e(B ")\e$A#[\e(B")
163
;; Every character of the string below gets punctuation syntax (".").
164 (let ((chars "\e$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d\e(B"))
165 (dotimes (i (length chars))
166 (modify-syntax-entry (aref chars i) ".")))
167
;; Categories: the whole charset is `c' (Chinese) and `|' (line may be
;; broken here while filling); rows 35-39 get the 2-byte group
;; categories A/H/K/G/Y; rows 48 through 126 get `C' (Han).
168 (modify-category-entry (make-char 'chinese-gb2312) ?c)
169 (modify-category-entry (make-char 'chinese-gb2312) ?\|)
170 (modify-category-entry (make-char 'chinese-gb2312 35) ?A)
171 (modify-category-entry (make-char 'chinese-gb2312 36) ?H)
172 (modify-category-entry (make-char 'chinese-gb2312 37) ?K)
173 (modify-category-entry (make-char 'chinese-gb2312 38) ?G)
174 (modify-category-entry (make-char 'chinese-gb2312 39) ?Y)
175 (let ((row 48))
176 (while (< row 127)
177 (modify-category-entry (make-char 'chinese-gb2312 row) ?C)
178 (setq row (1+ row))))
179
180 ;; Chinese character set (BIG5)
181
;; Three half-open ranges [FROM, TO) of decoded Big5 characters.  The
;; upper bound itself is excluded because the loops test `(< from to)'.
;; #xA141..#xA15D and #xA1A5..#xA1AD get punctuation syntax (".");
;; #xA1AD..#xA2AF gets symbol syntax ("_").  The second range ends
;; exactly where the third begins.
182 (let ((from (decode-big5-char #xA141))
183 (to (decode-big5-char #xA15D)))
184 (while (< from to)
185 (modify-syntax-entry from ".")
186 (setq from (1+ from))))
187 (let ((from (decode-big5-char #xA1A5))
188 (to (decode-big5-char #xA1AD)))
189 (while (< from to)
190 (modify-syntax-entry from ".")
191 (setq from (1+ from))))
192 (let ((from (decode-big5-char #xA1AD))
193 (to (decode-big5-char #xA2AF)))
194 (while (< from to)
195 (modify-syntax-entry from "_")
196 (setq from (1+ from))))
197
;; PARENS is read as consecutive (open, close) pairs: the character at
;; an even index becomes an open paren matching the next character, and
;; that next character becomes the corresponding close paren.  A
;; trailing unpaired character would be ignored by the integer division.
198 (let ((parens "\e$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
199 open close)
200 (dotimes (i (/ (length parens) 2))
201 (setq open (aref parens (* i 2))
202 close (aref parens (1+ (* i 2))))
203 (modify-syntax-entry open (format "(%c" close))
204 (modify-syntax-entry close (format ")%c" open))))
205
;; Both Big5 charsets, addressed through a charset-wide character from
;; `make-char', get categories `c' (Chinese), `C' (Han) and `|'.
206 (let ((generic-big5-1-char (make-char 'chinese-big5-1))
207 (generic-big5-2-char (make-char 'chinese-big5-2)))
208 ;; (modify-syntax-entry generic-big5-1-char "w")
209 ;; (modify-syntax-entry generic-big5-2-char "w")
210
211 (modify-category-entry generic-big5-1-char ?c)
212 (modify-category-entry generic-big5-2-char ?c)
213
214 (modify-category-entry generic-big5-1-char ?C)
215 (modify-category-entry generic-big5-2-char ?C)
216
217 (modify-category-entry generic-big5-1-char ?\|)
218 (modify-category-entry generic-big5-2-char ?\|))
219
220
221 ;; Chinese character set (CNS11643)
222
;; For each of the seven CNS11643 plane charsets, add the charset-wide
;; character returned by `make-char' to categories `c', `C' and `|'.
223 (let ((cns-list '(chinese-cns11643-1
224 chinese-cns11643-2
225 chinese-cns11643-3
226 chinese-cns11643-4
227 chinese-cns11643-5
228 chinese-cns11643-6
229 chinese-cns11643-7))
230 generic-char)
231 (while cns-list
232 (setq generic-char (make-char (car cns-list)))
233 ;; (modify-syntax-entry generic-char "w")
234 (modify-category-entry generic-char ?c)
235 (modify-category-entry generic-char ?C)
236 (modify-category-entry generic-char ?|)
237 (setq cns-list (cdr cns-list))))
238
;; Same pairwise scheme as the Big5 block: characters at even indexes
;; of PARENS are openers, each followed immediately by its closer.
239 (let ((parens "\e$(G!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
240 open close)
241 (dotimes (i (/ (length parens) 2))
242 (setq open (aref parens (* i 2))
243 close (aref parens (1+ (* i 2))))
244 (modify-syntax-entry open (format "(%c" close))
245 (modify-syntax-entry close (format ")%c" open))))
246
247 ;; Cyrillic character set (ISO-8859-5)
248
;; The whole ISO-8859-5 charset goes into category `y' (Cyrillic).
249 (modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
250
;; Code 160 gets whitespace syntax; three further characters get
;; punctuation syntax.
251 (modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
252 (modify-syntax-entry ?\e,L-\e(B ".")
253 (modify-syntax-entry ?\e,Lp\e(B ".")
254 (modify-syntax-entry ?\e,L}\e(B ".")
;; Register upper/lower case pairs in the standard case table.  In each
;; `set-case-syntax-pair' call the first character is the uppercase
;; form and the second the lowercase form.
255 (let ((tbl (standard-case-table)))
256 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
257 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
258 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
259 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
260 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
261 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
262 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
263 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
264 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
265 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
266 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
267 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
268 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
269 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
270 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
271 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
272 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
273 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
274 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
275 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
276 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
277 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
278 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
279 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
280 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
281 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
282 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
283 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
284 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
285 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
286 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
287 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
288 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
289 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
290 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
291 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
292 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
293 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
294 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
295 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
296 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
297 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
298 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
299 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
300 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
301 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl)
;; The remaining pairs repeat the list above with a different escape
;; prefix and the same trailing bytes; presumably these are the Unicode
;; (mule-unicode) counterparts of the ISO-8859-5 characters -- confirm.
302 (set-case-syntax-pair ?\e$,1(!\e(B ?\e$,1(q\e(B tbl)
303 (set-case-syntax-pair ?\e$,1("\e(B ?\e$,1(r\e(B tbl)
304 (set-case-syntax-pair ?\e$,1(#\e(B ?\e$,1(s\e(B tbl)
305 (set-case-syntax-pair ?\e$,1($\e(B ?\e$,1(t\e(B tbl)
306 (set-case-syntax-pair ?\e$,1(%\e(B ?\e$,1(u\e(B tbl)
307 (set-case-syntax-pair ?\e$,1(&\e(B ?\e$,1(v\e(B tbl)
308 (set-case-syntax-pair ?\e$,1('\e(B ?\e$,1(w\e(B tbl)
309 (set-case-syntax-pair ?\e$,1((\e(B ?\e$,1(x\e(B tbl)
310 (set-case-syntax-pair ?\e$,1()\e(B ?\e$,1(y\e(B tbl)
311 (set-case-syntax-pair ?\e$,1(*\e(B ?\e$,1(z\e(B tbl)
312 (set-case-syntax-pair ?\e$,1(+\e(B ?\e$,1({\e(B tbl)
313 (set-case-syntax-pair ?\e$,1(,\e(B ?\e$,1(|\e(B tbl)
314 (set-case-syntax-pair ?\e$,1(.\e(B ?\e$,1(~\e(B tbl)
315 (set-case-syntax-pair ?\e$,1(/\e(B ?\e$,1(\7f\e(B tbl)
316 (set-case-syntax-pair ?\e$,1(0\e(B ?\e$,1(P\e(B tbl)
317 (set-case-syntax-pair ?\e$,1(1\e(B ?\e$,1(Q\e(B tbl)
318 (set-case-syntax-pair ?\e$,1(2\e(B ?\e$,1(R\e(B tbl)
319 (set-case-syntax-pair ?\e$,1(3\e(B ?\e$,1(S\e(B tbl)
320 (set-case-syntax-pair ?\e$,1(4\e(B ?\e$,1(T\e(B tbl)
321 (set-case-syntax-pair ?\e$,1(5\e(B ?\e$,1(U\e(B tbl)
322 (set-case-syntax-pair ?\e$,1(6\e(B ?\e$,1(V\e(B tbl)
323 (set-case-syntax-pair ?\e$,1(7\e(B ?\e$,1(W\e(B tbl)
324 (set-case-syntax-pair ?\e$,1(8\e(B ?\e$,1(X\e(B tbl)
325 (set-case-syntax-pair ?\e$,1(9\e(B ?\e$,1(Y\e(B tbl)
326 (set-case-syntax-pair ?\e$,1(:\e(B ?\e$,1(Z\e(B tbl)
327 (set-case-syntax-pair ?\e$,1(;\e(B ?\e$,1([\e(B tbl)
328 (set-case-syntax-pair ?\e$,1(<\e(B ?\e$,1(\\e(B tbl)
329 (set-case-syntax-pair ?\e$,1(=\e(B ?\e$,1(]\e(B tbl)
330 (set-case-syntax-pair ?\e$,1(>\e(B ?\e$,1(^\e(B tbl)
331 (set-case-syntax-pair ?\e$,1(?\e(B ?\e$,1(_\e(B tbl)
332 (set-case-syntax-pair ?\e$,1(@\e(B ?\e$,1(`\e(B tbl)
333 (set-case-syntax-pair ?\e$,1(A\e(B ?\e$,1(a\e(B tbl)
334 (set-case-syntax-pair ?\e$,1(B\e(B ?\e$,1(b\e(B tbl)
335 (set-case-syntax-pair ?\e$,1(C\e(B ?\e$,1(c\e(B tbl)
336 (set-case-syntax-pair ?\e$,1(D\e(B ?\e$,1(d\e(B tbl)
337 (set-case-syntax-pair ?\e$,1(E\e(B ?\e$,1(e\e(B tbl)
338 (set-case-syntax-pair ?\e$,1(F\e(B ?\e$,1(f\e(B tbl)
339 (set-case-syntax-pair ?\e$,1(G\e(B ?\e$,1(g\e(B tbl)
340 (set-case-syntax-pair ?\e$,1(H\e(B ?\e$,1(h\e(B tbl)
341 (set-case-syntax-pair ?\e$,1(I\e(B ?\e$,1(i\e(B tbl)
342 (set-case-syntax-pair ?\e$,1(J\e(B ?\e$,1(j\e(B tbl)
343 (set-case-syntax-pair ?\e$,1(K\e(B ?\e$,1(k\e(B tbl)
344 (set-case-syntax-pair ?\e$,1(L\e(B ?\e$,1(l\e(B tbl)
345 (set-case-syntax-pair ?\e$,1(M\e(B ?\e$,1(m\e(B tbl)
346 (set-case-syntax-pair ?\e$,1(N\e(B ?\e$,1(n\e(B tbl)
347 (set-case-syntax-pair ?\e$,1(O\e(B ?\e$,1(o\e(B tbl))
348
349 ;; Devanagari character set
350
351 ;;; Commented out since the categories appear not to be used anywhere
352 ;;; and word syntax is the default.
353 ;; (let ((deflist '(;; chars syntax category
354 ;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
355 ;; ; chandrabindu, anuswar, visarga
356 ;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
357 ;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
358 ;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
359 ;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
360 ;; ;; Unicode equivalents
361 ;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
362 ;; ; chandrabindu, anuswar, visarga
363 ;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
364 ;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
365 ;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
366 ;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
367 ;; ))
368 ;; elm chars len syntax category to ch i)
369 ;; (while deflist
370 ;; (setq elm (car deflist))
371 ;; (setq chars (car elm)
372 ;; len (length chars)
373 ;; syntax (nth 1 elm)
374 ;; category (nth 2 elm)
375 ;; i 0)
376 ;; (while (< i len)
377 ;; (if (= (aref chars i) ?-)
378 ;; (setq i (1+ i)
379 ;; to (aref chars i))
380 ;; (setq ch (aref chars i)
381 ;; to ch))
382 ;; (while (<= ch to)
383 ;; (modify-syntax-entry ch syntax)
384 ;; (modify-category-entry ch category)
385 ;; (setq ch (1+ ch)))
386 ;; (setq i (1+ i)))
387 ;; (setq deflist (cdr deflist))))
388
389 ;; Ethiopic character set
390
;; The `ethiopic' charset and every code point in the inclusive Unicode
;; range #x1200-#x137c go into category `e' (Ethiopic).
391 (modify-category-entry (make-char 'ethiopic) ?e)
392 ;; (modify-syntax-entry (make-char 'ethiopic) "w")
393 (dotimes (i (1+ (- #x137c #x1200)))
394 (modify-category-entry (decode-char 'ucs (+ #x1200 i)) ?e))
;; Each character listed in CHARS gets punctuation syntax (".").
395 (let ((chars '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
396 ;; Unicode equivalents of the above:
397 ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B)))
398 (while chars
399 (modify-syntax-entry (car chars) ".")
400 (setq chars (cdr chars))))
401
402 ;; Greek character set (ISO-8859-7)
403
;; The ISO-8859-7 charset and the inclusive Unicode range #x370-#x3ff
;; go into category `g' (Greek).
404 (modify-category-entry (make-char 'greek-iso8859-7) ?g)
405 (let ((c #x370))
406 (while (<= c #x3ff)
407 (modify-category-entry (decode-char 'ucs c) ?g)
408 (setq c (1+ c))))
409
410 ;; (let ((c 182))
411 ;; (while (< c 255)
412 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
413 ;; (setq c (1+ c))))
414 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
;; Three characters get punctuation syntax directly.  Two of them
;; reappear in the commented-out `set-case-syntax' forms further down.
415 (modify-syntax-entry ?\e,F7\e(B ".")
416 (modify-syntax-entry ?\e,F;\e(B ".")
417 (modify-syntax-entry ?\e,F=\e(B ".")
;; Non-letter syntax followed by upper/lower case pairs, all recorded
;; in the standard case table.
418 (let ((tbl (standard-case-table)))
419 ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
420 ;; in several cases.
421 (set-case-syntax ?\e,F!\e(B "." tbl)
422 (set-case-syntax ?\e,F"\e(B "." tbl)
;; NOTE(review): the next two forms name the same character literal, so
;; the "." set by the first is immediately replaced by the "_" of the
;; second.  Possibly the first was meant for a different character --
;; confirm before changing either line.
423 (set-case-syntax ?\e,F&\e(B "." tbl)
424 (set-case-syntax ?\e,F&\e(B "_" tbl)
425 (set-case-syntax ?\e,F'\e(B "." tbl)
426 (set-case-syntax ?\e,F)\e(B "_" tbl)
427 (set-case-syntax ?\e,F+\e(B "." tbl)
428 (set-case-syntax ?\e,F,\e(B "_" tbl)
429 (set-case-syntax ?\e,F-\e(B "_" tbl)
430 (set-case-syntax ?\e,F/\e(B "." tbl)
431 (set-case-syntax ?\e,F0\e(B "_" tbl)
432 (set-case-syntax ?\e,F1\e(B "_" tbl)
433 ;; (set-case-syntax ?\e,F7\e(B "_" tbl)
434 ;; (set-case-syntax ?\e,F=\e(B "_" tbl)
435 (set-case-syntax-pair ?\e,FA\e(B ?\e,Fa\e(B tbl)
436 (set-case-syntax-pair ?\e,FB\e(B ?\e,Fb\e(B tbl)
437 (set-case-syntax-pair ?\e,FC\e(B ?\e,Fc\e(B tbl)
438 (set-case-syntax-pair ?\e,FD\e(B ?\e,Fd\e(B tbl)
439 (set-case-syntax-pair ?\e,FE\e(B ?\e,Fe\e(B tbl)
440 (set-case-syntax-pair ?\e,FF\e(B ?\e,Ff\e(B tbl)
441 (set-case-syntax-pair ?\e,FG\e(B ?\e,Fg\e(B tbl)
442 (set-case-syntax-pair ?\e,FH\e(B ?\e,Fh\e(B tbl)
443 (set-case-syntax-pair ?\e,FI\e(B ?\e,Fi\e(B tbl)
444 (set-case-syntax-pair ?\e,FJ\e(B ?\e,Fj\e(B tbl)
445 (set-case-syntax-pair ?\e,FK\e(B ?\e,Fk\e(B tbl)
446 (set-case-syntax-pair ?\e,FL\e(B ?\e,Fl\e(B tbl)
447 (set-case-syntax-pair ?\e,FM\e(B ?\e,Fm\e(B tbl)
448 (set-case-syntax-pair ?\e,FN\e(B ?\e,Fn\e(B tbl)
449 (set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
450 (set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
451 (set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
;; Two lowercase characters share one uppercase character here: the
;; first is attached with `set-upcase-syntax', the second with the
;; regular pair form.
452 (set-upcase-syntax ?\e,FS\e(B ?\e,Fr\e(B tbl)
453 (set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
454 (set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
455 (set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
456 (set-case-syntax-pair ?\e,FV\e(B ?\e,Fv\e(B tbl)
457 (set-case-syntax-pair ?\e,FW\e(B ?\e,Fw\e(B tbl)
458 (set-case-syntax-pair ?\e,FX\e(B ?\e,Fx\e(B tbl)
459 (set-case-syntax-pair ?\e,FY\e(B ?\e,Fy\e(B tbl)
460 (set-case-syntax-pair ?\e,FZ\e(B ?\e,Fz\e(B tbl)
461 (set-case-syntax-pair ?\e,F[\e(B ?\e,F{\e(B tbl)
462 (set-case-syntax-pair ?\e,F?\e(B ?\e,F~\e(B tbl)
463 (set-case-syntax-pair ?\e,F>\e(B ?\e,F}\e(B tbl)
464 (set-case-syntax-pair ?\e,F<\e(B ?\e,F|\e(B tbl)
465 (set-case-syntax-pair ?\e,F6\e(B ?\e,F\\e(B tbl)
466 (set-case-syntax-pair ?\e,F8\e(B ?\e,F]\e(B tbl)
467 (set-case-syntax-pair ?\e,F9\e(B ?\e,F^\e(B tbl)
468 (set-case-syntax-pair ?\e,F:\e(B ?\e,F_\e(B tbl)
469 ;; Unicode equivalents
470 (set-case-syntax-pair ?\e$,1&q\e(B ?\e$,1'1\e(B tbl)
471 (set-case-syntax-pair ?\e$,1&r\e(B ?\e$,1'2\e(B tbl)
472 (set-case-syntax-pair ?\e$,1&s\e(B ?\e$,1'3\e(B tbl)
473 (set-case-syntax-pair ?\e$,1&t\e(B ?\e$,1'4\e(B tbl)
474 (set-case-syntax-pair ?\e$,1&u\e(B ?\e$,1'5\e(B tbl)
475 (set-case-syntax-pair ?\e$,1&v\e(B ?\e$,1'6\e(B tbl)
476 (set-case-syntax-pair ?\e$,1&w\e(B ?\e$,1'7\e(B tbl)
477 (set-case-syntax-pair ?\e$,1&x\e(B ?\e$,1'8\e(B tbl)
478 (set-case-syntax-pair ?\e$,1&y\e(B ?\e$,1'9\e(B tbl)
479 (set-case-syntax-pair ?\e$,1&z\e(B ?\e$,1':\e(B tbl)
480 (set-case-syntax-pair ?\e$,1&{\e(B ?\e$,1';\e(B tbl)
481 (set-case-syntax-pair ?\e$,1&|\e(B ?\e$,1'<\e(B tbl)
482 (set-case-syntax-pair ?\e$,1&}\e(B ?\e$,1'=\e(B tbl)
483 (set-case-syntax-pair ?\e$,1&~\e(B ?\e$,1'>\e(B tbl)
484 (set-case-syntax-pair ?\e$,1&\7f\e(B ?\e$,1'?\e(B tbl)
485 (set-case-syntax-pair ?\e$,1' \e(B ?\e$,1'@\e(B tbl)
486 (set-case-syntax-pair ?\e$,1'!\e(B ?\e$,1'A\e(B tbl)
;; Same shared-uppercase arrangement as in the ISO-8859-7 list above.
487 (set-upcase-syntax ?\e$,1'#\e(B ?\e$,1'B\e(B tbl)
488 (set-case-syntax-pair ?\e$,1'#\e(B ?\e$,1'C\e(B tbl)
489 (set-case-syntax-pair ?\e$,1'$\e(B ?\e$,1'D\e(B tbl)
490 (set-case-syntax-pair ?\e$,1'%\e(B ?\e$,1'E\e(B tbl)
491 (set-case-syntax-pair ?\e$,1'&\e(B ?\e$,1'F\e(B tbl)
492 (set-case-syntax-pair ?\e$,1''\e(B ?\e$,1'G\e(B tbl)
493 (set-case-syntax-pair ?\e$,1'(\e(B ?\e$,1'H\e(B tbl)
494 (set-case-syntax-pair ?\e$,1')\e(B ?\e$,1'I\e(B tbl)
495 (set-case-syntax-pair ?\e$,1'*\e(B ?\e$,1'J\e(B tbl)
496 (set-case-syntax-pair ?\e$,1'+\e(B ?\e$,1'K\e(B tbl)
497 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
498 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
499 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
500 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
501 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
502 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
503 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl))
504
505 ;; Hebrew character set (ISO-8859-8)
506
;; The ISO-8859-8 charset and the inclusive Unicode range #x591-#x5f4
;; go into category `w' (Hebrew).
507 (modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
508 (let ((c #x591))
509 (while (<= c #x5f4)
510 (modify-category-entry (decode-char 'ucs c) ?w)
511 (setq c (1+ c))))
512
;; Individual marks get punctuation syntax: two by charset code, five
;; by Unicode code point.
513 (modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ
514 (modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ
515 (modify-syntax-entry (decode-char 'ucs #x5be) ".") ; MAQAF
516 (modify-syntax-entry (decode-char 'ucs #x5c0) ".") ; PASEQ
517 (modify-syntax-entry (decode-char 'ucs #x5c3) ".") ; SOF PASUQ
518 (modify-syntax-entry (decode-char 'ucs #x5f3) ".") ; GERESH
519 (modify-syntax-entry (decode-char 'ucs #x5f4) ".") ; GERSHAYIM
520
521 ;; (let ((c 224))
522 ;; (while (< c 251)
523 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
524 ;; (setq c (1+ c))))
525 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
526
527 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
528
;; Charset-wide categories: `i' (Indian) for IS 13194, `I' (Indian
;; Glyphs) for the 2-column and glyph charsets.
529 (modify-category-entry (make-char 'indian-is13194) ?i)
530 (modify-category-entry (make-char 'indian-2-column) ?I)
531 (modify-category-entry (make-char 'indian-glyph) ?I)
532 ;; Unicode Devanagari block
533 (let ((c #x901))
534 (while (<= c #x970)
535 (modify-category-entry (decode-char 'ucs c) ?i)
536 (setq c (1+ c))))
537
;; Phonetic categories.  Each entry of L is (FIRST-OFFSET LAST-OFFSET
;; CATEGORY), both offsets inclusive.  The same offset table is applied
;; to each of the nine base code points in the outer `dolist', which
;; are spaced #x80 apart from #x900 to #xd00.
538 (let ((l '(;; RANGE CATEGORY MEANINGS
539 (#x01 #x03 ?7) ; vowel modifier
540 (#x05 #x14 ?1) ; base vowel
541 (#x15 #x39 ?0) ; consonants
542 (#x3e #x4d ?8) ; vowel modifier
543 (#x51 #x54 ?4) ; stress/tone mark
544 (#x58 #x5f ?0) ; consonants
545 (#x60 #x61 ?1) ; base vowel
546 (#x62 #x63 ?8) ; vowel modifier
547 (#x66 #x6f ?6) ; digits
548 )))
549 (dolist (elt1 '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00))
550 (dolist (elt2 l)
551 (let* ((from (car elt2))
;; COUNTS is the number of code points in the inclusive offset range.
552 (counts (1+ (- (nth 1 elt2) from)))
553 (category (nth 2 elt2)))
554 (dotimes (i counts)
555 (modify-category-entry (decode-char 'ucs (+ elt1 from i))
556 category))))))
557
558 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
559
;; Charset-wide categories.  JISX0201 katakana is `k', `j' and `|';
;; JISX0201 roman is `r' only; JISX0208 and JISX0212 are `j' and `|'.
560 (modify-category-entry (make-char 'katakana-jisx0201) ?k)
561 (modify-category-entry (make-char 'katakana-jisx0201) ?j)
562 (modify-category-entry (make-char 'latin-jisx0201) ?r)
563 (modify-category-entry (make-char 'japanese-jisx0208) ?j)
564 (modify-category-entry (make-char 'japanese-jisx0212) ?j)
565 (modify-category-entry (make-char 'katakana-jisx0201) ?\|)
566 (modify-category-entry (make-char 'japanese-jisx0208) ?\|)
567 (modify-category-entry (make-char 'japanese-jisx0212) ?\|)
568
569 ;; Unicode equivalents of JISX0201-kana
;; Inclusive range #xff61-#xff9f: categories `k', `j' and `|'.
570 (let ((c #xff61))
571 (while (<= c #xff9f)
572 (modify-category-entry (decode-char 'ucs c) ?k)
573 (modify-category-entry (decode-char 'ucs c) ?j)
574 (modify-category-entry (decode-char 'ucs c) ?\|)
575 (setq c (1+ c))))
576
577 ;; Katakana block
;; Inclusive range #x30a0-#x30ff: categories `k', `j' and `|'.
578 (let ((c #x30a0))
579 (while (<= c #x30ff)
580 ;; ?K is double width, ?k isn't specified
581 (modify-category-entry (decode-char 'ucs c) ?k)
582 (modify-category-entry (decode-char 'ucs c) ?j)
583 (modify-category-entry (decode-char 'ucs c) ?\|)
584 (setq c (1+ c))))
585
586 ;; Hiragana block
;; Inclusive range #x3040-#x309f: categories `H' and `|'.  Unlike the
;; two loops above, the `j' entry is commented out here.
587 (let ((c #x3040))
588 (while (<= c #x309f)
589 ;; ?H is actually defined to be double width
590 (modify-category-entry (decode-char 'ucs c) ?H)
591 ;;(modify-category-entry (decode-char 'ucs c) ?j)
592 (modify-category-entry (decode-char 'ucs c) ?\|)
593 (setq c (1+ c))))
594
595 ;; JISX0208
596 ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
;; Rows 33, 34 and 40 get symbol syntax ("_") as a whole.
597 (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
598 (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
599 (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
;; The listed characters are then given word syntax ("w") individually,
;; after the row-wide "_" above.  The same characters receive the
;; K/H/C categories at the end of this block.
600 (let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
601 (while chars
602 (modify-syntax-entry (car chars) "w")
603 (setq chars (cdr chars))))
;; PARENS holds consecutive (open, close) pairs; each opener becomes an
;; open paren matching its closer and vice versa.
604 (let ((parens "\e$B!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![\e(B" )
605 open close)
606 (dotimes (i (/ (length parens) 2))
607 (setq open (aref parens (* i 2))
608 close (aref parens (1+ (* i 2))))
609 (modify-syntax-entry open (format "(%c" close))
610 (modify-syntax-entry close (format ")%c" open))))
611
;; Row categories: 35 `A', 36 `H', 37 `K', 38 `G', 39 `Y', and rows 48
;; through 126 `C'.
612 (modify-category-entry (make-char 'japanese-jisx0208 35) ?A)
613 (modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
614 (modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
615 (modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
616 (modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
617 (let ((row 48))
618 (while (< row 127)
619 (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
620 (setq row (1+ row))))
;; Per-character categories for the characters given word syntax above:
;; the first is `K' only, the next two are both `K' and `H', and the
;; remaining nine are `C'.
621 (modify-category-entry ?\e$B!<\e(B ?K)
622 (let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B)))
623 (while chars
624 (modify-category-entry (car chars) ?K)
625 (modify-category-entry (car chars) ?H)
626 (setq chars (cdr chars))))
627 (let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
628 (while chars
629 (modify-category-entry (car chars) ?C)
630 (setq chars (cdr chars))))
631
632 ;; JISX0212
633 ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
;; Rows 33, 34 and 35 of JISX0212 get symbol syntax ("_"); the charset
;; as a whole goes into category `C' (Han).
634 (modify-syntax-entry (make-char 'japanese-jisx0212 33) "_")
635 (modify-syntax-entry (make-char 'japanese-jisx0212 34) "_")
636 (modify-syntax-entry (make-char 'japanese-jisx0212 35) "_")
637
638 (modify-category-entry (make-char 'japanese-jisx0212 ) ?C)
639
640 ;; JISX0201-Kana
641 ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
;; Three JISX0201-Kana characters and their three Unicode equivalents
;; get punctuation syntax (".").
642 (let ((chars '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
643 ;; Unicode:
644 ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B)))
645 (while chars
646 (modify-syntax-entry (car chars) ".")
647 (setq chars (cdr chars))))
648
;; JISX0201-Kana corner brackets.  The first character is the opener
;; and gets the open-paren class "(" with the second as its partner.
;; The second character is the closer and must get the close-paren
;; class ")" with the first as its partner, like every other bracket
;; pair in this file.  It previously also received "(", which made the
;; closing bracket behave as a second opener for paren matching and
;; sexp motion.
649 (modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
650 (modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B")
651
652 ;; Korean character set (KSC5601)
653
654 ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
;; Rows 33, 34 and 38 through 41 of KSC5601 get symbol syntax ("_").
655 (modify-syntax-entry (make-char 'korean-ksc5601 33) "_")
656 (modify-syntax-entry (make-char 'korean-ksc5601 34) "_")
657 (modify-syntax-entry (make-char 'korean-ksc5601 38) "_")
658 (modify-syntax-entry (make-char 'korean-ksc5601 39) "_")
659 (modify-syntax-entry (make-char 'korean-ksc5601 40) "_")
660 (modify-syntax-entry (make-char 'korean-ksc5601 41) "_")
661
;; Categories: the whole charset is `h' (Korean); rows 35, 37, 42, 43
;; and 44 get `A', `G', `H', `K' and `Y' respectively.  Unlike the
;; Chinese and Japanese charsets, no `|' entry is made here.
662 (modify-category-entry (make-char 'korean-ksc5601) ?h)
663 (modify-category-entry (make-char 'korean-ksc5601 35) ?A)
664 (modify-category-entry (make-char 'korean-ksc5601 37) ?G)
665 (modify-category-entry (make-char 'korean-ksc5601 42) ?H)
666 (modify-category-entry (make-char 'korean-ksc5601 43) ?K)
667 (modify-category-entry (make-char 'korean-ksc5601 44) ?Y)
668
;; PARENS holds consecutive (open, close) pairs.  These per-character
;; entries come after, and so refine, the row-wide "_" entries above.
669 (let ((parens "\e$(C!2!3!4!5!6!7!8!9!:!;!<!=#(#)#[#]#{#}\e(B" )
670 open close)
671 (dotimes (i (/ (length parens) 2))
672 (setq open (aref parens (* i 2))
673 close (aref parens (1+ (* i 2))))
674 (modify-syntax-entry open (format "(%c" close))
675 (modify-syntax-entry close (format ")%c" open))))
676
677 ;; Latin character set (latin-1,2,3,4,5,8,9)
678
;; Each of the seven Latin charsets goes into category `l' (Latin).
679 (modify-category-entry (make-char 'latin-iso8859-1) ?l)
680 (modify-category-entry (make-char 'latin-iso8859-2) ?l)
681 (modify-category-entry (make-char 'latin-iso8859-3) ?l)
682 (modify-category-entry (make-char 'latin-iso8859-4) ?l)
683 (modify-category-entry (make-char 'latin-iso8859-9) ?l)
684 (modify-category-entry (make-char 'latin-iso8859-14) ?l)
685 (modify-category-entry (make-char 'latin-iso8859-15) ?l)
686
;; Code 160 of each charset additionally goes into the space category,
;; i.e. it counts as a space for indentation purposes.  `?\ ' here is
;; the same mnemonic that the category definition writes as `?\s'.
687 (modify-category-entry (make-char 'latin-iso8859-1 160) ?\ )
688 (modify-category-entry (make-char 'latin-iso8859-2 160) ?\ )
689 (modify-category-entry (make-char 'latin-iso8859-3 160) ?\ )
690 (modify-category-entry (make-char 'latin-iso8859-4 160) ?\ )
691 (modify-category-entry (make-char 'latin-iso8859-9 160) ?\ )
692 (modify-category-entry (make-char 'latin-iso8859-14 160) ?\ )
693 (modify-category-entry (make-char 'latin-iso8859-15 160) ?\ )
694
695 ;; Lao character set
696
;; The `lao' charset and the inclusive Unicode range #xe80-#xeff go
;; into category `o' (Lao).
697 (modify-category-entry (make-char 'lao) ?o)
698 (dotimes (i (1+ (- #xeff #xe80)))
699 (modify-category-entry (decode-char 'ucs (+ i #xe80)) ?o))
700
;; Table-driven syntax/category assignment.  Each DEFLIST entry is
;; (CHARS SYNTAX CATEGORY).  CHARS is a string of characters in which
;; "X-Y" denotes the inclusive range from X to Y.  Entries whose SYNTAX
;; is "w" only receive the category; their syntax entry is left alone.
701 (let ((deflist '(;; chars syntax category
702 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
703 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
704 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
705 ("\e(1XY\e(B" "w" ?3) ; vowel lower
706 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
707 ("\e(1\\e(B" "w" ?9) ; semivowel lower
708 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
709 ("\e(1Of\e(B" "_" ?5) ; symbol
710 ;; Unicode equivalents
711 ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
712 ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
713 ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
714 ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
;; NOTE(review): this range ends one character earlier than the
;; corresponding `lao' charset row above -- confirm this is intended.
715 ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark
;; NOTE(review): this row lists two characters while the `lao' charset
;; row lists one; the extra character also appears in the Unicode
;; "vowel base" row, so it ends up in both categories -- confirm.
716 ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower
717 ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
718 ("\e$,1DODf\e(B" "_" ?5) ; symbol
719 ))
720 elm chars len syntax category to ch i)
721 (while deflist
722 (setq elm (car deflist))
723 (setq chars (car elm)
724 len (length chars)
725 syntax (nth 1 elm)
726 category (nth 2 elm)
727 i 0)
728 (while (< i len)
;; On "-", skip it and take the following character as the range end.
;; CH is deliberately not reset: it already holds the previous
;; character plus one, left there by the inner loop below.  Otherwise
;; process the single character at index I.
729 (if (= (aref chars i) ?-)
730 (setq i (1+ i)
731 to (aref chars i))
732 (setq ch (aref chars i)
733 to ch))
734 (while (<= ch to)
735 (unless (string-equal syntax "w")
736 (modify-syntax-entry ch syntax))
737 (modify-category-entry ch category)
738 (setq ch (1+ ch)))
739 (setq i (1+ i)))
740 (setq deflist (cdr deflist))))
741
742 ;; Thai character set (TIS620)
743
;; The TIS620 charset and the inclusive Unicode range #xe00-#xe7f go
;; into category `t' (Thai).
744 (modify-category-entry (make-char 'thai-tis620) ?t)
745 (dotimes (i (1+ (- #xe7f #xe00)))
746 (modify-category-entry (decode-char 'ucs (+ i #xe00)) ?t))
747
;; Table-driven syntax/category assignment, in the same format as the
;; Lao table: each entry is (CHARS SYNTAX CATEGORY), "X-Y" inside CHARS
;; is an inclusive range, and entries with SYNTAX "w" only receive the
;; category.
748 (let ((deflist '(;; chars syntax category
749 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
750 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
751 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
752 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
753 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
754 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
755 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
756 ;; Unicode equivalents
757 ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
758 ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
759 ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
760 ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
761 ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
762 ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
763 ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
764 ))
765 elm chars len syntax category to ch i)
766 (while deflist
767 (setq elm (car deflist))
768 (setq chars (car elm)
769 len (length chars)
770 syntax (nth 1 elm)
771 category (nth 2 elm)
772 i 0)
773 (while (< i len)
;; On "-", the next character becomes the range end and CH continues
;; from one past the previously processed character.  Otherwise CH and
;; TO are both set to the single character at index I.
774 (if (= (aref chars i) ?-)
775 (setq i (1+ i)
776 to (aref chars i))
777 (setq ch (aref chars i)
778 to ch))
779 (while (<= ch to)
780 (unless (string-equal syntax "w")
781 (modify-syntax-entry ch syntax))
782 (modify-category-entry ch category)
783 (setq ch (1+ ch)))
784 (setq i (1+ i)))
785 (setq deflist (cdr deflist))))
786
787 ;; Tibetan character set: tag the `tibetan' and `tibetan-1-column' charsets and the
788 ;; Unicode range U+0F00..U+0FFF with category ?q, then refine per character class.
789 (modify-category-entry (make-char 'tibetan) ?q)
790 (modify-category-entry (make-char 'tibetan-1-column) ?q)
791 (dotimes (i (1+ (- #xfff #xf00))) ; i = 0..#xff, one step per code point U+0F00..U+0FFF
792 (modify-category-entry (decode-char 'ucs (+ i #xf00)) ?q))
793
794 (let ((deflist '(;; chars syntax category -- in CHARS, a hyphen between two characters stands for the inclusive range
795 ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
796 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
797 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
798 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
799 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
800 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
801 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
802 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
803 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
804 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
805 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
806 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
807 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
808 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
809
810 ;; Unicode version (not complete)
811 ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
812 ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ;
813 ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
814 ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
815 ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lower vowel/modifier
816 ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
817 ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
818 ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
819 ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
820 ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
821 ))
822 elm chars len syntax category to ch i)
823 (while deflist ; one pass per (CHARS SYNTAX CATEGORY) row
824 (setq elm (car deflist))
825 (setq chars (car elm)
826 len (length chars)
827 syntax (nth 1 elm)
828 category (nth 2 elm)
829 i 0)
830 (while (< i len)
831 (if (= (aref chars i) ?-) ; a hyphen continues a range: CH is already one past the previous character,
832 (setq i (1+ i)
833 to (aref chars i)) ; so only the upper bound (the character after the hyphen) is read here
834 (setq ch (aref chars i) ; any other character is a one-element range
835 to ch))
836 (while (<= ch to)
837 (unless (string-equal syntax "w") ; rows marked with word syntax do not touch the syntax table
838 (modify-syntax-entry ch syntax))
839 (modify-category-entry ch category)
840 (setq ch (1+ ch)))
841 (setq i (1+ i)))
842 (setq deflist (cdr deflist))))
843
844 ;; Vietnamese character set (VISCII lower and upper halves)
845
846 ;; Each half is tagged with ?v and also with ?l, the latter so that
847 ;; its characters can make a word with latin characters.
848 ;; The syntax-entry calls stay disabled:
849 ;;   (modify-syntax-entry lower "w")
850 ;;   (modify-syntax-entry upper "w")
851 (dolist (category '(?v ?l))
852   (dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
853     (modify-category-entry (make-char charset) category)))
854
855
856 ;; Codes 32..127 of the upper charset pair with the same codes of the lower one.
857 (let ((case-tbl (standard-case-table)))
858   (dotimes (offset (- 128 32))
859     (let ((code (+ offset 32)))
860       (set-case-syntax-pair (make-char 'vietnamese-viscii-upper code)
861                             (make-char 'vietnamese-viscii-lower code)
862                             case-tbl))))
863
864 ;; Unicode (mule-unicode-0100-24ff)
865
866 (let ((tbl (standard-case-table)) c)
867
868 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
869 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
870 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
871 ;; Thus we have to check language-environment to handle casing
872 ;; correctly. Currently only I<->i is available.
873
874 ;; Latin Extended-A, Latin Extended-B: U+0100..U+0233 all get category ?l.
875 (setq c #x0100)
876 (while (<= c #x0233)
877 (modify-category-entry (decode-char 'ucs c) ?l)
878 (and (or (<= c #x012e) ; in these two ranges an even C is the upper case ...
879 (and (>= c #x014a) (<= c #x0177)))
880 (zerop (% c 2))
881 (set-case-syntax-pair
882 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)) ; ... and C+1 is its lower case
883 (and (>= c #x013a) ; in #x013a..#x0148 the parity is reversed:
884 (<= c #x0148)
885 (zerop (% c 2))
886 (set-case-syntax-pair
887 (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl)) ; odd C-1 is the upper case, even C the lower case
888 (setq c (1+ c)))
889 (set-downcase-syntax ?\e$,1 P\e(B ?i tbl) ; one-directional entries for the dotted/dotless I case discussed at the top of this form
890 (set-upcase-syntax ?I ?\e$,1 Q\e(B tbl)
891 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
892 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
893 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
894 (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl)
895 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
896 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
897 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
898
899 ;; Latin Extended-B: these pairs are listed one by one instead of being computed in a loop.
900 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
901 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
902 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
903 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
904 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
905 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
906 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
907 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
908 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
909 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
910 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
911 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
912 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
913 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
914 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
915 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
916 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
917 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
918 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
919 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
920 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
921 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
922 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
923 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
924 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
925 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
926 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
927 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
928 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
929 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
930 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
931 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
932 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
933 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
934 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
935 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
936 (set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
937 (set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
938 (set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
939 (set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
940 (set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
941 (set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
942 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
943 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
944 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
945 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
946 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
947 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
948 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
949 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
950 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
951 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
952 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
953 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
954 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
955 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
956 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
957 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
958 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
959 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
960 (set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
961 (set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
962 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
963 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
964 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
965 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
966 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
967 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
968 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
969 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
970 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
971 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
972 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
973 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
974 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
975 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
976 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
977 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
978 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
979 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
980 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
981 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
982 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
983 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
984 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
985 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
986 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
987 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
988 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
989 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
990 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
991 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
992 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
993 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
994
995 ;; Latin Extended Additional: U+1E00..U+1EF9 all get category ?l.
996 (setq c #x1e00)
997 (while (<= c #x1ef9)
998 (modify-category-entry (decode-char 'ucs c) ?l)
999 (and (zerop (% c 2)) ; even C is the upper case, C+1 its lower case ...
1000 (or (<= c #x1e94) (>= c #x1ea0)) ; ... except in the gap #x1e96..#x1e9e, which gets no pairs
1001 (set-case-syntax-pair
1002 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1003 (setq c (1+ c)))
1004
1005 ;; Greek: U+0370..U+03FF all get category ?g.
1006 (setq c #x0370)
1007 (while (<= c #x03ff)
1008 (modify-category-entry (decode-char 'ucs c) ?g)
1009 (if (or (and (>= c #x0391) (<= c #x03a1)) ; #x0391..#x03ab except #x03a2:
1010 (and (>= c #x03a3) (<= c #x03ab)))
1011 (set-case-syntax-pair
1012 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl)) ; the lower case sits 32 above the upper case
1013 (and (>= c #x03da) ; #x03da..#x03ee: even C pairs with C+1
1014 (<= c #x03ee)
1015 (zerop (% c 2))
1016 (set-case-syntax-pair
1017 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1018 (setq c (1+ c)))
1019 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl) ; the remaining pairs follow no fixed offset and are listed explicitly
1020 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
1021 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
1022 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
1023 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
1024 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
1025 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
1026
1027 ;; Armenian: pair each of #x531..#x556 with the code point #x30 above it.
1028 (setq c #x531)
1029 (while (<= c #x556)
1030 (set-case-syntax-pair (decode-char 'ucs c)
1031 (decode-char 'ucs (+ c #x30)) tbl) ; no category is assigned in this loop, only case pairs
1032 (setq c (1+ c)))
1033
1034 ;; Greek Extended: U+1F00..U+1FFF all get category ?g; regular rows are paired by a +8 rule.
1035 (setq c #x1f00)
1036 (while (<= c #x1fff)
1037 (modify-category-entry (decode-char 'ucs c) ?g)
1038 (and (<= (logand c #x000f) 7) ; C lies in the low half (x0..x7) of its 16-code row
1039 (<= c #x1fa7)
1040 (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
1041 (/= (logand c #x00f0) #x70) ; skip row #x1f7x; the masked value is a multiple of 16, so the former comparison with 7 never excluded anything
1042 (set-case-syntax-pair
1043 (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl)) ; the upper case sits 8 above the lower case
1044 (setq c (1+ c)))
1045 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl) ; explicit pairs, including the row-#x1f7x letters excluded from the loop
1046 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1047 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1048 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1049 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1050 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1051 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1052 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1053 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1054 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1055 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1056 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1057 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1058 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1059 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1060 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1061 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1062 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1063 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1064 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1065 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1066 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1067 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1068 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1069
1070 ;; Cyrillic: U+0400..U+04FF all get category ?y; case pairs follow three offset rules.
1071 (setq c #x0400)
1072 (while (<= c #x04ff)
1073 (modify-category-entry (decode-char 'ucs c) ?y)
1074 (and (>= c #x0400) ; #x0400..#x040f pair with the code point 80 above
1075 (<= c #x040f)
1076 (set-case-syntax-pair
1077 (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
1078 (and (>= c #x0410) ; #x0410..#x042f pair with the code point 32 above
1079 (<= c #x042f)
1080 (set-case-syntax-pair
1081 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
1082 (and (zerop (% c 2)) ; in the three ranges below, even C pairs with C+1
1083 (or (and (>= c #x0460) (<= c #x0480))
1084 (and (>= c #x048c) (<= c #x04be))
1085 (and (>= c #x04d0) (<= c #x04f4)))
1086 (set-case-syntax-pair
1087 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1088 (setq c (1+ c)))
1089 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
1090 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
1091 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
1092 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
1093 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1094
1095 ;; General punctuation: assign syntax classes to runs of code points in #x2000..#x2027.
1096 (setq c #x2000)
1097 (while (<= c #x200b) ; #x2000..#x200b: whitespace syntax
1098 (set-case-syntax (decode-char 'ucs c) " " tbl)
1099 (setq c (1+ c)))
1100 (setq c #x2010) ; #x200c..#x200f are skipped
1101 (while (<= c #x2016) ; #x2010..#x2016: symbol syntax
1102 (set-case-syntax (decode-char 'ucs c) "_" tbl)
1103 (setq c (1+ c)))
1104 ;; Punctuation syntax for quotation marks (like `)
1105 (while (<= c #x201f) ; C is not reset: this loop continues at #x2017, where the previous one stopped
1106 (set-case-syntax (decode-char 'ucs c) "." tbl)
1107 (setq c (1+ c)))
1108 (while (<= c #x2027) ; likewise continues at #x2020: symbol syntax again
1109 (set-case-syntax (decode-char 'ucs c) "_" tbl)
1110 (setq c (1+ c)))
1111
1112 ;; Roman numerals: pair each of #x2160..#x216f with the code point #x10 above it.
1113 (setq c #x2160)
1114 (while (<= c #x216f)
1115 (set-case-syntax-pair (decode-char 'ucs c)
1116 (decode-char 'ucs (+ c #x10)) tbl)
1117 (setq c (1+ c)))
1118
1119 ;; Circled Latin: pair each of #x24b6..#x24cf with the code point 26 above it.
1120 (setq c #x24b6)
1121 (while (<= c #x24cf)
1122 (set-case-syntax-pair (decode-char 'ucs c)
1123 (decode-char 'ucs (+ c 26)) tbl)
1124 (modify-category-entry (decode-char 'ucs c) ?l) ; both members of the pair get category ?l
1125 (modify-category-entry (decode-char 'ucs (+ c 26)) ?l)
1126 (setq c (1+ c)))
1127
1128 ;; Fullwidth Latin: pair each of #xff21..#xff3a with the code point #x20 above it.
1129 (setq c #xff21)
1130 (while (<= c #xff3a)
1131 (set-case-syntax-pair (decode-char 'ucs c)
1132 (decode-char 'ucs (+ c #x20)) tbl)
1133 (modify-category-entry (decode-char 'ucs c) ?l) ; both members of the pair get category ?l
1134 (modify-category-entry (decode-char 'ucs (+ c #x20)) ?l)
1135 (setq c (1+ c)))
1136
1137 ;; Combining diacritics: #x300..#x362 get category ?^.
1138 (setq c #x300)
1139 (while (<= c #x362)
1140 (modify-category-entry (decode-char 'ucs c) ?^)
1141 (setq c (1+ c)))
1142
1143 ;; Combining marks: #x20d0..#x20e3 get the same category ?^.
1144 (setq c #x20d0)
1145 (while (<= c #x20e3)
1146 (modify-category-entry (decode-char 'ucs c) ?^)
1147 (setq c (1+ c)))
1148
1149 ;; Fixme: syntax for symbols &c
1150 )
1151
1152 (let ((pairs ; each string holds two characters: an opening delimiter and its closing partner
1153 '("\e$,1sEsF\e(B" ; U+2045 U+2046
1154 "\e$,1s}s~\e(B" ; U+207D U+207E
1155 "\e$,1t-t.\e(B" ; U+208D U+208E
1156 "\e$,1{){*\e(B" ; U+2329 U+232A
1157 "\e$,1|T|U\e(B" ; U+23B4 U+23B5
1158 "\e$,2&H&I\e(B" ; U+2768 U+2769
1159 "\e$,2&J&K\e(B" ; U+276A U+276B
1160 "\e$,2&L&M\e(B" ; U+276C U+276D
1161 "\e$,2&P&Q\e(B" ; U+2770 U+2771
1162 "\e$,2&R&S\e(B" ; U+2772 U+2773
1163 "\e$,2&T&U\e(B" ; U+2774 U+2775
1164 "\e$,2'f'g\e(B" ; U+27E6 U+27E7
1165 "\e$,2'h'i\e(B" ; U+27E8 U+27E9
1166 "\e$,2'j'k\e(B" ; U+27EA U+27EB
1167 "\e$,2,#,$\e(B" ; U+2983 U+2984
1168 "\e$,2,%,&\e(B" ; U+2985 U+2986
1169 "\e$,2,',(\e(B" ; U+2987 U+2988
1170 "\e$,2,),*\e(B" ; U+2989 U+298A
1171 "\e$,2,+,,\e(B" ; U+298B U+298C
1172 "\e$,2,-,.\e(B" ; U+298D U+298E
1173 "\e$,2,/,0\e(B" ; U+298F U+2990
1174 "\e$,2,1,2\e(B" ; U+2991 U+2992
1175 "\e$,2,3,4\e(B" ; U+2993 U+2994
1176 "\e$,2,5,6\e(B" ; U+2995 U+2996
1177 "\e$,2,7,8\e(B" ; U+2997 U+2998
1178 "\e$,2-<-=\e(B" ; U+29FC U+29FD
1179 "\e$,2=H=I\e(B" ; U+3008 U+3009
1180 "\e$,2=J=K\e(B" ; U+300A U+300B
1181 "\e$,2=L=M\e(B" ; U+300C U+300D
1182 "\e$,2=N=O\e(B" ; U+300E U+300F
1183 "\e$,2=P=Q\e(B" ; U+3010 U+3011
1184 "\e$,2=T=U\e(B" ; U+3014 U+3015
1185 "\e$,2=V=W\e(B" ; U+3016 U+3017
1186 "\e$,2=X=Y\e(B" ; U+3018 U+3019
1187 "\e$,2=Z=[\e(B" ; U+301A U+301B
1188 "\e$,3m~m\7f\e(B" ; U+FD3E U+FD3F
1189 "\e$,3pUpV\e(B" ; U+FE35 U+FE36
1190 "\e$,3pWpX\e(B" ; U+FE37 U+FE38
1191 "\e$,3pYpZ\e(B" ; U+FE39 U+FE3A
1192 "\e$,3p[p\\e(B" ; U+FE3B U+FE3C
1193 "\e$,3p]p^\e(B" ; U+FE3D U+FE3E
1194 "\e$,3p_p`\e(B" ; U+FE3F U+FE40
1195 "\e$,3papb\e(B" ; U+FE41 U+FE42
1196 "\e$,3pcpd\e(B" ; U+FE43 U+FE44
1197 "\e$,3pypz\e(B" ; U+FE59 U+FE5A
1198 "\e$,3p{p|\e(B" ; U+FE5B U+FE5C
1199 "\e$,3p}p~\e(B" ; U+FE5D U+FE5E
1200 "\e$,3rhri\e(B" ; U+FF08 U+FF09
1201 "\e$,3s;s=\e(B" ; U+FF3B U+FF3D
1202 "\e$,3s[s]\e(B" ; U+FF5B U+FF5D
1203 "\e$,3s_s`\e(B" ; U+FF5F U+FF60
1204 "\e$,3sbsc\e(B" ; U+FF62 U+FF63
1205 )))
1206 (dolist (elt pairs)
1207 (modify-syntax-entry (aref elt 0) (string ?\( (aref elt 1))) ; first char: open-paren syntax, matched by the second char
1208 (modify-syntax-entry (aref elt 1) (string ?\) (aref elt 0))))) ; second char: close-paren syntax, matched by the first char
1209
1210 \f
1211 ;;; Setting word boundary: category pairs that combine into, or separate, words.
1212
1213 (setq word-combining-categories
1214 '((?l . ?l))) ; Latin - Latin
1215
1216 (setq word-separating-categories ; (2-byte character sets)
1217 '((?A . ?K) ; Alpha numeric - Katakana
1218 (?A . ?C) ; Alpha numeric - Chinese
1219 (?H . ?A) ; Hiragana - Alpha numeric
1220 (?H . ?K) ; Hiragana - Katakana
1221 (?H . ?C) ; Hiragana - Chinese
1222 (?K . ?A) ; Katakana - Alpha numeric
1223 (?K . ?C) ; Katakana - Chinese
1224 (?C . ?A) ; Chinese - Alpha numeric
1225 (?C . ?K) ; Chinese - Katakana
1226 ))
1227
1228 \f
1229 ;; Record, for each charset below, the coding system most proper for
1230 ;; encoding it, as the charset's `preferred-coding-system' property.
1231
1232 (dolist (entry '((latin-iso8859-1 . iso-latin-1)
1233                  (latin-iso8859-2 . iso-latin-2)
1234                  (latin-iso8859-3 . iso-latin-3)
1235                  (latin-iso8859-4 . iso-latin-4)
1236                  (thai-tis620 . thai-tis620)
1237                  (greek-iso8859-7 . greek-iso-8bit)
1238                  (arabic-iso8859-6 . iso-2022-7bit)
1239                  (hebrew-iso8859-8 . hebrew-iso-8bit)
1240                  (katakana-jisx0201 . japanese-shift-jis)
1241                  (latin-jisx0201 . japanese-shift-jis)
1242                  (cyrillic-iso8859-5 . cyrillic-iso-8bit)
1243                  (latin-iso8859-9 . iso-latin-5)
1244                  (japanese-jisx0208-1978 . iso-2022-jp)
1245                  (chinese-gb2312 . cn-gb-2312)
1246                  (japanese-jisx0208 . iso-2022-jp)
1247                  (korean-ksc5601 . iso-2022-kr)
1248                  (japanese-jisx0212 . iso-2022-jp)
1249                  (chinese-cns11643-1 . iso-2022-cn)
1250                  (chinese-cns11643-2 . iso-2022-cn)
1251                  (chinese-big5-1 . chinese-big5)
1252                  (chinese-big5-2 . chinese-big5)
1253                  (chinese-sisheng . iso-2022-7bit)
1254                  (ipa . iso-2022-7bit)
1255                  (vietnamese-viscii-lower . vietnamese-viscii)
1256                  (vietnamese-viscii-upper . vietnamese-viscii)
1257                  (arabic-digit . iso-2022-7bit)
1258                  (arabic-1-column . iso-2022-7bit)
1259                  (ascii-right-to-left . iso-2022-7bit)
1260                  (lao . lao)
1261                  (arabic-2-column . iso-2022-7bit)
1262                  (indian-is13194 . devanagari)
1263                  (indian-glyph . devanagari)
1264                  (tibetan-1-column . tibetan)
1265                  (ethiopic . iso-2022-7bit)
1266                  (chinese-cns11643-3 . iso-2022-cn)
1267                  (chinese-cns11643-4 . iso-2022-cn)
1268                  (chinese-cns11643-5 . iso-2022-cn)
1269                  (chinese-cns11643-6 . iso-2022-cn)
1270                  (chinese-cns11643-7 . iso-2022-cn)
1271                  (indian-2-column . devanagari)
1272                  (tibetan . tibetan)
1273                  (latin-iso8859-14 . iso-latin-8)
1274                  (latin-iso8859-15 . iso-latin-9)))
1275   ;; Each ENTRY is a (CHARSET . CODING-SYSTEM) cons.
1276   (put-charset-property (car entry) 'preferred-coding-system
1277                         (cdr entry)))
1278
1279 \f
1280 ;; Set up `auto-fill-chars' for the charsets that should invoke
1281 ;; auto-filling (SPACE and NEWLINE are already set), and mark each of
1282 ;; those charsets with the `nospace-between-words' property.
1283 (dolist (charset '(katakana-jisx0201
1284                    japanese-jisx0208 japanese-jisx0212
1285                    chinese-gb2312 chinese-big5-1 chinese-big5-2))
1286   ;; First flag the charset in `auto-fill-chars', then record the
1287   ;; property on the charset itself.
1288   (aset auto-fill-chars (make-char charset) t)
1289   (put-charset-property charset 'nospace-between-words t))
1290
1291 \f
1292 (setq utf-translate-cjk-mode saved-utf-translate-cjk-mode) ; put back the value that was saved before the mode was forced to nil at the top of this file
1293 (makunbound 'saved-utf-translate-cjk-mode) ; the temporary holder is no longer needed once the value is restored
1294
1295 ;;; Local Variables:
1296 ;;; coding: iso-2022-7bit
1297 ;;; End:
1298
1299 ;;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
1300 ;;; characters.el ends here