code.delx.au - gnu-emacs/blob - lisp/international/characters.el
Enable the correct case setting for
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6
7 ;; Keywords: multibyte character, character set, syntax, category
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;; This file contains multibyte characters. Save this file always in
29 ;; the coding system `iso-2022-7bit'.
30
31 ;; This file does not define the syntax for Latin-N character sets;
32 ;; those are defined by the files latin-N.el.
33
34 ;;; Code:
35
36 ;; We must set utf-translate-cjk-mode to nil while loading this file
37 ;; to avoid translating CJK characters in decode-char.
;; The current value is stashed in `saved-utf-translate-cjk-mode' first.
;; `defvar' leaves an already-bound variable alone, so loading this file
;; a second time keeps the value saved by the first load.
;; NOTE(review): presumably the saved value is restored at the end of
;; the file, which is outside this view -- confirm.
38 (defvar saved-utf-translate-cjk-mode utf-translate-cjk-mode)
39 (setq utf-translate-cjk-mode nil)
40
41 ;;; Predefined categories.
42
43 ;; For each character set.
44
;; One category per character set / script.  Each element is
;; (MNEMONIC . DOCSTRING); the categories are defined in list order.
(dolist (charset-category
         '((?a . "ASCII")
           (?l . "Latin")
           (?t . "Thai")
           (?g . "Greek")
           (?b . "Arabic")
           (?w . "Hebrew")
           (?y . "Cyrillic")
           (?k . "Japanese katakana")
           (?r . "Japanese roman")
           (?c . "Chinese")
           (?j . "Japanese")
           (?h . "Korean")
           (?e . "Ethiopic (Ge'ez)")
           (?v . "Vietnamese")
           (?i . "Indian")
           (?o . "Lao")
           (?q . "Tibetan")))
  (define-category (car charset-category) (cdr charset-category)))
62
63 ;; For each group (row) of 2-byte character sets.
64
;; Categories for groups (rows) of 2-byte character sets, plus Indian
;; glyphs.  Each element is (MNEMONIC . DOCSTRING).
(dolist (row-category
         '((?A . "Alpha-numeric characters of 2-byte character sets")
           (?C . "Chinese (Han) characters of 2-byte character sets")
           (?G . "Greek characters of 2-byte character sets")
           (?H . "Japanese Hiragana characters of 2-byte character sets")
           (?K . "Japanese Katakana characters of 2-byte character sets")
           (?N . "Korean Hangul characters of 2-byte character sets")
           (?Y . "Cyrillic characters of 2-byte character sets")
           (?I . "Indian Glyphs")))
  (define-category (car row-category) (cdr row-category)))
73
74 ;; For phonetic classifications.
75
;; Phonetic classification categories, using the digits 0-9 as
;; mnemonics.  Each element is (MNEMONIC . DOCSTRING).
(dolist (phonetic-category
         '((?0 . "consonant")
           (?1 . "base (independent) vowel")
           (?2 . "upper diacritical mark (including upper vowel)")
           (?3 . "lower diacritical mark (including lower vowel)")
           (?4 . "tone mark")
           (?5 . "symbol")
           (?6 . "digit")
           (?7 . "vowel-modifying diacritical mark")
           (?8 . "vowel-signs")
           (?9 . "semivowel lower")))
  (define-category (car phonetic-category) (cdr phonetic-category)))
86
87 ;; For filling: category `|' marks characters at which a line may be
;; broken.
88 (define-category ?| "While filling, we can break a line at this character.")
89
90 ;; For indentation calculation: the category mnemonic is the space
;; character itself.
91 (define-category ?\s
92 "This character counts as a space for indentation purposes.")
93
94 ;; Keep the following for `kinsoku' processing. See comments in
95 ;; kinsoku.el.  `>' forbids a character at the beginning of a line,
;; `<' forbids it at the end of a line.
96 (define-category ?> "A character which can't be placed at beginning of line.")
97 (define-category ?< "A character which can't be placed at end of line.")
98
99 ;; Combining diacritics and marks.
100 (define-category ?^ "Combining diacritic or mark")
101 \f
102 ;;; Setting syntax and category.
103
104 ;; ASCII
105
;; Every character code from 32 up to and including 126 is put in
;; category `a' (ASCII) and category `l' (Latin).
(dotimes (offset (- 127 32))
  (let ((code (+ 32 offset)))
    (modify-category-entry code ?a)
    (modify-category-entry code ?l)))
111
112 ;; Arabic character set
113
;; Put the generic character of each Arabic charset in category `b'.
(dolist (arabic-charset '(arabic-iso8859-6
                          arabic-digit
                          arabic-1-column
                          arabic-2-column))
  ;; (modify-syntax-entry (make-char arabic-charset) "w")
  (modify-category-entry (make-char arabic-charset) ?b))
;; Category `b' for three inclusive UCS code-point ranges, handled in
;; the order listed.  Each element is (FIRST . LAST).
(dolist (ucs-range '((#x600 . #x6ff)
                     (#xfb50 . #xfdff)
                     (#xfe70 . #xfefe)))
  (let ((code (car ucs-range))
        (last-code (cdr ucs-range)))
    (while (<= code last-code)
      (modify-category-entry (decode-char 'ucs code) ?b)
      (setq code (1+ code)))))
134
135 ;; Chinese character set (GB2312)
136
137 ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
;; GB2312 rows 33, 34 and 41 get symbol syntax ("_").
138 (modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
139 (modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
140 (modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
;; Ten bracket pairs.  First every opener gets open-paren syntax "("
;; followed by its matching closer ...
141 (modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
142 (modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
143 (modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
144 (modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
145 (modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
146 (modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
147 (modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
148 (modify-syntax-entry ?\\e$A#(\e(B "(\e$A#)\e(B")
149 (modify-syntax-entry ?\\e$A#{\e(B "(\e$A#}\e(B")
150 (modify-syntax-entry ?\\e$A#[\e(B "(\e$A#]\e(B")
;; ... then every closer gets close-paren syntax ")" followed by its
;; matching opener.
151 (modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
152 (modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
153 (modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
154 (modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
155 (modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
156 (modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
157 (modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
158 (modify-syntax-entry ?\\e$A#)\e(B ")\e$A#(\e(B")
159 (modify-syntax-entry ?\\e$A#}\e(B ")\e$A#{\e(B")
160 (modify-syntax-entry ?\\e$A#]\e(B ")\e$A#[\e(B")
161
;; Every character of the string CHARS gets punctuation syntax (".").
162 (let ((chars "\e$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d\e(B"))
163 (dotimes (i (length chars))
164 (modify-syntax-entry (aref chars i) ".")))
165
;; GB2312 categories: the whole charset is Chinese (`c') and breakable
;; when filling (`|'); rows 35-39 get the per-row 2-byte categories and
;; rows 48 through 126 get `C'.
(let ((gb 'chinese-gb2312))
  (modify-category-entry (make-char gb) ?c)
  (modify-category-entry (make-char gb) ?\|)
  ;; Each element is (ROW . CATEGORY).
  (dolist (row-category '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
    (modify-category-entry (make-char gb (car row-category))
                           (cdr row-category)))
  (dotimes (offset (- 127 48))
    (modify-category-entry (make-char gb (+ 48 offset)) ?C)))
177
178 ;; Chinese character set (BIG5)
179
;; Syntax for three Big5 code ranges.  Each element is
;; (FIRST LIMIT SYNTAX): characters from FIRST up to, but not
;; including, LIMIT receive SYNTAX.
(dolist (range-spec '((#xA141 #xA15D ".")
                      (#xA1A5 #xA1AD ".")
                      (#xA1AD #xA2AF "_")))
  (let ((char (decode-big5-char (nth 0 range-spec)))
        (limit (decode-big5-char (nth 1 range-spec)))
        (syntax (nth 2 range-spec)))
    (while (< char limit)
      (modify-syntax-entry char syntax)
      (setq char (1+ char)))))
195
;; PARENS lists bracket characters as consecutive open/close pairs.
;; Walk it two characters at a time: the first of each pair gets
;; open-paren syntax naming the second as its match, and the second
;; gets close-paren syntax naming the first.
196 (let ((parens "\e$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
197 open close)
198 (dotimes (i (/ (length parens) 2)) ; one iteration per pair
199 (setq open (aref parens (* i 2))
200 close (aref parens (1+ (* i 2))))
201 (modify-syntax-entry open (format "(%c" close))
202 (modify-syntax-entry close (format ")%c" open))))
203
;; Both Big5 charsets get, in this order, category `c' (Chinese),
;; `C' (Han characters of 2-byte sets) and `|' (line may break here).
(dolist (category (list ?c ?C ?\|))
  (dolist (big5-charset '(chinese-big5-1 chinese-big5-2))
    ;; (modify-syntax-entry (make-char big5-charset) "w")
    (modify-category-entry (make-char big5-charset) category)))
217
218
219 ;; Chinese character set (CNS11643)
220
;; The generic character of each CNS11643 charset gets categories
;; `c', `C' and `|', one charset at a time.
(dolist (cns-charset '(chinese-cns11643-1
                       chinese-cns11643-2
                       chinese-cns11643-3
                       chinese-cns11643-4
                       chinese-cns11643-5
                       chinese-cns11643-6
                       chinese-cns11643-7))
  (let ((generic (make-char cns-charset)))
    ;; (modify-syntax-entry generic "w")
    (dolist (category (list ?c ?C ?|))
      (modify-category-entry generic category))))
236
;; PARENS lists bracket characters as consecutive open/close pairs.
;; For each pair the opener gets "(" syntax matched with the closer,
;; and the closer gets ")" syntax matched with the opener.
237 (let ((parens "\e$(G!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
238 open close)
239 (dotimes (i (/ (length parens) 2)) ; one iteration per pair
240 (setq open (aref parens (* i 2))
241 close (aref parens (1+ (* i 2))))
242 (modify-syntax-entry open (format "(%c" close))
243 (modify-syntax-entry close (format ")%c" open))))
244
245 ;; Cyrillic character set (ISO-8859-5)
246
;; The whole cyrillic-iso8859-5 charset goes into category `y'.
247 (modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
248
;; Code 160 gets whitespace syntax; three further characters get
;; punctuation syntax.
249 (modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
250 (modify-syntax-entry ?\e,L-\e(B ".")
251 (modify-syntax-entry ?\e,Lp\e(B ".")
252 (modify-syntax-entry ?\e,L}\e(B ".")
;; Register upper-case/lower-case pairs in the standard case table.
;; Each call takes the upper-case character first, then the lower-case
;; character.
253 (let ((tbl (standard-case-table)))
254 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
255 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
256 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
257 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
258 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
259 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
260 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
261 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
262 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
263 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
264 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
265 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
266 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
267 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
268 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
269 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
270 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
271 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
272 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
273 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
274 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
275 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
276 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
277 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
278 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
279 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
280 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
281 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
282 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
283 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
284 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
285 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
286 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
287 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
288 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
289 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
290 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
291 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
292 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
293 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
294 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
295 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
296 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
297 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
298 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
299 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl)
;; NOTE(review): the entries below repeat the same pair layout with a
;; different character-set designation; presumably these are the
;; Unicode equivalents of the pairs above (the Greek table marks its
;; second half that way) -- confirm.
300 (set-case-syntax-pair ?\e$,1(!\e(B ?\e$,1(q\e(B tbl)
301 (set-case-syntax-pair ?\e$,1("\e(B ?\e$,1(r\e(B tbl)
302 (set-case-syntax-pair ?\e$,1(#\e(B ?\e$,1(s\e(B tbl)
303 (set-case-syntax-pair ?\e$,1($\e(B ?\e$,1(t\e(B tbl)
304 (set-case-syntax-pair ?\e$,1(%\e(B ?\e$,1(u\e(B tbl)
305 (set-case-syntax-pair ?\e$,1(&\e(B ?\e$,1(v\e(B tbl)
306 (set-case-syntax-pair ?\e$,1('\e(B ?\e$,1(w\e(B tbl)
307 (set-case-syntax-pair ?\e$,1((\e(B ?\e$,1(x\e(B tbl)
308 (set-case-syntax-pair ?\e$,1()\e(B ?\e$,1(y\e(B tbl)
309 (set-case-syntax-pair ?\e$,1(*\e(B ?\e$,1(z\e(B tbl)
310 (set-case-syntax-pair ?\e$,1(+\e(B ?\e$,1({\e(B tbl)
311 (set-case-syntax-pair ?\e$,1(,\e(B ?\e$,1(|\e(B tbl)
312 (set-case-syntax-pair ?\e$,1(.\e(B ?\e$,1(~\e(B tbl)
313 (set-case-syntax-pair ?\e$,1(/\e(B ?\e$,1(\7f\e(B tbl)
314 (set-case-syntax-pair ?\e$,1(0\e(B ?\e$,1(P\e(B tbl)
315 (set-case-syntax-pair ?\e$,1(1\e(B ?\e$,1(Q\e(B tbl)
316 (set-case-syntax-pair ?\e$,1(2\e(B ?\e$,1(R\e(B tbl)
317 (set-case-syntax-pair ?\e$,1(3\e(B ?\e$,1(S\e(B tbl)
318 (set-case-syntax-pair ?\e$,1(4\e(B ?\e$,1(T\e(B tbl)
319 (set-case-syntax-pair ?\e$,1(5\e(B ?\e$,1(U\e(B tbl)
320 (set-case-syntax-pair ?\e$,1(6\e(B ?\e$,1(V\e(B tbl)
321 (set-case-syntax-pair ?\e$,1(7\e(B ?\e$,1(W\e(B tbl)
322 (set-case-syntax-pair ?\e$,1(8\e(B ?\e$,1(X\e(B tbl)
323 (set-case-syntax-pair ?\e$,1(9\e(B ?\e$,1(Y\e(B tbl)
324 (set-case-syntax-pair ?\e$,1(:\e(B ?\e$,1(Z\e(B tbl)
325 (set-case-syntax-pair ?\e$,1(;\e(B ?\e$,1([\e(B tbl)
326 (set-case-syntax-pair ?\e$,1(<\e(B ?\e$,1(\\e(B tbl)
327 (set-case-syntax-pair ?\e$,1(=\e(B ?\e$,1(]\e(B tbl)
328 (set-case-syntax-pair ?\e$,1(>\e(B ?\e$,1(^\e(B tbl)
329 (set-case-syntax-pair ?\e$,1(?\e(B ?\e$,1(_\e(B tbl)
330 (set-case-syntax-pair ?\e$,1(@\e(B ?\e$,1(`\e(B tbl)
331 (set-case-syntax-pair ?\e$,1(A\e(B ?\e$,1(a\e(B tbl)
332 (set-case-syntax-pair ?\e$,1(B\e(B ?\e$,1(b\e(B tbl)
333 (set-case-syntax-pair ?\e$,1(C\e(B ?\e$,1(c\e(B tbl)
334 (set-case-syntax-pair ?\e$,1(D\e(B ?\e$,1(d\e(B tbl)
335 (set-case-syntax-pair ?\e$,1(E\e(B ?\e$,1(e\e(B tbl)
336 (set-case-syntax-pair ?\e$,1(F\e(B ?\e$,1(f\e(B tbl)
337 (set-case-syntax-pair ?\e$,1(G\e(B ?\e$,1(g\e(B tbl)
338 (set-case-syntax-pair ?\e$,1(H\e(B ?\e$,1(h\e(B tbl)
339 (set-case-syntax-pair ?\e$,1(I\e(B ?\e$,1(i\e(B tbl)
340 (set-case-syntax-pair ?\e$,1(J\e(B ?\e$,1(j\e(B tbl)
341 (set-case-syntax-pair ?\e$,1(K\e(B ?\e$,1(k\e(B tbl)
342 (set-case-syntax-pair ?\e$,1(L\e(B ?\e$,1(l\e(B tbl)
343 (set-case-syntax-pair ?\e$,1(M\e(B ?\e$,1(m\e(B tbl)
344 (set-case-syntax-pair ?\e$,1(N\e(B ?\e$,1(n\e(B tbl)
345 (set-case-syntax-pair ?\e$,1(O\e(B ?\e$,1(o\e(B tbl))
346
347 ;; Devanagari character set
348
349 ;;; Commented out since the categories appear not to be used anywhere
350 ;;; and word syntax is the default.
351 ;; (let ((deflist '(;; chars syntax category
352 ;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
353 ;; ; chandrabindu, anuswar, visarga
354 ;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
355 ;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
356 ;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
357 ;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
358 ;; ;; Unicode equivalents
359 ;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
360 ;; ; chandrabindu, anuswar, visarga
361 ;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
362 ;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
363 ;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
364 ;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
365 ;; ))
366 ;; elm chars len syntax category to ch i)
367 ;; (while deflist
368 ;; (setq elm (car deflist))
369 ;; (setq chars (car elm)
370 ;; len (length chars)
371 ;; syntax (nth 1 elm)
372 ;; category (nth 2 elm)
373 ;; i 0)
374 ;; (while (< i len)
375 ;; (if (= (aref chars i) ?-)
376 ;; (setq i (1+ i)
377 ;; to (aref chars i))
378 ;; (setq ch (aref chars i)
379 ;; to ch))
380 ;; (while (<= ch to)
381 ;; (modify-syntax-entry ch syntax)
382 ;; (modify-category-entry ch category)
383 ;; (setq ch (1+ ch)))
384 ;; (setq i (1+ i)))
385 ;; (setq deflist (cdr deflist))))
386
387 ;; Ethiopic character set
388
;; Category `e' for the ethiopic charset and for every UCS code point
;; from #x1200 through #x137c inclusive.
(modify-category-entry (make-char 'ethiopic) ?e)
;; (modify-syntax-entry (make-char 'ethiopic) "w")
(let ((code #x1200))
  (while (<= code #x137c)
    (modify-category-entry (decode-char 'ucs code) ?e)
    (setq code (1+ code))))
;; Each character in CHARS gets punctuation syntax (".").  The list
;; holds the ethiopic-charset characters first, then their Unicode
;; equivalents.
393 (let ((chars '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
394 ;; Unicode equivalents of the above:
395 ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B)))
396 (while chars ; consume the list front to back
397 (modify-syntax-entry (car chars) ".")
398 (setq chars (cdr chars))))
399
400 ;; Greek character set (ISO-8859-7)
401
;; Category `g' for the greek-iso8859-7 charset and for every UCS code
;; point from #x370 through #x3ff inclusive.
(modify-category-entry (make-char 'greek-iso8859-7) ?g)
(dotimes (offset (1+ (- #x3ff #x370)))
  (modify-category-entry (decode-char 'ucs (+ #x370 offset)) ?g))
407
408 ;; (let ((c 182))
409 ;; (while (< c 255)
410 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
411 ;; (setq c (1+ c))))
412 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
;; Three Greek characters get punctuation syntax in the current
;; syntax table.
413 (modify-syntax-entry ?\e,F7\e(B ".")
414 (modify-syntax-entry ?\e,F;\e(B ".")
415 (modify-syntax-entry ?\e,F=\e(B ".")
;; The remaining entries go into the standard case table: first the
;; syntax of non-letter characters, then upper/lower case pairs
;; (upper-case character first).
416 (let ((tbl (standard-case-table)))
417 ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
418 ;; in several cases.
419 (set-case-syntax ?\e,F!\e(B "." tbl)
420 (set-case-syntax ?\e,F"\e(B "." tbl)
421 (set-case-syntax ?\e,F&\e(B "." tbl) ; NOTE(review): same character as the next line, which overrides this with "_" -- looks redundant, confirm
422 (set-case-syntax ?\e,F&\e(B "_" tbl)
423 (set-case-syntax ?\e,F'\e(B "." tbl)
424 (set-case-syntax ?\e,F)\e(B "_" tbl)
425 (set-case-syntax ?\e,F+\e(B "." tbl)
426 (set-case-syntax ?\e,F,\e(B "_" tbl)
427 (set-case-syntax ?\e,F-\e(B "_" tbl)
428 (set-case-syntax ?\e,F/\e(B "." tbl)
429 (set-case-syntax ?\e,F0\e(B "_" tbl)
430 (set-case-syntax ?\e,F1\e(B "_" tbl)
431 ;; (set-case-syntax ?\e,F7\e(B "_" tbl)
432 ;; (set-case-syntax ?\e,F=\e(B "_" tbl)
433 (set-case-syntax-pair ?\e,FA\e(B ?\e,Fa\e(B tbl)
434 (set-case-syntax-pair ?\e,FB\e(B ?\e,Fb\e(B tbl)
435 (set-case-syntax-pair ?\e,FC\e(B ?\e,Fc\e(B tbl)
436 (set-case-syntax-pair ?\e,FD\e(B ?\e,Fd\e(B tbl)
437 (set-case-syntax-pair ?\e,FE\e(B ?\e,Fe\e(B tbl)
438 (set-case-syntax-pair ?\e,FF\e(B ?\e,Ff\e(B tbl)
439 (set-case-syntax-pair ?\e,FG\e(B ?\e,Fg\e(B tbl)
440 (set-case-syntax-pair ?\e,FH\e(B ?\e,Fh\e(B tbl)
441 (set-case-syntax-pair ?\e,FI\e(B ?\e,Fi\e(B tbl)
442 (set-case-syntax-pair ?\e,FJ\e(B ?\e,Fj\e(B tbl)
443 (set-case-syntax-pair ?\e,FK\e(B ?\e,Fk\e(B tbl)
444 (set-case-syntax-pair ?\e,FL\e(B ?\e,Fl\e(B tbl)
445 (set-case-syntax-pair ?\e,FM\e(B ?\e,Fm\e(B tbl)
446 (set-case-syntax-pair ?\e,FN\e(B ?\e,Fn\e(B tbl)
447 (set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
448 (set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
449 (set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
;; The upper-case character on the next two lines is the same: the
;; first lower-case form is registered with `set-upcase-syntax' only,
;; the second as a full case pair.
450 (set-upcase-syntax ?\e,FS\e(B ?\e,Fr\e(B tbl)
451 (set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
452 (set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
453 (set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
454 (set-case-syntax-pair ?\e,FV\e(B ?\e,Fv\e(B tbl)
455 (set-case-syntax-pair ?\e,FW\e(B ?\e,Fw\e(B tbl)
456 (set-case-syntax-pair ?\e,FX\e(B ?\e,Fx\e(B tbl)
457 (set-case-syntax-pair ?\e,FY\e(B ?\e,Fy\e(B tbl)
458 (set-case-syntax-pair ?\e,FZ\e(B ?\e,Fz\e(B tbl)
459 (set-case-syntax-pair ?\e,F[\e(B ?\e,F{\e(B tbl)
460 (set-case-syntax-pair ?\e,F?\e(B ?\e,F~\e(B tbl)
461 (set-case-syntax-pair ?\e,F>\e(B ?\e,F}\e(B tbl)
462 (set-case-syntax-pair ?\e,F<\e(B ?\e,F|\e(B tbl)
463 (set-case-syntax-pair ?\e,F6\e(B ?\e,F\\e(B tbl)
464 (set-case-syntax-pair ?\e,F8\e(B ?\e,F]\e(B tbl)
465 (set-case-syntax-pair ?\e,F9\e(B ?\e,F^\e(B tbl)
466 (set-case-syntax-pair ?\e,F:\e(B ?\e,F_\e(B tbl)
467 ;; Unicode equivalents
468 (set-case-syntax-pair ?\e$,1&q\e(B ?\e$,1'1\e(B tbl)
469 (set-case-syntax-pair ?\e$,1&r\e(B ?\e$,1'2\e(B tbl)
470 (set-case-syntax-pair ?\e$,1&s\e(B ?\e$,1'3\e(B tbl)
471 (set-case-syntax-pair ?\e$,1&t\e(B ?\e$,1'4\e(B tbl)
472 (set-case-syntax-pair ?\e$,1&u\e(B ?\e$,1'5\e(B tbl)
473 (set-case-syntax-pair ?\e$,1&v\e(B ?\e$,1'6\e(B tbl)
474 (set-case-syntax-pair ?\e$,1&w\e(B ?\e$,1'7\e(B tbl)
475 (set-case-syntax-pair ?\e$,1&x\e(B ?\e$,1'8\e(B tbl)
476 (set-case-syntax-pair ?\e$,1&y\e(B ?\e$,1'9\e(B tbl)
477 (set-case-syntax-pair ?\e$,1&z\e(B ?\e$,1':\e(B tbl)
478 (set-case-syntax-pair ?\e$,1&{\e(B ?\e$,1';\e(B tbl)
479 (set-case-syntax-pair ?\e$,1&|\e(B ?\e$,1'<\e(B tbl)
480 (set-case-syntax-pair ?\e$,1&}\e(B ?\e$,1'=\e(B tbl)
481 (set-case-syntax-pair ?\e$,1&~\e(B ?\e$,1'>\e(B tbl)
482 (set-case-syntax-pair ?\e$,1&\7f\e(B ?\e$,1'?\e(B tbl)
483 (set-case-syntax-pair ?\e$,1' \e(B ?\e$,1'@\e(B tbl)
484 (set-case-syntax-pair ?\e$,1'!\e(B ?\e$,1'A\e(B tbl)
485 (set-upcase-syntax ?\e$,1'#\e(B ?\e$,1'B\e(B tbl)
486 (set-case-syntax-pair ?\e$,1'#\e(B ?\e$,1'C\e(B tbl)
487 (set-case-syntax-pair ?\e$,1'$\e(B ?\e$,1'D\e(B tbl)
488 (set-case-syntax-pair ?\e$,1'%\e(B ?\e$,1'E\e(B tbl)
489 (set-case-syntax-pair ?\e$,1'&\e(B ?\e$,1'F\e(B tbl)
490 (set-case-syntax-pair ?\e$,1''\e(B ?\e$,1'G\e(B tbl)
491 (set-case-syntax-pair ?\e$,1'(\e(B ?\e$,1'H\e(B tbl)
492 (set-case-syntax-pair ?\e$,1')\e(B ?\e$,1'I\e(B tbl)
493 (set-case-syntax-pair ?\e$,1'*\e(B ?\e$,1'J\e(B tbl)
494 (set-case-syntax-pair ?\e$,1'+\e(B ?\e$,1'K\e(B tbl)
495 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
496 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
497 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
498 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
499 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
500 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
501 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl))
502
503 ;; Hebrew character set (ISO-8859-8)
504
;; Category `w' for the hebrew-iso8859-8 charset and for every UCS
;; code point from #x591 through #x5f4 inclusive.
(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
(dotimes (offset (1+ (- #x5f4 #x591)))
  (modify-category-entry (decode-char 'ucs (+ #x591 offset)) ?w))

;; Hebrew punctuation marks get punctuation syntax ("."), first the
;; two ISO-8859-8 codes and then the UCS code points.
(dolist (mark (list (make-char 'hebrew-iso8859-8 208) ; PASEQ
                    (make-char 'hebrew-iso8859-8 211) ; SOF PASUQ
                    (decode-char 'ucs #x5be)          ; MAQAF
                    (decode-char 'ucs #x5c0)          ; PASEQ
                    (decode-char 'ucs #x5c3)          ; SOF PASUQ
                    (decode-char 'ucs #x5f3)          ; GERESH
                    (decode-char 'ucs #x5f4)))        ; GERSHAYIM
  (modify-syntax-entry mark "."))
518
519 ;; (let ((c 224))
520 ;; (while (< c 251)
521 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
522 ;; (setq c (1+ c))))
523 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
524
525 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
526
;; Generic characters of the Indian charsets.  Each element is
;; (CHARSET . CATEGORY).
(dolist (charset-category '((indian-is13194 . ?i)
                            (indian-2-column . ?I)
                            (indian-glyph . ?I)))
  (modify-category-entry (make-char (car charset-category))
                         (cdr charset-category)))
;; Unicode Devanagari block: #x901 through #x970 inclusive get `i'.
(dotimes (offset (1+ (- #x970 #x901)))
  (modify-category-entry (decode-char 'ucs (+ #x901 offset)) ?i))
535
;; Phonetic categories for nine UCS blocks that share one layout.
;; Each element of RANGES is (FIRST LAST CATEGORY), with FIRST and LAST
;; given as inclusive offsets from a block's base code point.
(let ((ranges '((#x01 #x03 ?7)          ; vowel-modifying diacritical mark
                (#x05 #x14 ?1)          ; base vowel
                (#x15 #x39 ?0)          ; consonants
                (#x3e #x4d ?8)          ; vowel signs
                (#x51 #x54 ?4)          ; stress/tone mark
                (#x58 #x5f ?0)          ; consonants
                (#x60 #x61 ?1)          ; base vowel
                (#x62 #x63 ?8)          ; vowel signs
                (#x66 #x6f ?6))))       ; digits
  (dolist (block-base '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00))
    (dolist (range ranges)
      (let ((code (+ block-base (nth 0 range)))
            (last-code (+ block-base (nth 1 range)))
            (category (nth 2 range)))
        (while (<= code last-code)
          (modify-category-entry (decode-char 'ucs code) category)
          (setq code (1+ code)))))))
555
556 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
557
;; Categories for the generic characters of the Japanese charsets,
;; applied in list order.  Each element is (CHARSET . CATEGORY).
(dolist (charset-category '((katakana-jisx0201 . ?k)
                            (katakana-jisx0201 . ?j)
                            (latin-jisx0201 . ?r)
                            (japanese-jisx0208 . ?j)
                            (japanese-jisx0212 . ?j)
                            (katakana-jisx0201 . ?\|)
                            (japanese-jisx0208 . ?\|)
                            (japanese-jisx0212 . ?\|)))
  (modify-category-entry (make-char (car charset-category))
                         (cdr charset-category)))
566
567 ;; Unicode equivalents of JISX0201-kana
;; Categories for three inclusive UCS ranges.  Each element is
;; (FIRST LAST CATEGORY...); every code point in the range receives
;; the listed categories in order.
(dolist (range-spec
         '(;; Unicode equivalents of JISX0201-kana
           (#xff61 #xff9f ?k ?j ?\|)
           ;; Katakana block.  ?K is double width, ?k isn't specified
           (#x30a0 #x30ff ?k ?j ?\|)
           ;; Hiragana block.  ?H is actually defined to be double
           ;; width; category ?j is deliberately not applied here.
           (#x3040 #x309f ?H ?\|)))
  (let ((code (nth 0 range-spec))
        (last-code (nth 1 range-spec))
        (categories (nthcdr 2 range-spec)))
    (while (<= code last-code)
      (dolist (category categories)
        (modify-category-entry (decode-char 'ucs code) category))
      (setq code (1+ code)))))
592
593 ;; JISX0208
594 ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
;; JISX0208 rows 33, 34 and 40 get symbol syntax ("_").
595 (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
596 (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
597 (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
;; The characters listed in CHARS are then switched to word syntax
;; ("w"), overriding the row-wide setting above for those characters.
598 (let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
599 (while chars
600 (modify-syntax-entry (car chars) "w")
601 (setq chars (cdr chars))))
;; PARENS lists bracket characters as consecutive open/close pairs;
;; each opener gets "(" syntax matched with its closer and each closer
;; gets ")" syntax matched with its opener.
602 (let ((parens "\e$B!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![\e(B" )
603 open close)
604 (dotimes (i (/ (length parens) 2)) ; one iteration per pair
605 (setq open (aref parens (* i 2))
606 close (aref parens (1+ (* i 2))))
607 (modify-syntax-entry open (format "(%c" close))
608 (modify-syntax-entry close (format ")%c" open))))
609
;; Per-row categories for JISX0208: rows 35-39 get the 2-byte
;; categories listed as (ROW . CATEGORY); rows 48 through 126 get `C'.
(dolist (row-category '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
  (modify-category-entry (make-char 'japanese-jisx0208 (car row-category))
                         (cdr row-category)))
(dotimes (offset (- 127 48))
  (modify-category-entry (make-char 'japanese-jisx0208 (+ 48 offset)) ?C))
;; Individual JISX0208 characters: one character gets category `K'
;; only, the next two get both `K' and `H', and the final nine get `C'.
619 (modify-category-entry ?\e$B!<\e(B ?K)
620 (let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B)))
621 (while chars
622 (modify-category-entry (car chars) ?K)
623 (modify-category-entry (car chars) ?H)
624 (setq chars (cdr chars))))
625 (let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
626 (while chars
627 (modify-category-entry (car chars) ?C)
628 (setq chars (cdr chars))))
629
630 ;; JISX0212
631 ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
;; JISX0212 rows 33, 34 and 35 get symbol syntax ("_").
(dolist (row '(33 34 35))
  (modify-syntax-entry (make-char 'japanese-jisx0212 row) "_"))

;; The generic JISX0212 character is put in category `C'.
(modify-category-entry (make-char 'japanese-jisx0212) ?C)
637
638 ;; JISX0201-Kana
639 ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
;; Three JISX0201-Kana characters and the three characters listed
;; under "Unicode:" get punctuation syntax (".").
640 (let ((chars '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
641 ;; Unicode:
642 ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B)))
643 (while chars ; consume the list front to back
644 (modify-syntax-entry (car chars) ".")
645 (setq chars (cdr chars))))
646
;; The two JISX0201-Kana bracket characters form a delimiter pair: the
;; first gets open-paren syntax "(" naming the second as its match, and
;; the second gets close-paren syntax ")" naming the first.  (The
;; second entry previously also used "(", which made the closing
;; bracket behave as another opener, unlike every other bracket pair
;; set up in this file.)
(modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
(modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B")
649
650 ;; Korean character set (KSC5601)
651
652 ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
;; KSC5601 rows 33, 34 and 38-41 get symbol syntax ("_").
(dolist (row '(33 34 38 39 40 41))
  (modify-syntax-entry (make-char 'korean-ksc5601 row) "_"))

;; The whole charset is Korean (`h'); selected rows also get a 2-byte
;; row category, listed as (ROW . CATEGORY).
(modify-category-entry (make-char 'korean-ksc5601) ?h)
(dolist (row-category '((35 . ?A) (37 . ?G) (42 . ?H) (43 . ?K) (44 . ?Y)))
  (modify-category-entry (make-char 'korean-ksc5601 (car row-category))
                         (cdr row-category)))
666
;; PARENS lists bracket characters as consecutive open/close pairs.
;; For each pair the opener gets "(" syntax matched with the closer,
;; and the closer gets ")" syntax matched with the opener.
667 (let ((parens "\e$(C!2!3!4!5!6!7!8!9!:!;!<!=#(#)#[#]#{#}\e(B" )
668 open close)
669 (dotimes (i (/ (length parens) 2)) ; one iteration per pair
670 (setq open (aref parens (* i 2))
671 close (aref parens (1+ (* i 2))))
672 (modify-syntax-entry open (format "(%c" close))
673 (modify-syntax-entry close (format ")%c" open))))
674
675 ;; Latin character set (latin-1,2,3,4,5,8,9)
676
;; Every Latin charset is put in category `l'; afterwards code 160 of
;; each one is put in the space category (counts as a space for
;; indentation).
(let ((latin-charsets '(latin-iso8859-1
                        latin-iso8859-2
                        latin-iso8859-3
                        latin-iso8859-4
                        latin-iso8859-9
                        latin-iso8859-14
                        latin-iso8859-15)))
  (dolist (charset latin-charsets)
    (modify-category-entry (make-char charset) ?l))
  (dolist (charset latin-charsets)
    (modify-category-entry (make-char charset 160) ?\s)))
692
693 ;; Lao character set
694
;; Category `o' for the lao charset and for UCS #xe80 through #xeff.
695 (modify-category-entry (make-char 'lao) ?o)
696 (dotimes (i (1+ (- #xeff #xe80)))
697 (modify-category-entry (decode-char 'ucs (+ i #xe80)) ?o))
698
;; DEFLIST drives the per-character setup.  Each element is
;; (CHARS SYNTAX CATEGORY).  CHARS is a string of characters in which
;; a `-' between two characters denotes the inclusive range from the
;; first to the second.
;; NOTE(review): the lao tone-mark entry ends its range at `l' while
;; the Unicode tone-mark entry ends at `k', and the Unicode
;; "semivowel lower" entry lists two characters where the lao entry
;; lists one -- confirm whether these differences are intended.
699 (let ((deflist '(;; chars syntax category
700 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
701 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
702 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
703 ("\e(1XY\e(B" "w" ?3) ; vowel lower
704 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
705 ("\e(1\\e(B" "w" ?9) ; semivowel lower
706 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
707 ("\e(1Of\e(B" "_" ?5) ; symbol
708 ;; Unicode equivalents
709 ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
710 ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
711 ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
712 ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
713 ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark
714 ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower
715 ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
716 ("\e$,1DODf\e(B" "_" ?5) ; symbol
717 ))
718 elm chars len syntax category to ch i)
719 (while deflist
720 (setq elm (car deflist))
721 (setq chars (car elm)
722 len (length chars)
723 syntax (nth 1 elm)
724 category (nth 2 elm)
725 i 0)
726 (while (< i len)
727 (if (= (aref chars i) ?-) ; `-': the next character is the range's upper bound
728 (setq i (1+ i)
729 to (aref chars i))
730 (setq ch (aref chars i) ; plain character: a range of one
731 to ch))
;; After a single character CH has already been advanced past it, so
;; a following range resumes at the next code.
732 (while (<= ch to)
733 (unless (string-equal syntax "w") ; "w" entries leave the syntax table untouched
734 (modify-syntax-entry ch syntax))
735 (modify-category-entry ch category)
736 (setq ch (1+ ch)))
737 (setq i (1+ i)))
738 (setq deflist (cdr deflist))))
739
740 ;; Thai character set (TIS620)
741
;; Category `t' for the thai-tis620 charset and for UCS #xe00 through
;; #xe7f.
742 (modify-category-entry (make-char 'thai-tis620) ?t)
743 (dotimes (i (1+ (- #xe7f #xe00)))
744 (modify-category-entry (decode-char 'ucs (+ i #xe00)) ?t))
745
;; DEFLIST drives the per-character setup.  Each element is
;; (CHARS SYNTAX CATEGORY).  CHARS is a string of characters in which
;; a `-' between two characters denotes the inclusive range from the
;; first to the second.
746 (let ((deflist '(;; chars syntax category
747 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
748 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
749 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
750 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
751 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
752 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
753 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
754 ;; Unicode equivalents
755 ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
756 ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
757 ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
758 ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
759 ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
760 ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
761 ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
762 ))
763 elm chars len syntax category to ch i)
764 (while deflist
765 (setq elm (car deflist))
766 (setq chars (car elm)
767 len (length chars)
768 syntax (nth 1 elm)
769 category (nth 2 elm)
770 i 0)
771 (while (< i len)
772 (if (= (aref chars i) ?-) ; `-': the next character is the range's upper bound
773 (setq i (1+ i)
774 to (aref chars i))
775 (setq ch (aref chars i) ; plain character: a range of one
776 to ch))
;; After a single character CH has already been advanced past it, so
;; a following range resumes at the next code.
777 (while (<= ch to)
778 (unless (string-equal syntax "w") ; "w" entries leave the syntax table untouched
779 (modify-syntax-entry ch syntax))
780 (modify-category-entry ch category)
781 (setq ch (1+ ch)))
782 (setq i (1+ i)))
783 (setq deflist (cdr deflist))))
784
785 ;; Tibetan character set
786
;; Add category `q' to the characters returned by `make-char' for the
;; `tibetan' and `tibetan-1-column' charsets and to each code point
;; U+0F00..U+0FFF.
787 (modify-category-entry (make-char 'tibetan) ?q)
788 (modify-category-entry (make-char 'tibetan-1-column) ?q)
789 (dotimes (i (1+ (- #xfff #xf00)))
790 (modify-category-entry (decode-char 'ucs (+ i #xf00)) ?q))
791
;; Each DEFLIST entry is (CHARS SYNTAX CATEGORY).  Within CHARS a `-'
;; denotes a range running from just after the character handled last
;; up to the character following the `-'; any other element stands for
;; itself.  Every character visited gets CATEGORY added, and gets
;; SYNTAX unless SYNTAX is "w".  Some characters appear in several
;; entries and so receive several categories.
792 (let ((deflist '(;; chars syntax category
793 ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
794 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
795 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
796 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
797 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
798 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
799 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
800 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
801 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
802 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
803 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
804 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
805 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
806 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
807
808 ;; Unicode version (not complete)
809 ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
810 ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ;
811 ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
812 ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
813 ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lower vowel/modifier
814 ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
815 ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
816 ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
817 ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
818 ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
819 ))
820 elm chars len syntax category to ch i)
821 (while deflist
822 (setq elm (car deflist))
823 (setq chars (car elm)
824 len (length chars)
825 syntax (nth 1 elm)
826 category (nth 2 elm)
827 i 0)
828 (while (< i len)
;; On `-', CH is already one past the previous character; only the end
;; of the range is read from CHARS.
829 (if (= (aref chars i) ?-)
830 (setq i (1+ i)
831 to (aref chars i))
;; Otherwise the range is just this one character.
832 (setq ch (aref chars i)
833 to ch))
;; Leaves CH at TO + 1 for a possible following `-'.
834 (while (<= ch to)
835 (unless (string-equal syntax "w")
836 (modify-syntax-entry ch syntax))
837 (modify-category-entry ch category)
838 (setq ch (1+ ch)))
839 (setq i (1+ i)))
840 (setq deflist (cdr deflist))))
841
842 ;; Vietnamese character set
843
;; LOWER and UPPER are the characters `make-char' returns for the two
;; VISCII charsets when given no code.  Both receive category `v' and,
;; in addition, category `l' (Latin).
844 (let ((lower (make-char 'vietnamese-viscii-lower))
845 (upper (make-char 'vietnamese-viscii-upper)))
846 ;; (modify-syntax-entry lower "w")
847 ;; (modify-syntax-entry upper "w")
848 (modify-category-entry lower ?v)
849 (modify-category-entry upper ?v)
850 (modify-category-entry lower ?l) ; To make a word with
851 (modify-category-entry upper ?l) ; latin characters.
852 )
853
;; For every code I in 32..127, register position I of
;; `vietnamese-viscii-upper' and position I of `vietnamese-viscii-lower'
;; as an upper/lower case pair in the standard case table.
854 (let ((tbl (standard-case-table))
855 (i 32))
856 (while (< i 128)
857 (set-case-syntax-pair (make-char 'vietnamese-viscii-upper i)
858 (make-char 'vietnamese-viscii-lower i)
859 tbl)
860 (setq i (1+ i))))
861
862 ;; Unicode (mule-unicode-0100-24ff)
863
864 (let ((tbl (standard-case-table)) c)
865
866 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
867 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
868 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
869 ;; Thus we have to check language-environment to handle casing
870 ;; correctly. Currently only I<->i is available.
871
872 ;; Latin Extended-A, Latin Extended-B
;; Walk U+0100..U+0233; every code point gets category `l' (Latin).
;; Case pairs are added where the two cases are adjacent code points:
;;   - even C in U+0100..U+012E or U+014A..U+0177: C upper, C+1 lower;
;;   - even C in U+013A..U+0148: C-1 upper, C lower.
873 (setq c #x0100)
874 (while (<= c #x0233)
875 (modify-category-entry (decode-char 'ucs c) ?l)
876 (and (or (<= c #x012e)
877 (and (>= c #x014a) (<= c #x0177)))
878 (zerop (% c 2))
879 (set-case-syntax-pair
880 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
881 (and (>= c #x013a)
882 (<= c #x0148)
883 (zerop (% c 2))
884 (set-case-syntax-pair
885 (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))
886 (setq c (1+ c)))
;; Entries that do not fit the loop above.  The first two literals are
;; U+0130 and U+0131: `set-downcase-syntax' is called with U+0130 and
;; `i', `set-upcase-syntax' with `I' and U+0131.
;; NOTE(review): presumably these two calls are one-directional so that
;; the plain I<->i pair stays intact -- confirm in case-table.el.
887 (set-downcase-syntax ?\e$,1 P\e(B ?i tbl)
888 (set-upcase-syntax ?I ?\e$,1 Q\e(B tbl)
;; U+0132/U+0133, U+0134/U+0135, U+0136/U+0137.
889 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
890 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
891 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
;; U+0178 is paired with a character written with the \e,A designation
;; (presumably the Latin-1 y with diaeresis -- confirm).
892 (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl)
;; U+0179/U+017A, U+017B/U+017C, U+017D/U+017E.
893 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
894 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
895 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
896
897 ;; Latin Extended-B
;; Explicit upper/lower case pairs for upper-case letters in
;; U+0181..U+0232, written as mule-unicode-0100-24ff literals.  Several
;; lower-case partners lie outside this range; the first pair, for
;; example, is U+0181 with U+0253.
898 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
899 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
900 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
901 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
902 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
903 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
904 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
905 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
906 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
907 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
908 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
909 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
910 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
911 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
912 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
913 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
914 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
915 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
916 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
917 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
918 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
919 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
920 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
921 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
922 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
923 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
924 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
925 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
926 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
927 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
928 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
929 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
930 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
931 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
932 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
933 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
;; In the next three groups two different first arguments (U+01C4 and
;; U+01C5, U+01C7 and U+01C8, U+01CA and U+01CB) share one second
;; argument.
;; NOTE(review): presumably the later call of each group decides what
;; the shared lower-case letter upcases to -- confirm against
;; `set-case-syntax-pair'.
934 (set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
935 (set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
936 (set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
937 (set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
938 (set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
939 (set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
940 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
941 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
942 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
943 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
944 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
945 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
946 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
947 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
948 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
949 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
950 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
951 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
952 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
953 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
954 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
955 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
956 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
957 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
;; U+01F1 and U+01F2 again share one second argument (U+01F3).
958 (set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
959 (set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
960 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
961 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
962 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
963 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
964 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
965 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
966 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
967 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
968 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
969 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
970 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
971 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
972 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
973 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
974 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
975 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
976 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
977 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
978 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
979 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
980 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
981 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
982 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
983 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
984 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
985 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
986 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
987 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
988 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
989 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
990 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
991 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
992
993 ;; Latin Extended Additional
;; Walk U+1E00..U+1EF9; every code point gets category `l' (Latin).
;; Even C is registered as upper case with C+1 as lower case, except in
;; the gap U+1E95..U+1E9F, which the range test leaves out.
994 (setq c #x1e00)
995 (while (<= c #x1ef9)
996 (modify-category-entry (decode-char 'ucs c) ?l)
997 (and (zerop (% c 2))
998 (or (<= c #x1e94) (>= c #x1ea0))
999 (set-case-syntax-pair
1000 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1001 (setq c (1+ c)))
1002
1003 ;; Greek
;; Walk U+0370..U+03FF; every code point gets category `g' (Greek).
;;   - C in U+0391..U+03A1 or U+03A3..U+03AB: C upper, C+32 lower
;;     (U+03A2 is left out by the two sub-ranges);
;;   - even C in U+03DA..U+03EE: C upper, C+1 lower.
1004 (setq c #x0370)
1005 (while (<= c #x03ff)
1006 (modify-category-entry (decode-char 'ucs c) ?g)
1007 (if (or (and (>= c #x0391) (<= c #x03a1))
1008 (and (>= c #x03a3) (<= c #x03ab)))
1009 (set-case-syntax-pair
1010 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
1011 (and (>= c #x03da)
1012 (<= c #x03ee)
1013 (zerop (% c 2))
1014 (set-case-syntax-pair
1015 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1016 (setq c (1+ c)))
;; Pairs not reachable by a fixed offset: U+0386/U+03AC, U+0388/U+03AD,
;; U+0389/U+03AE, U+038A/U+03AF, U+038C/U+03CC, U+038E/U+03CD,
;; U+038F/U+03CE.
1017 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
1018 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
1019 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
1020 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
1021 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
1022 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
1023 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
1024
1025 ;; Armenian
;; Each C in U+0531..U+0556 is registered as upper case with C+#x30 as
;; its lower case.  No category is assigned in this loop.
1026 (setq c #x531)
1027 (while (<= c #x556)
1028 (set-case-syntax-pair (decode-char 'ucs c)
1029 (decode-char 'ucs (+ c #x30)) tbl)
1030 (setq c (1+ c)))
1031
1032 ;; Greek Extended
;; Every code point U+1F00..U+1FFF gets category `g' (Greek).  Within
;; U+1F00..U+1FA7, a code point whose low nibble is 0..7 is registered
;; as the lower case of the code point 8 above it, except for:
;;   - U+1F50, U+1F52, U+1F54 and U+1F56, skipped by the `memq';
;;   - the U+1F70..U+1F77 row, whose upper-case partners are listed
;;     explicitly below (e.g. U+1FBA with U+1F70).
;; The row test has to compare against #x70: (logand c #x00f0) is always
;; a multiple of 16, so the former comparison with 7 never excluded
;; anything and U+1F70..U+1F77 were paired with U+1F78..U+1F7F.
1033 (setq c #x1f00)
1034 (while (<= c #x1fff)
1035 (modify-category-entry (decode-char 'ucs c) ?g)
1036 (and (<= (logand c #x000f) 7)
1037 (<= c #x1fa7)
1038 (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
1039 (/= (logand c #x00f0) #x70) ; skip the U+1F7x row
1040 (set-case-syntax-pair
1041 (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
1042 (setq c (1+ c)))
;; Pairs that the +8 rule cannot express; the first argument is the
;; upper-case character in U+1FB8..U+1FFC, the second its lower case
;; (for instance U+1FB8 with U+1FB0 and U+1FBA with U+1F70).
1043 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
1044 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1045 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1046 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1047 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1048 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1049 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1050 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1051 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1052 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1053 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1054 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1055 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1056 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1057 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1058 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1059 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1060 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1061 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1062 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1063 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1064 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1065 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1066 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1067
1068 ;; Cyrillic
;; Walk U+0400..U+04FF; every code point gets category `y'.
;;   - C in U+0400..U+040F: C upper, C+80 lower;
;;   - C in U+0410..U+042F: C upper, C+32 lower;
;;   - even C in U+0460..U+0480, U+048C..U+04BE or U+04D0..U+04F4:
;;     C upper, C+1 lower.
1069 (setq c #x0400)
1070 (while (<= c #x04ff)
1071 (modify-category-entry (decode-char 'ucs c) ?y)
1072 (and (>= c #x0400)
1073 (<= c #x040f)
1074 (set-case-syntax-pair
1075 (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
1076 (and (>= c #x0410)
1077 (<= c #x042f)
1078 (set-case-syntax-pair
1079 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
1080 (and (zerop (% c 2))
1081 (or (and (>= c #x0460) (<= c #x0480))
1082 (and (>= c #x048c) (<= c #x04be))
1083 (and (>= c #x04d0) (<= c #x04f4)))
1084 (set-case-syntax-pair
1085 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1086 (setq c (1+ c)))
;; Odd-numbered upper-case letters the loop cannot reach:
;; U+04C1/U+04C2, U+04C3/U+04C4, U+04C7/U+04C8, U+04CB/U+04CC and
;; U+04F8/U+04F9.
1087 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
1088 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
1089 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
1090 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
1091 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1092
1093 ;; General punctuation
;; U+2000..U+200B get syntax " ", U+2010..U+2016 get syntax "_".
1094 (setq c #x2000)
1095 (while (<= c #x200b)
1096 (set-case-syntax (decode-char 'ucs c) " " tbl)
1097 (setq c (1+ c)))
1098 (setq c #x2010)
1099 (while (<= c #x2016)
1100 (set-case-syntax (decode-char 'ucs c) "_" tbl)
1101 (setq c (1+ c)))
1102 ;; Punctuation syntax for quotation marks (like `)
;; C is not reset before the next two loops: the first continues from
;; U+2017 through U+201F with ".", the second from U+2020 through
;; U+2027 with "_".
1103 (while (<= c #x201f)
1104 (set-case-syntax (decode-char 'ucs c) "." tbl)
1105 (setq c (1+ c)))
1106 (while (<= c #x2027)
1107 (set-case-syntax (decode-char 'ucs c) "_" tbl)
1108 (setq c (1+ c)))
1109
1110 ;; Roman numerals
;; Each C in U+2160..U+216F is registered as upper case with C+#x10
;; (U+2170..U+217F) as its lower case.
1111 (setq c #x2160)
1112 (while (<= c #x216f)
1113 (set-case-syntax-pair (decode-char 'ucs c)
1114 (decode-char 'ucs (+ c #x10)) tbl)
1115 (setq c (1+ c)))
1116
1117 ;; Circled Latin
;; Each C in U+24B6..U+24CF is registered as upper case with C+26 as
;; its lower case; both characters also get category `l' (Latin).
1118 (setq c #x24b6)
1119 (while (<= c #x24cf)
1120 (set-case-syntax-pair (decode-char 'ucs c)
1121 (decode-char 'ucs (+ c 26)) tbl)
1122 (modify-category-entry (decode-char 'ucs c) ?l)
1123 (modify-category-entry (decode-char 'ucs (+ c 26)) ?l)
1124 (setq c (1+ c)))
1125
1126 ;; Fullwidth Latin
;; Each C in U+FF21..U+FF3A is registered as upper case with C+#x20 as
;; its lower case; both characters also get category `l' (Latin).
1127 (setq c #xff21)
1128 (while (<= c #xff3a)
1129 (set-case-syntax-pair (decode-char 'ucs c)
1130 (decode-char 'ucs (+ c #x20)) tbl)
1131 (modify-category-entry (decode-char 'ucs c) ?l)
1132 (modify-category-entry (decode-char 'ucs (+ c #x20)) ?l)
1133 (setq c (1+ c)))
1134
1135 ;; Combining diacritics
;; Add category `^' to every code point U+0300..U+0362.
1136 (setq c #x300)
1137 (while (<= c #x362)
1138 (modify-category-entry (decode-char 'ucs c) ?^)
1139 (setq c (1+ c)))
1140
1141 ;; Combining marks
;; Add category `^' to every code point U+20D0..U+20E3.
1142 (setq c #x20d0)
1143 (while (<= c #x20e3)
1144 (modify-category-entry (decode-char 'ucs c) ?^)
1145 (setq c (1+ c)))
1146
1147 ;; Fixme: syntax for symbols &c
1148 )
1149
;; PAIRS is a list of two-character strings, opening delimiter first
;; and closing delimiter second (code points given in the trailing
;; comments).  For each string the first character gets
;; open-parenthesis syntax matching the second, and the second gets
;; close-parenthesis syntax matching the first.
1150 (let ((pairs
1151 '("\e$,1sEsF\e(B" ; U+2045 U+2046
1152 "\e$,1s}s~\e(B" ; U+207D U+207E
1153 "\e$,1t-t.\e(B" ; U+208D U+208E
1154 "\e$,1{){*\e(B" ; U+2329 U+232A
1155 "\e$,1|T|U\e(B" ; U+23B4 U+23B5
1156 "\e$,2&H&I\e(B" ; U+2768 U+2769
1157 "\e$,2&J&K\e(B" ; U+276A U+276B
1158 "\e$,2&L&M\e(B" ; U+276C U+276D
1159 "\e$,2&P&Q\e(B" ; U+2770 U+2771
1160 "\e$,2&R&S\e(B" ; U+2772 U+2773
1161 "\e$,2&T&U\e(B" ; U+2774 U+2775
1162 "\e$,2'f'g\e(B" ; U+27E6 U+27E7
1163 "\e$,2'h'i\e(B" ; U+27E8 U+27E9
1164 "\e$,2'j'k\e(B" ; U+27EA U+27EB
1165 "\e$,2,#,$\e(B" ; U+2983 U+2984
1166 "\e$,2,%,&\e(B" ; U+2985 U+2986
1167 "\e$,2,',(\e(B" ; U+2987 U+2988
1168 "\e$,2,),*\e(B" ; U+2989 U+298A
1169 "\e$,2,+,,\e(B" ; U+298B U+298C
1170 "\e$,2,-,.\e(B" ; U+298D U+298E
1171 "\e$,2,/,0\e(B" ; U+298F U+2990
1172 "\e$,2,1,2\e(B" ; U+2991 U+2992
1173 "\e$,2,3,4\e(B" ; U+2993 U+2994
1174 "\e$,2,5,6\e(B" ; U+2995 U+2996
1175 "\e$,2,7,8\e(B" ; U+2997 U+2998
1176 "\e$,2-<-=\e(B" ; U+29FC U+29FD
1177 "\e$,2=H=I\e(B" ; U+3008 U+3009
1178 "\e$,2=J=K\e(B" ; U+300A U+300B
1179 "\e$,2=L=M\e(B" ; U+300C U+300D
1180 "\e$,2=N=O\e(B" ; U+300E U+300F
1181 "\e$,2=P=Q\e(B" ; U+3010 U+3011
1182 "\e$,2=T=U\e(B" ; U+3014 U+3015
1183 "\e$,2=V=W\e(B" ; U+3016 U+3017
1184 "\e$,2=X=Y\e(B" ; U+3018 U+3019
1185 "\e$,2=Z=[\e(B" ; U+301A U+301B
1186 "\e$,3m~m\7f\e(B" ; U+FD3E U+FD3F
1187 "\e$,3pUpV\e(B" ; U+FE35 U+FE36
1188 "\e$,3pWpX\e(B" ; U+FE37 U+FE38
1189 "\e$,3pYpZ\e(B" ; U+FE39 U+FE3A
1190 "\e$,3p[p\\e(B" ; U+FE3B U+FE3C
1191 "\e$,3p]p^\e(B" ; U+FE3D U+FE3E
1192 "\e$,3p_p`\e(B" ; U+FE3F U+FE40
1193 "\e$,3papb\e(B" ; U+FE41 U+FE42
1194 "\e$,3pcpd\e(B" ; U+FE43 U+FE44
1195 "\e$,3pypz\e(B" ; U+FE59 U+FE5A
1196 "\e$,3p{p|\e(B" ; U+FE5B U+FE5C
1197 "\e$,3p}p~\e(B" ; U+FE5D U+FE5E
1198 "\e$,3rhri\e(B" ; U+FF08 U+FF09
1199 "\e$,3s;s=\e(B" ; U+FF3B U+FF3D
1200 "\e$,3s[s]\e(B" ; U+FF5B U+FF5D
1201 "\e$,3s_s`\e(B" ; U+FF5F U+FF60
1202 "\e$,3sbsc\e(B" ; U+FF62 U+FF63
1203 )))
1204 (dolist (elt pairs)
;; The syntax descriptor is "(" or ")" followed by the partner character.
1205 (modify-syntax-entry (aref elt 0) (string ?\( (aref elt 1)))
1206 (modify-syntax-entry (aref elt 1) (string ?\) (aref elt 0)))))
1207
1208 \f
1209 ;;; Setting word boundary.
1210
;; `word-combining-categories' is set to the single category pair
;; (l . l), i.e. Latin with Latin.
1211 (setq word-combining-categories
1212 '((?l . ?l)))
1213
;; `word-separating-categories' is set to the category pairs below; the
;; trailing comments name the two categories of each pair.
;; NOTE(review): presumably a word boundary is placed between adjacent
;; characters whose categories match one of these pairs, in this
;; order -- confirm against the variable's docstring.
1214 (setq word-separating-categories ; (2-byte character sets)
1215 '((?A . ?K) ; Alpha numeric - Katakana
1216 (?A . ?C) ; Alpha numeric - Chinese
1217 (?H . ?A) ; Hiragana - Alpha numeric
1218 (?H . ?K) ; Hiragana - Katakana
1219 (?H . ?C) ; Hiragana - Chinese
1220 (?K . ?A) ; Katakana - Alpha numeric
1221 (?K . ?C) ; Katakana - Chinese
1222 (?C . ?A) ; Chinese - Alpha numeric
1223 (?C . ?K) ; Chinese - Katakana
1224 ))
1225
1226 \f
1227 ;; For each character set, put the information of the most proper
1228 ;; coding system to encode it by `preferred-coding-system' property.
1229
;; L is an alist of (CHARSET . CODING-SYSTEM).  The loop at the end
;; stores each CODING-SYSTEM as the `preferred-coding-system' charset
;; property of its CHARSET.
1230 (let ((l '((latin-iso8859-1 . iso-latin-1)
1231 (latin-iso8859-2 . iso-latin-2)
1232 (latin-iso8859-3 . iso-latin-3)
1233 (latin-iso8859-4 . iso-latin-4)
1234 (thai-tis620 . thai-tis620)
1235 (greek-iso8859-7 . greek-iso-8bit)
1236 (arabic-iso8859-6 . iso-2022-7bit)
1237 (hebrew-iso8859-8 . hebrew-iso-8bit)
1238 (katakana-jisx0201 . japanese-shift-jis)
1239 (latin-jisx0201 . japanese-shift-jis)
1240 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
1241 (latin-iso8859-9 . iso-latin-5)
1242 (japanese-jisx0208-1978 . iso-2022-jp)
1243 (chinese-gb2312 . cn-gb-2312)
1244 (japanese-jisx0208 . iso-2022-jp)
1245 (korean-ksc5601 . iso-2022-kr)
1246 (japanese-jisx0212 . iso-2022-jp)
1247 (chinese-cns11643-1 . iso-2022-cn)
1248 (chinese-cns11643-2 . iso-2022-cn)
1249 (chinese-big5-1 . chinese-big5)
1250 (chinese-big5-2 . chinese-big5)
1251 (chinese-sisheng . iso-2022-7bit)
1252 (ipa . iso-2022-7bit)
1253 (vietnamese-viscii-lower . vietnamese-viscii)
1254 (vietnamese-viscii-upper . vietnamese-viscii)
1255 (arabic-digit . iso-2022-7bit)
1256 (arabic-1-column . iso-2022-7bit)
1257 (ascii-right-to-left . iso-2022-7bit)
1258 (lao . lao)
1259 (arabic-2-column . iso-2022-7bit)
1260 (indian-is13194 . devanagari)
1261 (indian-glyph . devanagari)
1262 (tibetan-1-column . tibetan)
1263 (ethiopic . iso-2022-7bit)
1264 (chinese-cns11643-3 . iso-2022-cn)
1265 (chinese-cns11643-4 . iso-2022-cn)
1266 (chinese-cns11643-5 . iso-2022-cn)
1267 (chinese-cns11643-6 . iso-2022-cn)
1268 (chinese-cns11643-7 . iso-2022-cn)
1269 (indian-2-column . devanagari)
1270 (tibetan . tibetan)
1271 (latin-iso8859-14 . iso-latin-8)
1272 (latin-iso8859-15 . iso-latin-9))))
;; Consume the alist front to back, one property assignment per entry.
1273 (while l
1274 (put-charset-property (car (car l)) 'preferred-coding-system (cdr (car l)))
1275 (setq l (cdr l))))
1276
1277 \f
1278 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
1279 ;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
1280 ;; property on the charsets.
;; L lists the charsets concerned.  For each one, the `auto-fill-chars'
;; entry for the character returned by (make-char CHARSET) is set to t
;; and the charset property `nospace-between-words' is set to t.
1281 (let ((l '(katakana-jisx0201
1282 japanese-jisx0208 japanese-jisx0212
1283 chinese-gb2312 chinese-big5-1 chinese-big5-2)))
1284 (while l
1285 (aset auto-fill-chars (make-char (car l)) t)
1286 (put-charset-property (car l) 'nospace-between-words t)
1287 (setq l (cdr l))))
1288
1289 \f
;; Put back the value of `utf-translate-cjk-mode' that was saved in
;; `saved-utf-translate-cjk-mode' at the top of this file (where the
;; mode was set to nil for the duration of loading), then remove the
;; temporary variable's binding.
1290 (setq utf-translate-cjk-mode saved-utf-translate-cjk-mode)
1291 (makunbound 'saved-utf-translate-cjk-mode)
1292
1293 ;;; Local Variables:
1294 ;;; coding: iso-2022-7bit
1295 ;;; End:
1296
1297 ;;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
1298 ;;; characters.el ends here