]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
(case table): Do nothing special for i and I.
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1997, 2000, 2001, 2002, 2003, 2004
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H14PRO021
8
9 ;; Keywords: multibyte character, character set, syntax, category
10
11 ;; This file is part of GNU Emacs.
12
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; any later version.
17
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
22
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26 ;; Boston, MA 02110-1301, USA.
27
28 ;;; Commentary:
29
30 ;; This file contains multibyte characters. Save this file always in
31 ;; the coding system `iso-2022-7bit'.
32
33 ;; This file does not define the syntax for Latin-N character sets;
34 ;; those are defined by the files latin-N.el.
35
36 ;;; Code:
37
38 ;; We must set utf-translate-cjk-mode to nil while loading this file
39 ;; to avoid translating CJK characters in decode-char.
40 (defvar saved-utf-translate-cjk-mode utf-translate-cjk-mode)
41 (setq utf-translate-cjk-mode nil)
42
43 ;;; Predefined categories.
44
;; Every predefined category is listed below as (MNEMONIC . DOCSTRING)
;; and registered, in order, by a single `define-category' loop.
(dolist (spec
         '(;; For each character set.
           (?a . "ASCII")
           (?l . "Latin")
           (?t . "Thai")
           (?g . "Greek")
           (?b . "Arabic")
           (?w . "Hebrew")
           (?y . "Cyrillic")
           (?k . "Japanese katakana")
           (?r . "Japanese roman")
           (?c . "Chinese")
           (?j . "Japanese")
           (?h . "Korean")
           (?e . "Ethiopic (Ge'ez)")
           (?v . "Vietnamese")
           (?i . "Indian")
           (?o . "Lao")
           (?q . "Tibetan")

           ;; For each group (row) of 2-byte character sets.
           (?A . "Alpha-numeric characters of 2-byte character sets")
           (?C . "Chinese (Han) characters of 2-byte character sets")
           (?G . "Greek characters of 2-byte character sets")
           (?H . "Japanese Hiragana characters of 2-byte character sets")
           (?K . "Japanese Katakana characters of 2-byte character sets")
           (?N . "Korean Hangul characters of 2-byte character sets")
           (?Y . "Cyrillic characters of 2-byte character sets")
           (?I . "Indian Glyphs")

           ;; For phonetic classifications.
           (?0 . "consonant")
           (?1 . "base (independent) vowel")
           (?2 . "upper diacritical mark (including upper vowel)")
           (?3 . "lower diacritical mark (including lower vowel)")
           (?4 . "tone mark")
           (?5 . "symbol")
           (?6 . "digit")
           (?7 . "vowel-modifying diacritical mark")
           (?8 . "vowel-signs")
           (?9 . "semivowel lower")

           ;; For filling.
           (?| . "While filling, we can break a line at this character.")

           ;; For indentation calculation.
           (?\s . "This character counts as a space for indentation purposes.")

           ;; Keep the following for `kinsoku' processing.  See comments
           ;; in kinsoku.el.
           (?> . "A character which can't be placed at beginning of line.")
           (?< . "A character which can't be placed at end of line.")

           ;; Combining
           (?^ . "Combining diacritic or mark")))
  (define-category (car spec) (cdr spec)))
103 \f
104 ;;; Setting syntax and category.
105
106 ;; ASCII
107
;; Characters 32 through 126 get both the category `a' (ASCII) and
;; the category `l' (Latin).
(dotimes (offset (- 127 32))
  (let ((ascii-char (+ 32 offset)))
    (modify-category-entry ascii-char ?a)
    (modify-category-entry ascii-char ?l)))
113
114 ;; Arabic character set
115
;; Put the generic character of each Arabic charset into category `b'.
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  ;; (modify-syntax-entry (make-char charset) "w")
  (modify-category-entry (make-char charset) ?b))
;; Do the same for three inclusive ranges of UCS code points.
(dolist (range '((#x600 . #x6ff)
                 (#xfb50 . #xfdff)
                 (#xfe70 . #xfefe)))
  (let ((code (car range))
        (end (cdr range)))
    (while (<= code end)
      (modify-category-entry (decode-char 'ucs code) ?b)
      (setq code (1+ code)))))
136
137 ;; Chinese character set (GB2312)
138
139 ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
;; Rows 33, 34 and 41 get symbol syntax.
(dolist (row '(33 34 41))
  (modify-syntax-entry (make-char 'chinese-gb2312 row) "_"))
;; PARENS lists the bracket characters as consecutive OPEN CLOSE pairs;
;; each member of a pair is made the matching delimiter of the other.
(let ((parens "\e$A!2!3!4!5!6!7!8!9!:!;!<!=!>!?#(#)#{#}#[#]\e(B")
      open close)
  (dotimes (i (/ (length parens) 2))
    (setq open (aref parens (* i 2))
          close (aref parens (1+ (* i 2))))
    (modify-syntax-entry open (format "(%c" close))
    (modify-syntax-entry close (format ")%c" open))))
163
;; Every character in this string gets punctuation syntax.
(mapc (lambda (punct)
        (modify-syntax-entry punct "."))
      "\e$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d\e(B")
167
;; The charset as a whole: category `c', and lines may be broken at
;; these characters when filling (`|').
(dolist (category '(?c ?\|))
  (modify-category-entry (make-char 'chinese-gb2312) category))
;; Individual rows, as (ROW . CATEGORY).
(dolist (row-cat '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
  (modify-category-entry (make-char 'chinese-gb2312 (car row-cat))
                         (cdr row-cat)))
;; Rows 48 through 126 are category `C'.
(dotimes (offset (- 127 48))
  (modify-category-entry (make-char 'chinese-gb2312 (+ 48 offset)) ?C))
179
;; Case pairs within GB2312.  Each spec is
;; (ROW FIRST-UPPER FIRST-LOWER COUNT): COUNT consecutive cells starting
;; at FIRST-UPPER are paired with those starting at FIRST-LOWER.
(let ((tbl (standard-case-table)))
  (dolist (spec '((#x23 #x41 #x61 26)
                  (#x26 #x21 #x41 24)
                  (#x27 #x21 #x51 33)))
    (let ((row (nth 0 spec))
          (upper (nth 1 spec))
          (lower (nth 2 spec)))
      (dotimes (i (nth 3 spec))
        (set-case-syntax-pair (make-char 'chinese-gb2312 row (+ upper i))
                              (make-char 'chinese-gb2312 row (+ lower i))
                              tbl)))))
190
191 ;; Chinese character set (BIG5)
192
;; Each spec is (FROM TO SYNTAX) in Big5 codes.  Characters from FROM up
;; to, but not including, TO receive SYNTAX.  The walk is over the
;; decoded Emacs character codes, not over the Big5 codes themselves.
(dolist (spec '((#xA141 #xA15D ".")
                (#xA1A5 #xA1AD ".")
                (#xA1AD #xA2AF "_")))
  (let ((char (decode-big5-char (nth 0 spec)))
        (limit (decode-big5-char (nth 1 spec)))
        (syntax (nth 2 spec)))
    (while (< char limit)
      (modify-syntax-entry char syntax)
      (setq char (1+ char)))))
208
;; PARENS holds OPEN CLOSE OPEN CLOSE ...; walk it two characters at a
;; time and make each character the matching delimiter of its partner.
(let ((parens "\e$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
      (idx 0))
  (while (< (1+ idx) (length parens))
    (let ((open (aref parens idx))
          (close (aref parens (1+ idx))))
      (modify-syntax-entry open (format "(%c" close))
      (modify-syntax-entry close (format ")%c" open)))
    (setq idx (+ idx 2))))
216
;; Both Big5 charsets get the categories `c', `C' and `|' on their
;; generic character.
(dolist (charset '(chinese-big5-1 chinese-big5-2))
  (let ((generic-char (make-char charset)))
    ;; (modify-syntax-entry generic-char "w")
    (dolist (category '(?c ?C ?\|))
      (modify-category-entry generic-char category))))
230
;; Case pairs for Big5.
;;
;; The first alphabet has 26 letters whose upper-case forms start at
;; #xA2CF and whose lower-case forms start 26 cells later (#xA2E9).
;; Only the first 22 lower-case forms are reachable at that offset; the
;; last four are at #xA340-#xA343, so they are paired in a second loop.
;; That second loop must continue with upper-case index 22, i.e.
;; #xA2CF + 22 = #xA2E5.  (It used to start at #xA2E4, which re-paired
;; the 22nd upper-case letter with the 23rd lower-case one and left the
;; last upper-case letter without a partner.)
(let ((tbl (standard-case-table)))
  (dotimes (i 22)
    (set-case-syntax-pair (decode-big5-char (+ #xA2CF i))
                          (decode-big5-char (+ #xA2CF i 26)) tbl))
  (dotimes (i 4)
    (set-case-syntax-pair (decode-big5-char (+ #xA2CF 22 i))
                          (decode-big5-char (+ #xA340 i)) tbl))
  ;; Second alphabet: 24 upper-case letters from #xA344, each followed
  ;; 24 cells later by its lower-case form.
  (dotimes (i 24)
    (set-case-syntax-pair (decode-big5-char (+ #xA344 i))
                          (decode-big5-char (+ #xA344 i 24)) tbl)))
241
242
243 ;; Chinese character set (CNS11643)
244
;; The generic character of every CNS 11643 plane gets the categories
;; `c', `C' and `|'.
(dolist (plane '(chinese-cns11643-1
                 chinese-cns11643-2
                 chinese-cns11643-3
                 chinese-cns11643-4
                 chinese-cns11643-5
                 chinese-cns11643-6
                 chinese-cns11643-7))
  (let ((generic-char (make-char plane)))
    ;; (modify-syntax-entry generic-char "w")
    (dolist (category '(?c ?C ?|))
      (modify-category-entry generic-char category))))
260
;; PARENS holds OPEN CLOSE OPEN CLOSE ...; walk it two characters at a
;; time and make each character the matching delimiter of its partner.
(let ((parens "\e$(G!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
      (idx 0))
  (while (< (1+ idx) (length parens))
    (let ((open (aref parens idx))
          (close (aref parens (1+ idx))))
      (modify-syntax-entry open (format "(%c" close))
      (modify-syntax-entry close (format ")%c" open)))
    (setq idx (+ idx 2))))
268
269 ;; Cyrillic character set (ISO-8859-5)
270
;; Whole charset: category `y'.
(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)

;; Code 160 is whitespace; three further characters are punctuation.
(modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
(dolist (punct '(?\e,L-\e(B ?\e,Lp\e(B ?\e,L}\e(B))
  (modify-syntax-entry punct "."))
;; Cyrillic case pairs.  CHARS is a flat list UPPER LOWER UPPER LOWER
;; ...; it is consumed two elements at a time, in order.
(let ((tbl (standard-case-table))
      (chars '(;; cyrillic-iso8859-5
               ?\e,L!\e(B ?\e,Lq\e(B
               ?\e,L"\e(B ?\e,Lr\e(B
               ?\e,L#\e(B ?\e,Ls\e(B
               ?\e,L$\e(B ?\e,Lt\e(B
               ?\e,L%\e(B ?\e,Lu\e(B
               ?\e,L&\e(B ?\e,Lv\e(B
               ?\e,L'\e(B ?\e,Lw\e(B
               ?\e,L(\e(B ?\e,Lx\e(B
               ?\e,L)\e(B ?\e,Ly\e(B
               ?\e,L*\e(B ?\e,Lz\e(B
               ?\e,L+\e(B ?\e,L{\e(B
               ?\e,L,\e(B ?\e,L|\e(B
               ?\e,L.\e(B ?\e,L~\e(B
               ?\e,L/\e(B ?\e,L\7f\e(B
               ?\e,L0\e(B ?\e,LP\e(B
               ?\e,L1\e(B ?\e,LQ\e(B
               ?\e,L2\e(B ?\e,LR\e(B
               ?\e,L3\e(B ?\e,LS\e(B
               ?\e,L4\e(B ?\e,LT\e(B
               ?\e,L5\e(B ?\e,LU\e(B
               ?\e,L6\e(B ?\e,LV\e(B
               ?\e,L7\e(B ?\e,LW\e(B
               ?\e,L8\e(B ?\e,LX\e(B
               ?\e,L9\e(B ?\e,LY\e(B
               ?\e,L:\e(B ?\e,LZ\e(B
               ?\e,L;\e(B ?\e,L[\e(B
               ?\e,L<\e(B ?\e,L\\e(B
               ?\e,L=\e(B ?\e,L]\e(B
               ?\e,L>\e(B ?\e,L^\e(B
               ?\e,L?\e(B ?\e,L_\e(B
               ?\e,L@\e(B ?\e,L`\e(B
               ?\e,LA\e(B ?\e,La\e(B
               ?\e,LB\e(B ?\e,Lb\e(B
               ?\e,LC\e(B ?\e,Lc\e(B
               ?\e,LD\e(B ?\e,Ld\e(B
               ?\e,LE\e(B ?\e,Le\e(B
               ?\e,LF\e(B ?\e,Lf\e(B
               ?\e,LG\e(B ?\e,Lg\e(B
               ?\e,LH\e(B ?\e,Lh\e(B
               ?\e,LI\e(B ?\e,Li\e(B
               ?\e,LJ\e(B ?\e,Lj\e(B
               ?\e,LK\e(B ?\e,Lk\e(B
               ?\e,LL\e(B ?\e,Ll\e(B
               ?\e,LM\e(B ?\e,Lm\e(B
               ?\e,LN\e(B ?\e,Ln\e(B
               ?\e,LO\e(B ?\e,Lo\e(B
               ;; The same letters written as mule-unicode characters
               ?\e$,1(!\e(B ?\e$,1(q\e(B
               ?\e$,1("\e(B ?\e$,1(r\e(B
               ?\e$,1(#\e(B ?\e$,1(s\e(B
               ?\e$,1($\e(B ?\e$,1(t\e(B
               ?\e$,1(%\e(B ?\e$,1(u\e(B
               ?\e$,1(&\e(B ?\e$,1(v\e(B
               ?\e$,1('\e(B ?\e$,1(w\e(B
               ?\e$,1((\e(B ?\e$,1(x\e(B
               ?\e$,1()\e(B ?\e$,1(y\e(B
               ?\e$,1(*\e(B ?\e$,1(z\e(B
               ?\e$,1(+\e(B ?\e$,1({\e(B
               ?\e$,1(,\e(B ?\e$,1(|\e(B
               ?\e$,1(.\e(B ?\e$,1(~\e(B
               ?\e$,1(/\e(B ?\e$,1(\7f\e(B
               ?\e$,1(0\e(B ?\e$,1(P\e(B
               ?\e$,1(1\e(B ?\e$,1(Q\e(B
               ?\e$,1(2\e(B ?\e$,1(R\e(B
               ?\e$,1(3\e(B ?\e$,1(S\e(B
               ?\e$,1(4\e(B ?\e$,1(T\e(B
               ?\e$,1(5\e(B ?\e$,1(U\e(B
               ?\e$,1(6\e(B ?\e$,1(V\e(B
               ?\e$,1(7\e(B ?\e$,1(W\e(B
               ?\e$,1(8\e(B ?\e$,1(X\e(B
               ?\e$,1(9\e(B ?\e$,1(Y\e(B
               ?\e$,1(:\e(B ?\e$,1(Z\e(B
               ?\e$,1(;\e(B ?\e$,1([\e(B
               ?\e$,1(<\e(B ?\e$,1(\\e(B
               ?\e$,1(=\e(B ?\e$,1(]\e(B
               ?\e$,1(>\e(B ?\e$,1(^\e(B
               ?\e$,1(?\e(B ?\e$,1(_\e(B
               ?\e$,1(@\e(B ?\e$,1(`\e(B
               ?\e$,1(A\e(B ?\e$,1(a\e(B
               ?\e$,1(B\e(B ?\e$,1(b\e(B
               ?\e$,1(C\e(B ?\e$,1(c\e(B
               ?\e$,1(D\e(B ?\e$,1(d\e(B
               ?\e$,1(E\e(B ?\e$,1(e\e(B
               ?\e$,1(F\e(B ?\e$,1(f\e(B
               ?\e$,1(G\e(B ?\e$,1(g\e(B
               ?\e$,1(H\e(B ?\e$,1(h\e(B
               ?\e$,1(I\e(B ?\e$,1(i\e(B
               ?\e$,1(J\e(B ?\e$,1(j\e(B
               ?\e$,1(K\e(B ?\e$,1(k\e(B
               ?\e$,1(L\e(B ?\e$,1(l\e(B
               ?\e$,1(M\e(B ?\e$,1(m\e(B
               ?\e$,1(N\e(B ?\e$,1(n\e(B
               ?\e$,1(O\e(B ?\e$,1(o\e(B)))
  (while chars
    (set-case-syntax-pair (car chars) (nth 1 chars) tbl)
    (setq chars (nthcdr 2 chars))))
370
371 ;; Devanagari character set
372
373 ;;; Commented out since the categories appear not to be used anywhere
374 ;;; and word syntax is the default.
375 ;; (let ((deflist '(;; chars syntax category
376 ;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
377 ;; ; chandrabindu, anuswar, visarga
378 ;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
379 ;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
380 ;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
381 ;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
382 ;; ;; Unicode equivalents
383 ;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
384 ;; ; chandrabindu, anuswar, visarga
385 ;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
386 ;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
387 ;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
388 ;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
389 ;; ))
390 ;; elm chars len syntax category to ch i)
391 ;; (while deflist
392 ;; (setq elm (car deflist))
393 ;; (setq chars (car elm)
394 ;; len (length chars)
395 ;; syntax (nth 1 elm)
396 ;; category (nth 2 elm)
397 ;; i 0)
398 ;; (while (< i len)
399 ;; (if (= (aref chars i) ?-)
400 ;; (setq i (1+ i)
401 ;; to (aref chars i))
402 ;; (setq ch (aref chars i)
403 ;; to ch))
404 ;; (while (<= ch to)
405 ;; (modify-syntax-entry ch syntax)
406 ;; (modify-category-entry ch category)
407 ;; (setq ch (1+ ch)))
408 ;; (setq i (1+ i)))
409 ;; (setq deflist (cdr deflist))))
410
411 ;; Ethiopic character set
412
;; Category `e' for the charset's generic character and for the UCS
;; code points #x1200 through #x137c inclusive.
(modify-category-entry (make-char 'ethiopic) ?e)
;; (modify-syntax-entry (make-char 'ethiopic) "w")
(let ((code #x1200))
  (while (<= code #x137c)
    (modify-category-entry (decode-char 'ucs code) ?e)
    (setq code (1+ code))))
;; These characters are punctuation.
(dolist (punct '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
                 ;; Unicode equivalents of the above:
                 ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B))
  (modify-syntax-entry punct "."))
423
424 ;; Greek character set (ISO-8859-7)
425
;; Category `g' for the charset's generic character and for the UCS
;; code points #x370 through #x3ff inclusive.
(modify-category-entry (make-char 'greek-iso8859-7) ?g)
(dotimes (offset (1+ (- #x3ff #x370)))
  (modify-category-entry (decode-char 'ucs (+ #x370 offset)) ?g))
431
432 ;; (let ((c 182))
433 ;; (while (< c 255)
434 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
435 ;; (setq c (1+ c))))
436 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
;; Three greek-iso8859-7 characters with punctuation syntax.
(dolist (punct '(?\e,F7\e(B ?\e,F;\e(B ?\e,F=\e(B))
  (modify-syntax-entry punct "."))
;; Greek entries of the standard case table.  The calls are made in the
;; same order as before: first the caseless characters, then the case
;; pairs, with the two `set-upcase-syntax' calls kept in their original
;; positions between the pair lists.
(let* ((tbl (standard-case-table))
       ;; Walk a flat list UPPER LOWER UPPER LOWER ... and pair them.
       (pair-up (lambda (chars)
                  (while chars
                    (set-case-syntax-pair (car chars) (nth 1 chars) tbl)
                    (setq chars (nthcdr 2 chars))))))
  ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
  ;; in several cases.
  ;; NOTE(review): ?\e,F&\e(B appears twice below; the later "_" is the
  ;; one that takes effect.  Confirm which character the "." entry was
  ;; meant for.
  (let ((entries '(?\e,F!\e(B "."
                   ?\e,F"\e(B "."
                   ?\e,F&\e(B "."
                   ?\e,F&\e(B "_"
                   ?\e,F'\e(B "."
                   ?\e,F)\e(B "_"
                   ?\e,F+\e(B "."
                   ?\e,F,\e(B "_"
                   ?\e,F-\e(B "_"
                   ?\e,F/\e(B "."
                   ?\e,F0\e(B "_"
                   ?\e,F1\e(B "_"
                   ;; ?\e,F7\e(B "_"
                   ;; ?\e,F=\e(B "_"
                   )))
    (while entries
      (set-case-syntax (car entries) (nth 1 entries) tbl)
      (setq entries (nthcdr 2 entries))))
  (funcall pair-up
           '(?\e,FA\e(B ?\e,Fa\e(B
             ?\e,FB\e(B ?\e,Fb\e(B
             ?\e,FC\e(B ?\e,Fc\e(B
             ?\e,FD\e(B ?\e,Fd\e(B
             ?\e,FE\e(B ?\e,Fe\e(B
             ?\e,FF\e(B ?\e,Ff\e(B
             ?\e,FG\e(B ?\e,Fg\e(B
             ?\e,FH\e(B ?\e,Fh\e(B
             ?\e,FI\e(B ?\e,Fi\e(B
             ?\e,FJ\e(B ?\e,Fj\e(B
             ?\e,FK\e(B ?\e,Fk\e(B
             ?\e,FL\e(B ?\e,Fl\e(B
             ?\e,FM\e(B ?\e,Fm\e(B
             ?\e,FN\e(B ?\e,Fn\e(B
             ?\e,FO\e(B ?\e,Fo\e(B
             ?\e,FP\e(B ?\e,Fp\e(B
             ?\e,FQ\e(B ?\e,Fq\e(B))
  ;; ?\e,Fr\e(B only gets an upcase mapping; the pair that follows
  ;; gives the same capital its regular lower-case partner.
  (set-upcase-syntax ?\e,FS\e(B ?\e,Fr\e(B tbl)
  (funcall pair-up
           '(?\e,FS\e(B ?\e,Fs\e(B
             ?\e,FT\e(B ?\e,Ft\e(B
             ?\e,FU\e(B ?\e,Fu\e(B
             ?\e,FV\e(B ?\e,Fv\e(B
             ?\e,FW\e(B ?\e,Fw\e(B
             ?\e,FX\e(B ?\e,Fx\e(B
             ?\e,FY\e(B ?\e,Fy\e(B
             ?\e,FZ\e(B ?\e,Fz\e(B
             ?\e,F[\e(B ?\e,F{\e(B
             ?\e,F?\e(B ?\e,F~\e(B
             ?\e,F>\e(B ?\e,F}\e(B
             ?\e,F<\e(B ?\e,F|\e(B
             ?\e,F6\e(B ?\e,F\\e(B
             ?\e,F8\e(B ?\e,F]\e(B
             ?\e,F9\e(B ?\e,F^\e(B
             ?\e,F:\e(B ?\e,F_\e(B
             ;; Unicode equivalents
             ?\e$,1&q\e(B ?\e$,1'1\e(B
             ?\e$,1&r\e(B ?\e$,1'2\e(B
             ?\e$,1&s\e(B ?\e$,1'3\e(B
             ?\e$,1&t\e(B ?\e$,1'4\e(B
             ?\e$,1&u\e(B ?\e$,1'5\e(B
             ?\e$,1&v\e(B ?\e$,1'6\e(B
             ?\e$,1&w\e(B ?\e$,1'7\e(B
             ?\e$,1&x\e(B ?\e$,1'8\e(B
             ?\e$,1&y\e(B ?\e$,1'9\e(B
             ?\e$,1&z\e(B ?\e$,1':\e(B
             ?\e$,1&{\e(B ?\e$,1';\e(B
             ?\e$,1&|\e(B ?\e$,1'<\e(B
             ?\e$,1&}\e(B ?\e$,1'=\e(B
             ?\e$,1&~\e(B ?\e$,1'>\e(B
             ?\e$,1&\7f\e(B ?\e$,1'?\e(B
             ?\e$,1' \e(B ?\e$,1'@\e(B
             ?\e$,1'!\e(B ?\e$,1'A\e(B))
  (set-upcase-syntax ?\e$,1'#\e(B ?\e$,1'B\e(B tbl)
  (funcall pair-up
           '(?\e$,1'#\e(B ?\e$,1'C\e(B
             ?\e$,1'$\e(B ?\e$,1'D\e(B
             ?\e$,1'%\e(B ?\e$,1'E\e(B
             ?\e$,1'&\e(B ?\e$,1'F\e(B
             ?\e$,1''\e(B ?\e$,1'G\e(B
             ?\e$,1'(\e(B ?\e$,1'H\e(B
             ?\e$,1')\e(B ?\e$,1'I\e(B
             ?\e$,1'*\e(B ?\e$,1'J\e(B
             ?\e$,1'+\e(B ?\e$,1'K\e(B
             ?\e$,1&o\e(B ?\e$,1'N\e(B
             ?\e$,1&n\e(B ?\e$,1'M\e(B
             ?\e$,1&l\e(B ?\e$,1'L\e(B
             ?\e$,1&f\e(B ?\e$,1',\e(B
             ?\e$,1&h\e(B ?\e$,1'-\e(B
             ?\e$,1&i\e(B ?\e$,1'.\e(B
             ?\e$,1&j\e(B ?\e$,1'/\e(B)))
526
527 ;; Hebrew character set (ISO-8859-8)
528
;; Category `w' for the charset's generic character and for the UCS
;; code points #x591 through #x5f4 inclusive.
(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
(dotimes (offset (1+ (- #x5f4 #x591)))
  (modify-category-entry (decode-char 'ucs (+ #x591 offset)) ?w))

;; Punctuation: first by hebrew-iso8859-8 code, then by UCS code point.
(dolist (code '(208                     ; PASEQ
                211))                   ; SOF PASUQ
  (modify-syntax-entry (make-char 'hebrew-iso8859-8 code) "."))
(dolist (code '(#x5be                   ; MAQAF
                #x5c0                   ; PASEQ
                #x5c3                   ; SOF PASUQ
                #x5f3                   ; GERESH
                #x5f4))                 ; GERSHAYIM
  (modify-syntax-entry (decode-char 'ucs code) "."))
542
543 ;; (let ((c 224))
544 ;; (while (< c 251)
545 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
546 ;; (setq c (1+ c))))
547 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
548
549 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
550
;; Generic characters of the Indian charsets, as (CHARSET . CATEGORY).
(dolist (spec '((indian-is13194 . ?i)
                (indian-2-column . ?I)
                (indian-glyph . ?I)))
  (modify-category-entry (make-char (car spec)) (cdr spec)))
;; Unicode Devanagari block
(dotimes (offset (1+ (- #x970 #x901)))
  (modify-category-entry (decode-char 'ucs (+ #x901 offset)) ?i))
559
;; Apply the same table of (FIRST LAST CATEGORY) offsets to each of the
;; nine base code points listed below; FIRST and LAST are inclusive
;; offsets from the base.
(let ((ranges '(;; RANGE CATEGORY MEANINGS
                (#x01 #x03 ?7)          ; vowel modifier
                (#x05 #x14 ?1)          ; base vowel
                (#x15 #x39 ?0)          ; consonants
                (#x3e #x4d ?8)          ; vowel modifier
                (#x51 #x54 ?4)          ; stress/tone mark
                (#x58 #x5f ?0)          ; consonants
                (#x60 #x61 ?1)          ; base vowel
                (#x62 #x63 ?8)          ; vowel modifier
                (#x66 #x6f ?6)          ; digits
                )))
  (dolist (base '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00))
    (dolist (range ranges)
      (let ((offset (nth 0 range))
            (last-offset (nth 1 range))
            (category (nth 2 range)))
        (while (<= offset last-offset)
          (modify-category-entry (decode-char 'ucs (+ base offset))
                                 category)
          (setq offset (1+ offset)))))))
579
580 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
581
;; Each spec is (CHARSET CATEGORY...): every listed category is added to
;; the generic character of CHARSET.
(dolist (spec '((katakana-jisx0201 ?k ?j ?\|)
                (latin-jisx0201 ?r)
                (japanese-jisx0208 ?j ?\|)
                (japanese-jisx0212 ?j ?\|)))
  (let ((generic-char (make-char (car spec))))
    (dolist (category (cdr spec))
      (modify-category-entry generic-char category))))
590
;; Each spec is (FIRST LAST CATEGORY...): every UCS code point from
;; FIRST through LAST inclusive receives all the listed categories.
(dolist (spec '(;; Unicode equivalents of JISX0201-kana
                (#xff61 #xff9f ?k ?j ?\|)
                ;; Katakana block
                ;; ?K is double width, ?k isn't specified
                (#x30a0 #x30ff ?k ?j ?\|)
                ;; Hiragana block
                ;; ?H is actually defined to be double width
                ;; (no ?j for this block)
                (#x3040 #x309f ?H ?\|)))
  (let ((code (nth 0 spec))
        (end (nth 1 spec))
        (categories (nthcdr 2 spec)))
    (while (<= code end)
      (dolist (category categories)
        (modify-category-entry (decode-char 'ucs code) category))
      (setq code (1+ code)))))
616
617 ;; JISX0208
618 ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
;; Rows 33, 34 and 40 get symbol syntax ...
(dolist (row '(33 34 40))
  (modify-syntax-entry (make-char 'japanese-jisx0208 row) "_"))
;; ... except for these characters, which are word constituents.
(dolist (word-char '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))
  (modify-syntax-entry word-char "w"))
;; PARENS holds OPEN CLOSE OPEN CLOSE ...; make each character the
;; matching delimiter of its partner.
(let ((parens "\e$B!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![\e(B" )
      (idx 0))
  (while (< (1+ idx) (length parens))
    (let ((open (aref parens idx))
          (close (aref parens (1+ idx))))
      (modify-syntax-entry open (format "(%c" close))
      (modify-syntax-entry close (format ")%c" open)))
    (setq idx (+ idx 2))))
633
;; Individual rows, as (ROW . CATEGORY).
(dolist (row-cat '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
  (modify-category-entry (make-char 'japanese-jisx0208 (car row-cat))
                         (cdr row-cat)))
;; Rows 48 through 126 are category `C'.
(dotimes (offset (- 127 48))
  (modify-category-entry (make-char 'japanese-jisx0208 (+ 48 offset)) ?C))
;; A few characters get categories of their own: one is `K', two are
;; both `K' and `H', and the rest are `C'.
(modify-category-entry ?\e$B!<\e(B ?K)
(dolist (kana-mark '(?\e$B!+\e(B ?\e$B!,\e(B))
  (modify-category-entry kana-mark ?K)
  (modify-category-entry kana-mark ?H))
(dolist (han-mark '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))
  (modify-category-entry han-mark ?C))
653
;; Case pairs within JISX0208.  Each spec is
;; (ROW FIRST-UPPER FIRST-LOWER COUNT): COUNT consecutive cells starting
;; at FIRST-UPPER are paired with those starting at FIRST-LOWER.
(let ((tbl (standard-case-table)))
  (dolist (spec '((#x23 #x41 #x61 26)
                  (#x26 #x21 #x41 24)
                  (#x27 #x21 #x51 33)))
    (let ((row (nth 0 spec))
          (upper (nth 1 spec))
          (lower (nth 2 spec)))
      (dotimes (i (nth 3 spec))
        (set-case-syntax-pair (make-char 'japanese-jisx0208 row (+ upper i))
                              (make-char 'japanese-jisx0208 row (+ lower i))
                              tbl)))))
664
665 ;; JISX0212
666 ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
;; Rows 33, 34 and 35 get symbol syntax.
(dolist (row '(33 34 35))
  (modify-syntax-entry (make-char 'japanese-jisx0212 row) "_"))

;; The charset as a whole is category `C'.
(modify-category-entry (make-char 'japanese-jisx0212) ?C)
672
673 ;; JISX0201-Kana
674 ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
;; These characters are punctuation.
(dolist (punct '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
                 ;; Unicode:
                 ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B))
  (modify-syntax-entry punct "."))
681
;; The two bracket characters form a delimiter pair: the first opens
;; and is matched by the second, the second closes and is matched by
;; the first.  (The second used to be given open-paren syntax as well,
;; unlike every other bracket pair in this file.)
(modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
(modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B")
684
685 ;; Korean character set (KSC5601)
686
687 ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
;; These rows get symbol syntax.
(dolist (row '(33 34 38 39 40 41))
  (modify-syntax-entry (make-char 'korean-ksc5601 row) "_"))

;; The charset as a whole is category `h'; some rows get an additional
;; category, listed as (ROW . CATEGORY).
(modify-category-entry (make-char 'korean-ksc5601) ?h)
(dolist (row-cat '((35 . ?A) (37 . ?G) (42 . ?H) (43 . ?K) (44 . ?Y)))
  (modify-category-entry (make-char 'korean-ksc5601 (car row-cat))
                         (cdr row-cat)))
701
;; PARENS holds OPEN CLOSE OPEN CLOSE ...; walk it two characters at a
;; time and make each character the matching delimiter of its partner.
(let ((parens "\e$(C!2!3!4!5!6!7!8!9!:!;!<!=#(#)#[#]#{#}\e(B" )
      (idx 0))
  (while (< (1+ idx) (length parens))
    (let ((open (aref parens idx))
          (close (aref parens (1+ idx))))
      (modify-syntax-entry open (format "(%c" close))
      (modify-syntax-entry close (format ")%c" open)))
    (setq idx (+ idx 2))))
709
;; Case pairs within KSC5601.  `set-case-syntax-pair' takes the
;; upper-case character first and the lower-case character second.
(let ((tbl (standard-case-table)))
  ;; Row #x23: 26 upper-case cells from #x41, lower-case from #x61.
  (dotimes (i 26)
    (set-case-syntax-pair (make-char 'korean-ksc5601 #x23 (+ #x41 i))
                          (make-char 'korean-ksc5601 #x23 (+ #x61 i)) tbl))
  ;; Row #x25, ten Roman numerals.  In this row the small forms come
  ;; first (#x21..) and the capital forms second (#x30..), so the
  ;; capital cell is passed as the upper-case argument.  (The arguments
  ;; used to be the other way round, which made the small numerals the
  ;; "upper case" of the capital ones.)
  ;; NOTE(review): cell layout taken from the KS X 1001 code chart --
  ;; confirm against the charset table before merging.
  (dotimes (i 10)
    (set-case-syntax-pair (make-char 'korean-ksc5601 #x25 (+ #x30 i))
                          (make-char 'korean-ksc5601 #x25 (+ #x21 i)) tbl))
  ;; Row #x25: 24 upper-case cells from #x41, lower-case from #x61.
  (dotimes (i 24)
    (set-case-syntax-pair (make-char 'korean-ksc5601 #x25 (+ #x41 i))
                          (make-char 'korean-ksc5601 #x25 (+ #x61 i)) tbl))
  ;; Row #x2C: 33 upper-case cells from #x21, lower-case from #x51.
  (dotimes (i 33)
    (set-case-syntax-pair (make-char 'korean-ksc5601 #x2C (+ #x21 i))
                          (make-char 'korean-ksc5601 #x2C (+ #x51 i)) tbl)))
723
724 ;; Latin character set (latin-1,2,3,4,5,8,9)
725
;; Every Latin charset is category `l', and its code 160 counts as a
;; space for indentation purposes (category ` ').
(let ((latin-charsets '(latin-iso8859-1
                        latin-iso8859-2
                        latin-iso8859-3
                        latin-iso8859-4
                        latin-iso8859-9
                        latin-iso8859-14
                        latin-iso8859-15)))
  (dolist (charset latin-charsets)
    (modify-category-entry (make-char charset) ?l))
  (dolist (charset latin-charsets)
    (modify-category-entry (make-char charset 160) ?\s)))
741
742 ;; Lao character set
743
;; Put the character returned by (make-char 'lao) and every UCS code
;; point from #xe80 through #xeff in category ?o.
744 (modify-category-entry (make-char 'lao) ?o)
745 (dotimes (i (1+ (- #xeff #xe80)))
746 (modify-category-entry (decode-char 'ucs (+ i #xe80)) ?o))
747
;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  CHARS is scanned
;; left to right: a plain character stands for itself, and "-" followed
;; by a character continues from just after the previous character up
;; to and including that character.  Every character covered is added
;; to CATEGORY.  Its syntax entry is set to SYNTAX only when SYNTAX is
;; not "w"; for "w" entries the syntax table is left untouched.
748 (let ((deflist '(;; chars syntax category
749 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
750 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
751 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
752 ("\e(1XY\e(B" "w" ?3) ; vowel lower
753 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
754 ("\e(1\\e(B" "w" ?9) ; semivowel lower
755 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
756 ("\e(1Of\e(B" "_" ?5) ; symbol
757 ;; Unicode equivalents
758 ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
759 ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
760 ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
761 ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
762 ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark
763 ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower
764 ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
765 ("\e$,1DODf\e(B" "_" ?5) ; symbol
766 ))
767 elm chars len syntax category to ch i)
768 (while deflist
769 (setq elm (car deflist))
770 (setq chars (car elm)
771 len (length chars)
772 syntax (nth 1 elm)
773 category (nth 2 elm)
774 i 0)
775 (while (< i len)
776 (if (= (aref chars i) ?-) ; "-": the next character ends a range
777 (setq i (1+ i)
778 to (aref chars i)) ; ch still holds previous character + 1
779 (setq ch (aref chars i)
780 to ch)) ; single character: range of one
781 (while (<= ch to)
782 (unless (string-equal syntax "w")
783 (modify-syntax-entry ch syntax))
784 (modify-category-entry ch category)
785 (setq ch (1+ ch)))
786 (setq i (1+ i)))
787 (setq deflist (cdr deflist))))
788
789 ;; Thai character set (TIS620)
790
;; Put the character returned by (make-char 'thai-tis620) and every UCS
;; code point from #xe00 through #xe7f in category ?t.
791 (modify-category-entry (make-char 'thai-tis620) ?t)
792 (dotimes (i (1+ (- #xe7f #xe00)))
793 (modify-category-entry (decode-char 'ucs (+ i #xe00)) ?t))
794
;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  CHARS is scanned
;; left to right: a plain character stands for itself, and "-" followed
;; by a character continues from just after the previous character up
;; to and including that character.  Every character covered is added
;; to CATEGORY.  Its syntax entry is set to SYNTAX only when SYNTAX is
;; not "w"; for "w" entries the syntax table is left untouched.
795 (let ((deflist '(;; chars syntax category
796 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
797 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
798 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
799 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
800 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
801 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
802 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
803 ;; Unicode equivalents
804 ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
805 ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
806 ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
807 ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
808 ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
809 ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
810 ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
811 ))
812 elm chars len syntax category to ch i)
813 (while deflist
814 (setq elm (car deflist))
815 (setq chars (car elm)
816 len (length chars)
817 syntax (nth 1 elm)
818 category (nth 2 elm)
819 i 0)
820 (while (< i len)
821 (if (= (aref chars i) ?-) ; "-": the next character ends a range
822 (setq i (1+ i)
823 to (aref chars i)) ; ch still holds previous character + 1
824 (setq ch (aref chars i)
825 to ch)) ; single character: range of one
826 (while (<= ch to)
827 (unless (string-equal syntax "w")
828 (modify-syntax-entry ch syntax))
829 (modify-category-entry ch category)
830 (setq ch (1+ ch)))
831 (setq i (1+ i)))
832 (setq deflist (cdr deflist))))
833
834 ;; Tibetan character set
835
;; Put the characters returned by (make-char 'tibetan) and
;; (make-char 'tibetan-1-column), and every UCS code point from #xf00
;; through #xfff, in category ?q.
836 (modify-category-entry (make-char 'tibetan) ?q)
837 (modify-category-entry (make-char 'tibetan-1-column) ?q)
838 (dotimes (i (1+ (- #xfff #xf00)))
839 (modify-category-entry (decode-char 'ucs (+ i #xf00)) ?q))
840
;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  CHARS is scanned
;; left to right: a plain character stands for itself, and "-" followed
;; by a character continues from just after the previous character up
;; to and including that character.  Every character covered is added
;; to CATEGORY.  Its syntax entry is set to SYNTAX only when SYNTAX is
;; not "w"; for "w" entries the syntax table is left untouched.  The
;; same character may be listed under several categories.
841 (let ((deflist '(;; chars syntax category
842 ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
843 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
844 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
845 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
846 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
847 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
848 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
849 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
850 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
851 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
852 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
853 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
854 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
855 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
856
857 ;; Unicode version (not complete)
858 ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
859 ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ;
860 ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
861 ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
862 ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lower vowel/modifier
863 ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
864 ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
865 ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
866 ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
867 ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
868 ))
869 elm chars len syntax category to ch i)
870 (while deflist
871 (setq elm (car deflist))
872 (setq chars (car elm)
873 len (length chars)
874 syntax (nth 1 elm)
875 category (nth 2 elm)
876 i 0)
877 (while (< i len)
878 (if (= (aref chars i) ?-) ; "-": the next character ends a range
879 (setq i (1+ i)
880 to (aref chars i)) ; ch still holds previous character + 1
881 (setq ch (aref chars i)
882 to ch)) ; single character: range of one
883 (while (<= ch to)
884 (unless (string-equal syntax "w")
885 (modify-syntax-entry ch syntax))
886 (modify-category-entry ch category)
887 (setq ch (1+ ch)))
888 (setq i (1+ i)))
889 (setq deflist (cdr deflist))))
890
891 ;; Vietnamese character set
892
;; Both VISCII charsets go in category ?v, and also in ?l so that they
;; make a word together with Latin characters.
(dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
  (let ((generic (make-char charset)))
    ;; (modify-syntax-entry generic "w")
    (modify-category-entry generic ?v)
    (modify-category-entry generic ?l)))

;; Pair code N of the upper charset with code N of the lower charset
;; for every N from 32 through 127.
(let ((case-table (standard-case-table)))
  (dotimes (offset (- 128 32))
    (let ((code (+ 32 offset)))
      (set-case-syntax-pair (make-char 'vietnamese-viscii-upper code)
                            (make-char 'vietnamese-viscii-lower code)
                            case-table))))
910
911 ;; Unicode (mule-unicode-0100-24ff)
912
913 (let ((tbl (standard-case-table)) c)
914
;; Latin Extended-A, Latin Extended-B
;; Every code point from #x0100 through #x0233 goes in category ?l.
;; Case pairs are set up only from even code points, in two layouts.
(setq c #x0100)
(while (<= c #x0233)
  (modify-category-entry (decode-char 'ucs c) ?l)
  (when (zerop (% c 2))
    (cond
     ;; #x0100..#x012e and #x014a..#x0177: the even code point is the
     ;; capital and the following odd one is the small letter.
     ((or (<= c #x012e)
          (and (>= c #x014a) (<= c #x0177)))
      (set-case-syntax-pair
       (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
     ;; #x013a..#x0148: the even code point is the small letter and
     ;; the preceding odd one is the capital.
     ((and (>= c #x013a) (<= c #x0148))
      (set-case-syntax-pair
       (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))))
  (setq c (1+ c)))

930
931
932 ;; In some languages, such as Turkish, U+0049 LATIN CAPITAL LETTER I
933 ;; and U+0131 LATIN SMALL LETTER DOTLESS I make a case pair, and so
934 ;; do U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN
935 ;; SMALL LETTER I.
936
937 ;; We used to set up half of those correspondences unconditionally,
938 ;; but that makes searches slow. So now we don't set up either half
939 ;; of these correspondences by default.
940
941 ;; (set-downcase-syntax ?\e$,1 P\e(B ?i tbl)
942 ;; (set-upcase-syntax ?I ?\e$,1 Q\e(B tbl)
943
944 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
945 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
946 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
947 (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl)
948 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
949 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
950 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
951
952 ;; Latin Extended-B
953 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
954 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
955 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
956 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
957 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
958 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
959 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
960 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
961 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
962 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
963 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
964 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
965 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
966 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
967 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
968 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
969 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
970 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
971 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
972 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
973 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
974 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
975 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
976 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
977 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
978 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
979 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
980 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
981 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
982 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
983 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
984 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
985 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
986 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
987 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
988 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
;; U+01C4..U+01CC digraphs.  Each small letter (U+01C6, U+01C9, U+01CC)
;; has two capital partners: an all-capitals form and a titlecase form.
;; Both must downcase to the small letter.  `set-case-syntax-pair' also
;; records its first argument as the upcase of its second, and the last
;; call for a given small letter wins.  So the titlecase form is
;; registered first and the all-capitals form last, which makes
;; upcasing the small letter yield the all-capitals form.
(set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
(set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
(set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
(set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
(set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
(set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
995 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
996 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
997 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
998 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
999 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
1000 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
1001 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
1002 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
1003 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
1004 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
1005 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
1006 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
1007 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
1008 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
1009 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
1010 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
1011 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
1012 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
;; U+01F1 (all capitals) and U+01F2 (titlecase) both downcase to
;; U+01F3.  The last `set-case-syntax-pair' call for a small letter
;; decides what it upcases to, so the titlecase form is registered
;; first and the all-capitals form last.
(set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
(set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
1015 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
1016 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
1017 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
1018 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
1019 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
1020 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
1021 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
1022 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
1023 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
1024 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
1025 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
1026 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
1027 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
1028 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
1029 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
1030 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
1031 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
1032 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
1033 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
1034 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
1035 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
1036 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
1037 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
1038 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
1039 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
1040 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
1041 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
1042 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
1043 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
1044 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
1045 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
1046 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
1047
1048 ;; Latin Extended Additional
1049 (setq c #x1e00)
1050 (while (<= c #x1ef9)
1051 (modify-category-entry (decode-char 'ucs c) ?l)
1052 (and (zerop (% c 2))
1053 (or (<= c #x1e94) (>= c #x1ea0))
1054 (set-case-syntax-pair
1055 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1056 (setq c (1+ c)))
1057
1058 ;; Greek
1059 (setq c #x0370)
1060 (while (<= c #x03ff)
1061 (modify-category-entry (decode-char 'ucs c) ?g)
1062 (if (or (and (>= c #x0391) (<= c #x03a1))
1063 (and (>= c #x03a3) (<= c #x03ab)))
1064 (set-case-syntax-pair
1065 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
1066 (and (>= c #x03da)
1067 (<= c #x03ee)
1068 (zerop (% c 2))
1069 (set-case-syntax-pair
1070 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1071 (setq c (1+ c)))
1072 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
1073 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
1074 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
1075 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
1076 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
1077 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
1078 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
1079
1080 ;; Armenian
1081 (setq c #x531)
1082 (while (<= c #x556)
1083 (set-case-syntax-pair (decode-char 'ucs c)
1084 (decode-char 'ucs (+ c #x30)) tbl)
1085 (setq c (1+ c)))
1086
;; Greek Extended
;; Every code point from #x1f00 through #x1fff goes in category ?g.
;; Up to #x1fa7 the block is laid out in columns of sixteen: the small
;; letters occupy the low eight positions and their capitals sit eight
;; positions higher, so (c + 8, c) is registered as a case pair.
(setq c #x1f00)
(while (<= c #x1fff)
  (modify-category-entry (decode-char 'ucs c) ?g)
  (and (<= (logand c #x000f) 7)
       (<= c #x1fa7)
       ;; These four have no capital eight positions higher.
       (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
       ;; Skip the #x1f70 column: #x1f70..#x1f7f are all small letters,
       ;; and their capitals are paired explicitly after this loop.
       ;; The masked value has a zero low nibble, so it must be
       ;; compared with #x70.
       (/= (logand c #x00f0) #x70)
       (set-case-syntax-pair
        (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
  (setq c (1+ c)))
1098 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
1099 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1100 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1101 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1102 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1103 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1104 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1105 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1106 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1107 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1108 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1109 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1110 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1111 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1112 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1113 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1114 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1115 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1116 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1117 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1118 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1119 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1120 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1121 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1122
1123 ;; cyrillic
1124 (setq c #x0400)
1125 (while (<= c #x04ff)
1126 (modify-category-entry (decode-char 'ucs c) ?y)
1127 (and (>= c #x0400)
1128 (<= c #x040f)
1129 (set-case-syntax-pair
1130 (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
1131 (and (>= c #x0410)
1132 (<= c #x042f)
1133 (set-case-syntax-pair
1134 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
1135 (and (zerop (% c 2))
1136 (or (and (>= c #x0460) (<= c #x0480))
1137 (and (>= c #x048c) (<= c #x04be))
1138 (and (>= c #x04d0) (<= c #x04f4)))
1139 (set-case-syntax-pair
1140 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1141 (setq c (1+ c)))
1142 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
1143 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
1144 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
1145 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
1146 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1147
1148 ;; general punctuation
;; Four consecutive runs of code points get a syntax via
;; `set-case-syntax'.  Only the first two runs reset C; the last two
;; loops continue from wherever the previous loop left C.
1149 (setq c #x2000)
1150 (while (<= c #x200b) ; #x2000..#x200b: syntax " "
1151 (set-case-syntax (decode-char 'ucs c) " " tbl)
1152 (setq c (1+ c)))
1153 (setq c #x2010) ; #x200c..#x200f are skipped
1154 (while (<= c #x2016) ; #x2010..#x2016: syntax "_"
1155 (set-case-syntax (decode-char 'ucs c) "_" tbl)
1156 (setq c (1+ c)))
1157 ;; Punctuation syntax for quotation marks (like `)
1158 (while (<= c #x201f) ; c starts at #x2017 here: #x2017..#x201f get "."
1159 (set-case-syntax (decode-char 'ucs c) "." tbl)
1160 (setq c (1+ c)))
1161 (while (<= c #x2027) ; c starts at #x2020 here: #x2020..#x2027 get "_"
1162 (set-case-syntax (decode-char 'ucs c) "_" tbl)
1163 (setq c (1+ c)))
1164
1165 ;; Roman numerals
1166 (setq c #x2160)
1167 (while (<= c #x216f)
1168 (set-case-syntax-pair (decode-char 'ucs c)
1169 (decode-char 'ucs (+ c #x10)) tbl)
1170 (setq c (1+ c)))
1171
1172 ;; Circled Latin
1173 (setq c #x24b6)
1174 (while (<= c #x24cf)
1175 (set-case-syntax-pair (decode-char 'ucs c)
1176 (decode-char 'ucs (+ c 26)) tbl)
1177 (modify-category-entry (decode-char 'ucs c) ?l)
1178 (modify-category-entry (decode-char 'ucs (+ c 26)) ?l)
1179 (setq c (1+ c)))
1180
1181 ;; Fullwidth Latin
1182 (setq c #xff21)
1183 (while (<= c #xff3a)
1184 (set-case-syntax-pair (decode-char 'ucs c)
1185 (decode-char 'ucs (+ c #x20)) tbl)
1186 (modify-category-entry (decode-char 'ucs c) ?l)
1187 (modify-category-entry (decode-char 'ucs (+ c #x20)) ?l)
1188 (setq c (1+ c)))
1189
1190 ;; Combining diacritics
1191 (setq c #x300)
1192 (while (<= c #x362)
1193 (modify-category-entry (decode-char 'ucs c) ?^)
1194 (setq c (1+ c)))
1195
1196 ;; Combining marks
1197 (setq c #x20d0)
1198 (while (<= c #x20e3)
1199 (modify-category-entry (decode-char 'ucs c) ?^)
1200 (setq c (1+ c)))
1201
1202 ;; Fixme: syntax for symbols &c
1203 )
1204
;; Each string in PAIRS holds exactly two characters: an opening
;; bracket followed by its closing bracket (code points are noted
;; beside each entry).  The loop gives the first character
;; open-parenthesis syntax matched to the second, and the second
;; character close-parenthesis syntax matched to the first.
1205 (let ((pairs
1206 '("\e$,1sEsF\e(B" ; U+2045 U+2046
1207 "\e$,1s}s~\e(B" ; U+207D U+207E
1208 "\e$,1t-t.\e(B" ; U+208D U+208E
1209 "\e$,1{){*\e(B" ; U+2329 U+232A
1210 "\e$,1|T|U\e(B" ; U+23B4 U+23B5
1211 "\e$,2&H&I\e(B" ; U+2768 U+2769
1212 "\e$,2&J&K\e(B" ; U+276A U+276B
1213 "\e$,2&L&M\e(B" ; U+276C U+276D
1214 "\e$,2&P&Q\e(B" ; U+2770 U+2771
1215 "\e$,2&R&S\e(B" ; U+2772 U+2773
1216 "\e$,2&T&U\e(B" ; U+2774 U+2775
1217 "\e$,2'f'g\e(B" ; U+27E6 U+27E7
1218 "\e$,2'h'i\e(B" ; U+27E8 U+27E9
1219 "\e$,2'j'k\e(B" ; U+27EA U+27EB
1220 "\e$,2,#,$\e(B" ; U+2983 U+2984
1221 "\e$,2,%,&\e(B" ; U+2985 U+2986
1222 "\e$,2,',(\e(B" ; U+2987 U+2988
1223 "\e$,2,),*\e(B" ; U+2989 U+298A
1224 "\e$,2,+,,\e(B" ; U+298B U+298C
1225 "\e$,2,-,.\e(B" ; U+298D U+298E
1226 "\e$,2,/,0\e(B" ; U+298F U+2990
1227 "\e$,2,1,2\e(B" ; U+2991 U+2992
1228 "\e$,2,3,4\e(B" ; U+2993 U+2994
1229 "\e$,2,5,6\e(B" ; U+2995 U+2996
1230 "\e$,2,7,8\e(B" ; U+2997 U+2998
1231 "\e$,2-<-=\e(B" ; U+29FC U+29FD
1232 "\e$,2=H=I\e(B" ; U+3008 U+3009
1233 "\e$,2=J=K\e(B" ; U+300A U+300B
1234 "\e$,2=L=M\e(B" ; U+300C U+300D
1235 "\e$,2=N=O\e(B" ; U+300E U+300F
1236 "\e$,2=P=Q\e(B" ; U+3010 U+3011
1237 "\e$,2=T=U\e(B" ; U+3014 U+3015
1238 "\e$,2=V=W\e(B" ; U+3016 U+3017
1239 "\e$,2=X=Y\e(B" ; U+3018 U+3019
1240 "\e$,2=Z=[\e(B" ; U+301A U+301B
1241 "\e$,3m~m\7f\e(B" ; U+FD3E U+FD3F
1242 "\e$,3pUpV\e(B" ; U+FE35 U+FE36
1243 "\e$,3pWpX\e(B" ; U+FE37 U+FE38
1244 "\e$,3pYpZ\e(B" ; U+FE39 U+FE3A
1245 "\e$,3p[p\\e(B" ; U+FE3B U+FE3C
1246 "\e$,3p]p^\e(B" ; U+FE3D U+FE3E
1247 "\e$,3p_p`\e(B" ; U+FE3F U+FE40
1248 "\e$,3papb\e(B" ; U+FE41 U+FE42
1249 "\e$,3pcpd\e(B" ; U+FE43 U+FE44
1250 "\e$,3pypz\e(B" ; U+FE59 U+FE5A
1251 "\e$,3p{p|\e(B" ; U+FE5B U+FE5C
1252 "\e$,3p}p~\e(B" ; U+FE5D U+FE5E
1253 "\e$,3rhri\e(B" ; U+FF08 U+FF09
1254 "\e$,3s;s=\e(B" ; U+FF3B U+FF3D
1255 "\e$,3s[s]\e(B" ; U+FF5B U+FF5D
1256 "\e$,3s_s`\e(B" ; U+FF5F U+FF60
1257 "\e$,3sbsc\e(B" ; U+FF62 U+FF63
1258 )))
1259 (dolist (elt pairs)
1260 (modify-syntax-entry (aref elt 0) (string ?\( (aref elt 1)))
1261 (modify-syntax-entry (aref elt 1) (string ?\) (aref elt 0)))))
1262
1263 \f
1264 ;;; Setting word boundary.
1265
1266 (setq word-combining-categories
1267 '((?l . ?l)
1268 (?C . ?C)
1269 (?C . ?H)
1270 (?C . ?K)))
1271
1272 (setq word-separating-categories ; (2-byte character sets)
1273 '((?A . ?K) ; Alpha numeric - Katakana
1274 (?A . ?C) ; Alpha numeric - Chinese
1275 (?H . ?A) ; Hiragana - Alpha numeric
1276 (?H . ?K) ; Hiragana - Katakana
1277 (?H . ?C) ; Hiragana - Chinese
1278 (?K . ?A) ; Katakana - Alpha numeric
1279 (?K . ?C) ; Katakana - Chinese
1280 (?C . ?A) ; Chinese - Alpha numeric
1281 (?C . ?K) ; Chinese - Katakana
1282 ))
1283
1284 \f
1285 ;; For each character set, put the information of the most proper
1286 ;; coding system to encode it by `preferred-coding-system' property.
1287
;; Each entry is (CHARSET . CODING-SYSTEM); the coding system is stored
;; under the charset's `preferred-coding-system' property.
(dolist (entry '((latin-iso8859-1 . iso-latin-1)
                 (latin-iso8859-2 . iso-latin-2)
                 (latin-iso8859-3 . iso-latin-3)
                 (latin-iso8859-4 . iso-latin-4)
                 (thai-tis620 . thai-tis620)
                 (greek-iso8859-7 . greek-iso-8bit)
                 (arabic-iso8859-6 . iso-2022-7bit)
                 (hebrew-iso8859-8 . hebrew-iso-8bit)
                 (katakana-jisx0201 . japanese-shift-jis)
                 (latin-jisx0201 . japanese-shift-jis)
                 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                 (latin-iso8859-9 . iso-latin-5)
                 (japanese-jisx0208-1978 . iso-2022-jp)
                 (chinese-gb2312 . cn-gb-2312)
                 (japanese-jisx0208 . iso-2022-jp)
                 (korean-ksc5601 . iso-2022-kr)
                 (japanese-jisx0212 . iso-2022-jp)
                 (chinese-cns11643-1 . iso-2022-cn)
                 (chinese-cns11643-2 . iso-2022-cn)
                 (chinese-big5-1 . chinese-big5)
                 (chinese-big5-2 . chinese-big5)
                 (chinese-sisheng . iso-2022-7bit)
                 (ipa . iso-2022-7bit)
                 (vietnamese-viscii-lower . vietnamese-viscii)
                 (vietnamese-viscii-upper . vietnamese-viscii)
                 (arabic-digit . iso-2022-7bit)
                 (arabic-1-column . iso-2022-7bit)
                 (ascii-right-to-left . iso-2022-7bit)
                 (lao . lao)
                 (arabic-2-column . iso-2022-7bit)
                 (indian-is13194 . devanagari)
                 (indian-glyph . devanagari)
                 (tibetan-1-column . tibetan)
                 (ethiopic . iso-2022-7bit)
                 (chinese-cns11643-3 . iso-2022-cn)
                 (chinese-cns11643-4 . iso-2022-cn)
                 (chinese-cns11643-5 . iso-2022-cn)
                 (chinese-cns11643-6 . iso-2022-cn)
                 (chinese-cns11643-7 . iso-2022-cn)
                 (indian-2-column . devanagari)
                 (tibetan . tibetan)
                 (latin-iso8859-14 . iso-latin-8)
                 (latin-iso8859-15 . iso-latin-9)))
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))
1334
1335 \f
1336 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
1337 ;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
1338 ;; property on the charsets.
;; For each charset: mark the character returned by (make-char CHARSET)
;; in `auto-fill-chars', and set the charset's `nospace-between-words'
;; property.
(dolist (charset '(katakana-jisx0201
                   japanese-jisx0208 japanese-jisx0212
                   chinese-gb2312 chinese-big5-1 chinese-big5-2))
  (aset auto-fill-chars (make-char charset) t)
  (put-charset-property charset 'nospace-between-words t))
1346
1347 \f
1348 (setq utf-translate-cjk-mode saved-utf-translate-cjk-mode)
1349 (makunbound 'saved-utf-translate-cjk-mode)
1350
1351 ;;; Local Variables:
1352 ;;; coding: iso-2022-7bit
1353 ;;; End:
1354
1355 ;;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
1356 ;;; characters.el ends here