]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
*** empty log message ***
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 ;; 2005, 2006, 2007
7 ;; National Institute of Advanced Industrial Science and Technology (AIST)
8 ;; Registration Number H14PRO021
9
10 ;; Keywords: multibyte character, character set, syntax, category
11
12 ;; This file is part of GNU Emacs.
13
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 2, or (at your option)
17 ;; any later version.
18
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
23
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27 ;; Boston, MA 02110-1301, USA.
28
29 ;;; Commentary:
30
31 ;; This file contains multibyte characters. Save this file always in
32 ;; the coding system `iso-2022-7bit'.
33
34 ;; This file does not define the syntax for Latin-N character sets;
35 ;; those are defined by the files latin-N.el.
36
37 ;;; Code:
38
39 ;; We must set utf-translate-cjk-mode to nil while loading this file
40 ;; to avoid translating CJK characters in decode-char.
;; `decode-char' must not translate CJK characters while this file is
;; being loaded, so remember the current value of
;; `utf-translate-cjk-mode' and switch it off.
(defvar saved-utf-translate-cjk-mode utf-translate-cjk-mode)
(setq utf-translate-cjk-mode nil)

;;; Predefined categories.

;; Each element is (MNEMONIC . DOCSTRING); the categories are defined
;; in the order in which they are listed.
(dolist (category
         '(;; One category for each character set.
           (?a . "ASCII")
           (?l . "Latin")
           (?t . "Thai")
           (?g . "Greek")
           (?b . "Arabic")
           (?w . "Hebrew")
           (?y . "Cyrillic")
           (?k . "Japanese katakana")
           (?r . "Japanese roman")
           (?c . "Chinese")
           (?j . "Japanese")
           (?h . "Korean")
           (?e . "Ethiopic (Ge'ez)")
           (?v . "Vietnamese")
           (?i . "Indian")
           (?o . "Lao")
           (?q . "Tibetan")

           ;; One category for each group (row) of 2-byte character sets.
           (?A . "Alpha-numeric characters of 2-byte character sets")
           (?C . "Chinese (Han) characters of 2-byte character sets")
           (?G . "Greek characters of 2-byte character sets")
           (?H . "Japanese Hiragana characters of 2-byte character sets")
           (?K . "Japanese Katakana characters of 2-byte character sets")
           (?N . "Korean Hangul characters of 2-byte character sets")
           (?Y . "Cyrillic characters of 2-byte character sets")
           (?I . "Indian Glyphs")

           ;; Phonetic classifications.
           (?0 . "consonant")
           (?1 . "base (independent) vowel")
           (?2 . "upper diacritical mark (including upper vowel)")
           (?3 . "lower diacritical mark (including lower vowel)")
           (?4 . "tone mark")
           (?5 . "symbol")
           (?6 . "digit")
           (?7 . "vowel-modifying diacritical mark")
           (?8 . "vowel-signs")
           (?9 . "semivowel lower")

           ;; Filling.
           (?| . "While filling, we can break a line at this character.")

           ;; Indentation calculation.
           (?\s . "This character counts as a space for indentation purposes.")

           ;; Used by `kinsoku' processing.  See comments in kinsoku.el.
           (?> . "A character which can't be placed at beginning of line.")
           (?< . "A character which can't be placed at end of line.")

           ;; Combining characters.
           (?^ . "Combining diacritic or mark")))
  (define-category (car category) (cdr category)))
104 \f
105 ;;; Setting syntax and category.
106
107 ;; ASCII
108
;; Every character from code 32 up to (but not including) 127 belongs
;; to both the category `a' (ASCII) and the category `l' (Latin).
(dotimes (offset (- 127 32))
  (let ((char (+ 32 offset)))
    (modify-category-entry char ?a)
    (modify-category-entry char ?l)))
114
115 ;; Arabic character set
116
;; The generic character of each Arabic charset gets the category `b'.
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  ;; (modify-syntax-entry (make-char charset) "w")
  (modify-category-entry (make-char charset) ?b))

;; The same category for three inclusive ranges of UCS code points,
;; each given as (FIRST . LAST).
(dolist (range '((#x600 . #x6ff)
                 (#xfb50 . #xfdff)
                 (#xfe70 . #xfefe)))
  (let ((code (car range))
        (last (cdr range)))
    (while (<= code last)
      (modify-category-entry (decode-char 'ucs code) ?b)
      (setq code (1+ code)))))
137
138 ;; Chinese character set (GB2312)
139
140 ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
;; Rows 33, 34 and 41 of GB2312 get symbol syntax.
(dolist (row '(33 34 41))
  (modify-syntax-entry (make-char 'chinese-gb2312 row) "_"))

;; Bracket characters.  Each element is (CHAR SYNTAX-DESCRIPTOR): the
;; openers come first, then the closers, and every descriptor names
;; the matching partner.
(dolist (entry '((?\\e$A!2\e(B "(\e$A!3\e(B")
                 (?\\e$A!4\e(B "(\e$A!5\e(B")
                 (?\\e$A!6\e(B "(\e$A!7\e(B")
                 (?\\e$A!8\e(B "(\e$A!9\e(B")
                 (?\\e$A!:\e(B "(\e$A!;\e(B")
                 (?\\e$A!<\e(B "(\e$A!=\e(B")
                 (?\\e$A!>\e(B "(\e$A!?\e(B")
                 (?\\e$A#(\e(B "(\e$A#)\e(B")
                 (?\\e$A#{\e(B "(\e$A#}\e(B")
                 (?\\e$A#[\e(B "(\e$A#]\e(B")
                 (?\\e$A!3\e(B ")\e$A!2\e(B")
                 (?\\e$A!5\e(B ")\e$A!4\e(B")
                 (?\\e$A!7\e(B ")\e$A!6\e(B")
                 (?\\e$A!9\e(B ")\e$A!8\e(B")
                 (?\\e$A!;\e(B ")\e$A!:\e(B")
                 (?\\e$A!=\e(B ")\e$A!<\e(B")
                 (?\\e$A!?\e(B ")\e$A!>\e(B")
                 (?\\e$A#)\e(B ")\e$A#(\e(B")
                 (?\\e$A#}\e(B ")\e$A#{\e(B")
                 (?\\e$A#]\e(B ")\e$A#[\e(B")))
  (modify-syntax-entry (car entry) (nth 1 entry)))

;; Every character of this string gets punctuation syntax.
(mapc (lambda (char) (modify-syntax-entry char "."))
      "\e$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d\e(B")

;; Categories: the whole charset is `c' (Chinese) and `|' (line
;; breakable); individual rows additionally get a 2-byte group.
(let ((generic-char (make-char 'chinese-gb2312)))
  (modify-category-entry generic-char ?c)
  (modify-category-entry generic-char ?\|))
(dolist (row-category '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
  (modify-category-entry (make-char 'chinese-gb2312 (car row-category))
                         (cdr row-category)))
;; Rows 48 through 126 are category `C'.
(dotimes (offset (- 127 48))
  (modify-category-entry (make-char 'chinese-gb2312 (+ 48 offset)) ?C))

;; Case pairs.  Each spec is (ROW COUNT UPPER-START LOWER-START):
;; COUNT consecutive uppercase characters of ROW beginning at
;; UPPER-START pair with the characters beginning at LOWER-START.
(let ((table (standard-case-table)))
  (dolist (spec '((#x23 26 #x41 #x61)
                  (#x26 24 #x21 #x41)
                  (#x27 33 #x21 #x51)))
    (let ((row (nth 0 spec))
          (upper (nth 2 spec))
          (lower (nth 3 spec)))
      (dotimes (i (nth 1 spec))
        (set-case-syntax-pair (make-char 'chinese-gb2312 row (+ upper i))
                              (make-char 'chinese-gb2312 row (+ lower i))
                              table)))))
191
192 ;; Chinese character set (BIG5)
193
;; Syntax for three runs of Big5 characters.  Each spec is
;; (FROM TO SYNTAX) with Big5 codes; the run covers the decoded
;; characters from FROM up to, but not including, TO.
(dolist (spec '((#xA141 #xA15D ".")
                (#xA1A5 #xA1AD ".")
                (#xA1AD #xA2AF "_")))
  (let ((char (decode-big5-char (nth 0 spec)))
        (limit (decode-big5-char (nth 1 spec)))
        (syntax (nth 2 spec)))
    (while (< char limit)
      (modify-syntax-entry char syntax)
      (setq char (1+ char)))))

;; Bracket characters, stored as OPEN CLOSE OPEN CLOSE ...  Each member
;; of a pair names the other one as its matching delimiter.
(let ((brackets "\e$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
      (idx 0))
  (while (< (1+ idx) (length brackets))
    (let ((opener (aref brackets idx))
          (closer (aref brackets (1+ idx))))
      (modify-syntax-entry opener (format "(%c" closer))
      (modify-syntax-entry closer (format ")%c" opener)))
    (setq idx (+ idx 2))))

;; Both Big5 charsets get the categories `c', `C' and `|'.
(let ((generic-chars (list (make-char 'chinese-big5-1)
                           (make-char 'chinese-big5-2))))
  ;; (modify-syntax-entry generic-big5-1-char "w")
  ;; (modify-syntax-entry generic-big5-2-char "w")
  (dolist (category '(?c ?C ?\|))
    (dolist (generic-char generic-chars)
      (modify-category-entry generic-char category))))
231
;; Case pairs for Big5, registered in the standard case table.
(let ((tbl (standard-case-table)))
  ;; 22 uppercase characters starting at #xA2CF; each lowercase partner
  ;; lies 26 code points further on (#xA2E9 onwards).
  (dotimes (i 22)
    (set-case-syntax-pair (decode-big5-char (+ #xA2CF i))
                          (decode-big5-char (+ #xA2CF i 26)) tbl))
  ;; The remaining 4 uppercase characters continue directly after the
  ;; 22 above, i.e. at #xA2CF + 22 = #xA2E5; their lowercase partners
  ;; start at #xA340.  (Starting at #xA2E4 would re-pair the last
  ;; character handled by the previous loop with the wrong partner and
  ;; leave #xA2E8 without one.)
  (dotimes (i 4)
    (set-case-syntax-pair (decode-big5-char (+ #xA2E5 i))
                          (decode-big5-char (+ #xA340 i)) tbl))
  ;; 24 uppercase characters starting at #xA344; each lowercase partner
  ;; lies 24 code points further on.
  (dotimes (i 24)
    (set-case-syntax-pair (decode-big5-char (+ #xA344 i))
                          (decode-big5-char (+ #xA344 i 24)) tbl)))
242
243
244 ;; Chinese character set (CNS11643)
245
;; The generic character of every CNS11643 plane gets the categories
;; `c', `C' and `|'.
(dolist (charset '(chinese-cns11643-1
                   chinese-cns11643-2
                   chinese-cns11643-3
                   chinese-cns11643-4
                   chinese-cns11643-5
                   chinese-cns11643-6
                   chinese-cns11643-7))
  (let ((generic-char (make-char charset)))
    ;; (modify-syntax-entry generic-char "w")
    (modify-category-entry generic-char ?c)
    (modify-category-entry generic-char ?C)
    (modify-category-entry generic-char ?|)))

;; Bracket characters, stored as OPEN CLOSE OPEN CLOSE ...  Each member
;; of a pair names the other one as its matching delimiter.
(let ((brackets "\e$(G!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
      (idx 0))
  (while (< (1+ idx) (length brackets))
    (let ((opener (aref brackets idx))
          (closer (aref brackets (1+ idx))))
      (modify-syntax-entry opener (format "(%c" closer))
      (modify-syntax-entry closer (format ")%c" opener)))
    (setq idx (+ idx 2))))
269
270 ;; Cyrillic character set (ISO-8859-5)
271
(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)

(modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
(dolist (char '(?\e,L-\e(B ?\e,Lp\e(B ?\e,L}\e(B))
  (modify-syntax-entry char "."))

;; Case pairs.  Each run is (COUNT UPPER-START LOWER-START) in
;; position codes: COUNT consecutive uppercase characters beginning at
;; UPPER-START pair with the characters beginning at LOWER-START.
;;   #x21..#x2c  ->  #x71..#x7c
;;   #x2e..#x2f  ->  #x7e..#x7f   (position #x2d is not a letter)
;;   #x30..#x4f  ->  #x50..#x6f
(let ((table (standard-case-table))
      (runs '((12 #x21 #x71)
              (2 #x2e #x7e)
              (32 #x30 #x50))))
  ;; The characters of `cyrillic-iso8859-5' itself.
  (dolist (run runs)
    (dotimes (i (car run))
      (set-case-syntax-pair
       (make-char 'cyrillic-iso8859-5 (+ (nth 1 run) i))
       (make-char 'cyrillic-iso8859-5 (+ (nth 2 run) i))
       table)))
  ;; The same letters in `mule-unicode-0100-24ff': they occupy row #x28
  ;; of that charset at the very same second position codes.
  (dolist (run runs)
    (dotimes (i (car run))
      (set-case-syntax-pair
       (make-char 'mule-unicode-0100-24ff #x28 (+ (nth 1 run) i))
       (make-char 'mule-unicode-0100-24ff #x28 (+ (nth 2 run) i))
       table))))
371
372 ;; Devanagari character set
373
374 ;;; Commented out since the categories appear not to be used anywhere
375 ;;; and word syntax is the default.
376 ;; (let ((deflist '(;; chars syntax category
377 ;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
378 ;; ; chandrabindu, anuswar, visarga
379 ;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
380 ;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
381 ;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
382 ;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
383 ;; ;; Unicode equivalents
384 ;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
385 ;; ; chandrabindu, anuswar, visarga
386 ;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
387 ;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
388 ;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
389 ;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
390 ;; ))
391 ;; elm chars len syntax category to ch i)
392 ;; (while deflist
393 ;; (setq elm (car deflist))
394 ;; (setq chars (car elm)
395 ;; len (length chars)
396 ;; syntax (nth 1 elm)
397 ;; category (nth 2 elm)
398 ;; i 0)
399 ;; (while (< i len)
400 ;; (if (= (aref chars i) ?-)
401 ;; (setq i (1+ i)
402 ;; to (aref chars i))
403 ;; (setq ch (aref chars i)
404 ;; to ch))
405 ;; (while (<= ch to)
406 ;; (modify-syntax-entry ch syntax)
407 ;; (modify-category-entry ch category)
408 ;; (setq ch (1+ ch)))
409 ;; (setq i (1+ i)))
410 ;; (setq deflist (cdr deflist))))
411
412 ;; Ethiopic character set
413
(modify-category-entry (make-char 'ethiopic) ?e)
;; (modify-syntax-entry (make-char 'ethiopic) "w")

;; UCS code points #x1200 through #x137c also get the category `e'.
(let ((code #x1200))
  (while (<= code #x137c)
    (modify-category-entry (decode-char 'ucs code) ?e)
    (setq code (1+ code))))

;; These characters get punctuation syntax.
(dolist (char '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B
                ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
                ;; Unicode equivalents of the above:
                ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B
                ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B))
  (modify-syntax-entry char "."))
424
425 ;; Greek character set (ISO-8859-7)
426
(modify-category-entry (make-char 'greek-iso8859-7) ?g)
;; UCS code points #x370 through #x3ff also get the category `g'.
(dotimes (offset (1+ (- #x3ff #x370)))
  (modify-category-entry (decode-char 'ucs (+ #x370 offset)) ?g))

;; (let ((c 182))
;; (while (< c 255)
;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
;; (setq c (1+ c))))
;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
(dolist (char '(?\e,F7\e(B ?\e,F;\e(B ?\e,F=\e(B))
  (modify-syntax-entry char "."))

(let ((tbl (standard-case-table)))
  ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
  ;; in several cases.
  ;; Each element is (CHAR SYNTAX).
  ;; NOTE(review): the original listed one character twice in a row,
  ;; first with "." and then with "_".  Only the second call had any
  ;; lasting effect, so the dead "." entry has been dropped; it may
  ;; have been meant for a different character -- confirm.
  (dolist (entry '((?\e,F!\e(B ".")
                   (?\e,F"\e(B ".")
                   (?\e,F&\e(B "_")
                   (?\e,F'\e(B ".")
                   (?\e,F)\e(B "_")
                   (?\e,F+\e(B ".")
                   (?\e,F,\e(B "_")
                   (?\e,F-\e(B "_")
                   (?\e,F/\e(B ".")
                   (?\e,F0\e(B "_")
                   (?\e,F1\e(B "_")))
    ;; (set-case-syntax ?\e,F7\e(B "_" tbl)
    ;; (set-case-syntax ?\e,F=\e(B "_" tbl)
    (set-case-syntax (car entry) (nth 1 entry) tbl))

  ;; Case pairs, each element being (UPPERCASE LOWERCASE).
  (dolist (pair '((?\e,FA\e(B ?\e,Fa\e(B) (?\e,FB\e(B ?\e,Fb\e(B)
                  (?\e,FC\e(B ?\e,Fc\e(B) (?\e,FD\e(B ?\e,Fd\e(B)
                  (?\e,FE\e(B ?\e,Fe\e(B) (?\e,FF\e(B ?\e,Ff\e(B)
                  (?\e,FG\e(B ?\e,Fg\e(B) (?\e,FH\e(B ?\e,Fh\e(B)
                  (?\e,FI\e(B ?\e,Fi\e(B) (?\e,FJ\e(B ?\e,Fj\e(B)
                  (?\e,FK\e(B ?\e,Fk\e(B) (?\e,FL\e(B ?\e,Fl\e(B)
                  (?\e,FM\e(B ?\e,Fm\e(B) (?\e,FN\e(B ?\e,Fn\e(B)
                  (?\e,FO\e(B ?\e,Fo\e(B) (?\e,FP\e(B ?\e,Fp\e(B)
                  (?\e,FQ\e(B ?\e,Fq\e(B)))
    (set-case-syntax-pair (car pair) (nth 1 pair) tbl))
  ;; This lowercase character shares its uppercase with the first pair
  ;; of the next list, so only its upcase mapping is set.
  (set-upcase-syntax ?\e,FS\e(B ?\e,Fr\e(B tbl)
  (dolist (pair '((?\e,FS\e(B ?\e,Fs\e(B) (?\e,FT\e(B ?\e,Ft\e(B)
                  (?\e,FU\e(B ?\e,Fu\e(B) (?\e,FV\e(B ?\e,Fv\e(B)
                  (?\e,FW\e(B ?\e,Fw\e(B) (?\e,FX\e(B ?\e,Fx\e(B)
                  (?\e,FY\e(B ?\e,Fy\e(B) (?\e,FZ\e(B ?\e,Fz\e(B)
                  (?\e,F[\e(B ?\e,F{\e(B) (?\e,F?\e(B ?\e,F~\e(B)
                  (?\e,F>\e(B ?\e,F}\e(B) (?\e,F<\e(B ?\e,F|\e(B)
                  (?\e,F6\e(B ?\e,F\\e(B) (?\e,F8\e(B ?\e,F]\e(B)
                  (?\e,F9\e(B ?\e,F^\e(B) (?\e,F:\e(B ?\e,F_\e(B)))
    (set-case-syntax-pair (car pair) (nth 1 pair) tbl))

  ;; Unicode equivalents
  (dolist (pair '((?\e$,1&q\e(B ?\e$,1'1\e(B) (?\e$,1&r\e(B ?\e$,1'2\e(B)
                  (?\e$,1&s\e(B ?\e$,1'3\e(B) (?\e$,1&t\e(B ?\e$,1'4\e(B)
                  (?\e$,1&u\e(B ?\e$,1'5\e(B) (?\e$,1&v\e(B ?\e$,1'6\e(B)
                  (?\e$,1&w\e(B ?\e$,1'7\e(B) (?\e$,1&x\e(B ?\e$,1'8\e(B)
                  (?\e$,1&y\e(B ?\e$,1'9\e(B) (?\e$,1&z\e(B ?\e$,1':\e(B)
                  (?\e$,1&{\e(B ?\e$,1';\e(B) (?\e$,1&|\e(B ?\e$,1'<\e(B)
                  (?\e$,1&}\e(B ?\e$,1'=\e(B) (?\e$,1&~\e(B ?\e$,1'>\e(B)
                  (?\e$,1&\7f\e(B ?\e$,1'?\e(B) (?\e$,1' \e(B ?\e$,1'@\e(B)
                  (?\e$,1'!\e(B ?\e$,1'A\e(B)))
    (set-case-syntax-pair (car pair) (nth 1 pair) tbl))
  (set-upcase-syntax ?\e$,1'#\e(B ?\e$,1'B\e(B tbl)
  (dolist (pair '((?\e$,1'#\e(B ?\e$,1'C\e(B) (?\e$,1'$\e(B ?\e$,1'D\e(B)
                  (?\e$,1'%\e(B ?\e$,1'E\e(B) (?\e$,1'&\e(B ?\e$,1'F\e(B)
                  (?\e$,1''\e(B ?\e$,1'G\e(B) (?\e$,1'(\e(B ?\e$,1'H\e(B)
                  (?\e$,1')\e(B ?\e$,1'I\e(B) (?\e$,1'*\e(B ?\e$,1'J\e(B)
                  (?\e$,1'+\e(B ?\e$,1'K\e(B) (?\e$,1&o\e(B ?\e$,1'N\e(B)
                  (?\e$,1&n\e(B ?\e$,1'M\e(B) (?\e$,1&l\e(B ?\e$,1'L\e(B)
                  (?\e$,1&f\e(B ?\e$,1',\e(B) (?\e$,1&h\e(B ?\e$,1'-\e(B)
                  (?\e$,1&i\e(B ?\e$,1'.\e(B) (?\e$,1&j\e(B ?\e$,1'/\e(B)))
    (set-case-syntax-pair (car pair) (nth 1 pair) tbl)))
527
528 ;; Hebrew character set (ISO-8859-8)
529
(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
;; UCS code points #x591 through #x5f4 also get the category `w'.
(dotimes (offset (1+ (- #x5f4 #x591)))
  (modify-category-entry (decode-char 'ucs (+ #x591 offset)) ?w))

;; Punctuation syntax, first for two characters of the charset itself...
(dolist (code '(208                     ; PASEQ
                211))                   ; SOF PASUQ
  (modify-syntax-entry (make-char 'hebrew-iso8859-8 code) "."))
;; ...then for these UCS code points.
(dolist (code '(#x5be                   ; MAQAF
                #x5c0                   ; PASEQ
                #x5c3                   ; SOF PASUQ
                #x5f3                   ; GERESH
                #x5f4))                 ; GERSHAYIM
  (modify-syntax-entry (decode-char 'ucs code) "."))
543
544 ;; (let ((c 224))
545 ;; (while (< c 251)
546 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
547 ;; (setq c (1+ c))))
548 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
549
550 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
551
(modify-category-entry (make-char 'indian-is13194) ?i)
(modify-category-entry (make-char 'indian-2-column) ?I)
(modify-category-entry (make-char 'indian-glyph) ?I)
;; Unicode Devanagari block
(dotimes (offset (1+ (- #x970 #x901)))
  (modify-category-entry (decode-char 'ucs (+ #x901 offset)) ?i))

;; Phonetic categories.  Each class is (FIRST LAST CATEGORY), an
;; inclusive range of offsets that is applied relative to every base
;; code point listed below.
(let ((classes '((#x01 #x03 ?7)         ; vowel-modifying diacritical mark
                 (#x05 #x14 ?1)         ; base vowel
                 (#x15 #x39 ?0)         ; consonant
                 (#x3e #x4d ?8)         ; vowel-signs
                 (#x51 #x54 ?4)         ; tone mark
                 (#x58 #x5f ?0)         ; consonant
                 (#x60 #x61 ?1)         ; base vowel
                 (#x62 #x63 ?8)         ; vowel-signs
                 (#x66 #x6f ?6))))      ; digit
  (dolist (base '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00))
    (dolist (class classes)
      (let ((code (+ base (nth 0 class)))
            (last (+ base (nth 1 class)))
            (category (nth 2 class)))
        (while (<= code last)
          (modify-category-entry (decode-char 'ucs code) category)
          (setq code (1+ code)))))))
580
581 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
582
;; Categories for the generic character of each Japanese charset;
;; every element is (CHARSET . CATEGORY).
(dolist (entry '((katakana-jisx0201 . ?k)
                 (katakana-jisx0201 . ?j)
                 (latin-jisx0201 . ?r)
                 (japanese-jisx0208 . ?j)
                 (japanese-jisx0212 . ?j)
                 (katakana-jisx0201 . ?\|)
                 (japanese-jisx0208 . ?\|)
                 (japanese-jisx0212 . ?\|)))
  (modify-category-entry (make-char (car entry)) (cdr entry)))

;; Categories for inclusive ranges of UCS code points; every element
;; is (FIRST LAST CATEGORY...).
(dolist (range '(;; Unicode equivalents of JISX0201-kana
                 (#xff61 #xff9f ?k ?j ?\|)
                 ;; Katakana block
                 ;; ?K is double width, ?k isn't specified
                 (#x30a0 #x30ff ?k ?j ?\|)
                 ;; Hiragana block
                 ;; ?H is actually defined to be double width
                 ;; (the category ?j is deliberately not set here)
                 (#x3040 #x309f ?H ?\|)))
  (let ((code (nth 0 range))
        (last (nth 1 range))
        (categories (nthcdr 2 range)))
    (while (<= code last)
      (dolist (category categories)
        (modify-category-entry (decode-char 'ucs code) category))
      (setq code (1+ code)))))
617
618 ;; JISX0208
619 ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
;; Rows 33, 34 and 40 of JISX0208 get symbol syntax...
(dolist (row '(33 34 40))
  (modify-syntax-entry (make-char 'japanese-jisx0208 row) "_"))
;; ...except for these characters, which are word constituents.
(dolist (char '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B
                ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))
  (modify-syntax-entry char "w"))

;; Bracket characters, stored as OPEN CLOSE OPEN CLOSE ...  Each member
;; of a pair names the other one as its matching delimiter.
(let ((brackets "\e$B!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![\e(B")
      (idx 0))
  (while (< (1+ idx) (length brackets))
    (let ((opener (aref brackets idx))
          (closer (aref brackets (1+ idx))))
      (modify-syntax-entry opener (format "(%c" closer))
      (modify-syntax-entry closer (format ")%c" opener)))
    (setq idx (+ idx 2))))

;; Categories for whole rows: (ROW . CATEGORY).
(dolist (row-category '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
  (modify-category-entry (make-char 'japanese-jisx0208 (car row-category))
                         (cdr row-category)))
;; Rows 48 through 126 are category `C'.
(dotimes (offset (- 127 48))
  (modify-category-entry (make-char 'japanese-jisx0208 (+ 48 offset)) ?C))
;; Categories for a few individual characters.
(modify-category-entry ?\e$B!<\e(B ?K)
(dolist (char '(?\e$B!+\e(B ?\e$B!,\e(B))
  (modify-category-entry char ?K)
  (modify-category-entry char ?H))
(dolist (char '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))
  (modify-category-entry char ?C))

;; Case pairs.  Each spec is (ROW COUNT UPPER-START LOWER-START):
;; COUNT consecutive uppercase characters of ROW beginning at
;; UPPER-START pair with the characters beginning at LOWER-START.
(let ((table (standard-case-table)))
  (dolist (spec '((#x23 26 #x41 #x61)
                  (#x26 24 #x21 #x41)
                  (#x27 33 #x21 #x51)))
    (let ((row (nth 0 spec))
          (upper (nth 2 spec))
          (lower (nth 3 spec)))
      (dotimes (i (nth 1 spec))
        (set-case-syntax-pair (make-char 'japanese-jisx0208 row (+ upper i))
                              (make-char 'japanese-jisx0208 row (+ lower i))
                              table)))))
665
666 ;; JISX0212
667 ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
;; Rows 33, 34 and 35 of JISX0212 get symbol syntax.
(dolist (row '(33 34 35))
  (modify-syntax-entry (make-char 'japanese-jisx0212 row) "_"))

;; The whole charset belongs to the category `C'.
(modify-category-entry (make-char 'japanese-jisx0212) ?C)
673
674 ;; JISX0201-Kana
675 ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
;; These characters get punctuation syntax.
(dolist (char '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
                ;; Unicode:
                ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B))
  (modify-syntax-entry char "."))

;; The bracket pair: the first character opens and names the second as
;; its partner; the second closes and names the first.  The closing
;; bracket must use the ")" class -- giving it "(" as well would make
;; both characters openers, so the pair could never balance.
(modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
(modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B")
685
686 ;; Korean character set (KSC5601)
687
688 ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
;; Rows 33, 34 and 38 through 41 of KSC5601 get symbol syntax.
(dolist (row '(33 34 38 39 40 41))
  (modify-syntax-entry (make-char 'korean-ksc5601 row) "_"))

;; Categories: the whole charset is `h' (Korean); individual rows
;; additionally get a 2-byte group, given as (ROW . CATEGORY).
(modify-category-entry (make-char 'korean-ksc5601) ?h)
(dolist (row-category '((35 . ?A) (37 . ?G) (42 . ?H) (43 . ?K) (44 . ?Y)))
  (modify-category-entry (make-char 'korean-ksc5601 (car row-category))
                         (cdr row-category)))

;; Bracket characters, stored as OPEN CLOSE OPEN CLOSE ...  Each member
;; of a pair names the other one as its matching delimiter.
(let ((brackets "\e$(C!2!3!4!5!6!7!8!9!:!;!<!=#(#)#[#]#{#}\e(B")
      (idx 0))
  (while (< (1+ idx) (length brackets))
    (let ((opener (aref brackets idx))
          (closer (aref brackets (1+ idx))))
      (modify-syntax-entry opener (format "(%c" closer))
      (modify-syntax-entry closer (format ")%c" opener)))
    (setq idx (+ idx 2))))

;; Case pairs.  Each spec is (ROW COUNT FIRST-START SECOND-START):
;; COUNT consecutive characters of ROW beginning at FIRST-START are
;; registered as the uppercase partners of those beginning at
;; SECOND-START.
;; NOTE(review): for the 10-character run in row #x25 the characters
;; at #x21.. look like the small forms and those at #x30.. the capital
;; forms, i.e. the reverse of the (UPPERCASE LOWERCASE) argument order
;; -- confirm against the KS C 5601 code table.
(let ((table (standard-case-table)))
  (dolist (spec '((#x23 26 #x41 #x61)
                  (#x25 10 #x21 #x30)
                  (#x25 24 #x41 #x61)
                  (#x2C 33 #x21 #x51)))
    (let ((row (nth 0 spec))
          (first (nth 2 spec))
          (second (nth 3 spec)))
      (dotimes (i (nth 1 spec))
        (set-case-syntax-pair (make-char 'korean-ksc5601 row (+ first i))
                              (make-char 'korean-ksc5601 row (+ second i))
                              table)))))
724
725 ;; Latin character set (latin-1,2,3,4,5,8,9)
726
(let ((latin-charsets '(latin-iso8859-1
                        latin-iso8859-2
                        latin-iso8859-3
                        latin-iso8859-4
                        latin-iso8859-9
                        latin-iso8859-14
                        latin-iso8859-15)))
  ;; The generic character of every Latin charset gets the category `l'.
  (dolist (charset latin-charsets)
    (modify-category-entry (make-char charset) ?l))
  ;; Code 160 of every one of them counts as a space for indentation
  ;; purposes (the category whose mnemonic is the space character).
  (dolist (charset latin-charsets)
    (modify-category-entry (make-char charset 160) ?\s)))
742
743 ;; Lao character set
744
745 (modify-category-entry (make-char 'lao) ?o)
746 (dotimes (offset (1+ (- #xeff #xe80))) ; UCS codes #xe80 through #xeff
747   (modify-category-entry (decode-char 'ucs (+ #xe80 offset)) ?o))
748
749 (let ((deflist '(;; chars syntax category
750                  ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
751                  ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
752                  ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
753                  ("\e(1XY\e(B" "w" ?3) ; vowel lower
754                  ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
755                  ("\e(1\\e(B" "w" ?9) ; semivowel lower
756                  ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
757                  ("\e(1Of\e(B" "_" ?5) ; symbol
758                  ;; Unicode equivalents
759                  ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
760                  ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
761                  ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
762                  ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
763                  ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark
764                  ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower
765                  ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
766                  ("\e$,1DODf\e(B" "_" ?5) ; symbol
767                  ))
768       from to pos)
769   ;; Each entry is (CHARS SYNTAX CATEGORY).  Inside CHARS, "X-Y" means
770   ;; every character from X through Y; other characters stand alone.
771   (dolist (entry deflist)
772     (let ((chars (car entry))
773           (syntax (nth 1 entry))
774           (category (nth 2 entry)))
775       (setq pos 0)
776       (while (< pos (length chars))
777         (if (= (aref chars pos) ?-)
778             ;; Range: FROM already sits one past the previous character.
779             (setq pos (1+ pos)
780                   to (aref chars pos))
781           (setq from (aref chars pos)
782                 to from))
783         (while (<= from to)
784           (or (string-equal syntax "w")
785               (modify-syntax-entry from syntax))
786           (modify-category-entry from category)
787           (setq from (1+ from)))
788         (setq pos (1+ pos))))))
789
790 ;; Thai character set (TIS620)
791
792 (modify-category-entry (make-char 'thai-tis620) ?t)
793 (dotimes (offset (1+ (- #xe7f #xe00))) ; UCS codes #xe00 through #xe7f
794   (modify-category-entry (decode-char 'ucs (+ #xe00 offset)) ?t))
795
796 (let ((deflist '(;; chars syntax category
797                  ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
798                  ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
799                  ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
800                  ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
801                  ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
802                  ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
803                  ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
804                  ;; Unicode equivalents
805                  ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
806                  ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
807                  ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
808                  ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
809                  ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
810                  ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
811                  ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
812                  ))
813       from to pos)
814   ;; Each entry is (CHARS SYNTAX CATEGORY).  Inside CHARS, "X-Y" means
815   ;; every character from X through Y; other characters stand alone.
816   (dolist (entry deflist)
817     (let ((chars (car entry))
818           (syntax (nth 1 entry))
819           (category (nth 2 entry)))
820       (setq pos 0)
821       (while (< pos (length chars))
822         (if (= (aref chars pos) ?-)
823             ;; Range: FROM already sits one past the previous character.
824             (setq pos (1+ pos)
825                   to (aref chars pos))
826           (setq from (aref chars pos)
827                 to from))
828         (while (<= from to)
829           (or (string-equal syntax "w")
830               (modify-syntax-entry from syntax))
831           (modify-category-entry from category)
832           (setq from (1+ from)))
833         (setq pos (1+ pos))))))
834
835 ;; Tibetan character set
836
837 (modify-category-entry (make-char 'tibetan) ?q)
838 (modify-category-entry (make-char 'tibetan-1-column) ?q)
839 (dotimes (offset (1+ (- #xfff #xf00))) ; UCS codes #xf00 through #xfff
840   (modify-category-entry (decode-char 'ucs (+ #xf00 offset)) ?q))
841
842 (let ((deflist '(;; chars syntax category
843                  ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
844                  ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
845                  ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
846                  ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
847                  ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
848                  ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
849                  ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
850                  ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
851                  ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
852                  ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
853                  ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
854                  ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
855                  ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
856                  ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
857
858                  ;; Unicode version (not complete)
859                  ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
860                  ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ;
861                  ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
862                  ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
863                  ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lower vowel/modifier
864                  ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
865                  ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
866                  ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
867                  ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
868                  ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
869                  ))
870       from to pos)
871   ;; Each entry is (CHARS SYNTAX CATEGORY).  Inside CHARS, "X-Y" means
872   ;; every character from X through Y; other characters stand alone.
873   (dolist (entry deflist)
874     (let ((chars (car entry))
875           (syntax (nth 1 entry))
876           (category (nth 2 entry)))
877       (setq pos 0)
878       (while (< pos (length chars))
879         (if (= (aref chars pos) ?-)
880             ;; Range: FROM already sits one past the previous character.
881             (setq pos (1+ pos)
882                   to (aref chars pos))
883           (setq from (aref chars pos)
884                 to from))
885         (while (<= from to)
886           (or (string-equal syntax "w")
887               (modify-syntax-entry from syntax))
888           (modify-category-entry from category)
889           (setq from (1+ from)))
890         (setq pos (1+ pos))))))
891
892 ;; Vietnamese character set
893
894 ;; Both VISCII charsets get the same two categories.
895 (dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
896   (let ((whole (make-char charset)))
897     ;; (modify-syntax-entry whole "w")
898     (modify-category-entry whole ?v)
899     ;; ?l as well, to make a word with latin characters.
900     (modify-category-entry whole ?l)))
901
902 ;; For every position code from 32 through 127, pair the character
903 ;; of the upper charset with the character of the lower charset at
904 ;; the same position.
905 (let ((tbl (standard-case-table)))
906   (dotimes (offset (- 128 32))
907     (let ((code (+ 32 offset)))
908       (set-case-syntax-pair (make-char 'vietnamese-viscii-upper code)
909                             (make-char 'vietnamese-viscii-lower code)
910                             tbl))))
911
912 ;; Unicode (mule-unicode-0100-24ff)
913
914 (let ((tbl (standard-case-table)) c)
915
916   ;; Latin Extended-A, Latin Extended-B
917   (setq c #x0100)
918   (while (<= c #x0233)
919     (modify-category-entry (decode-char 'ucs c) ?l)
920     (when (zerop (% c 2))
921       ;; Even C whose case partner is the following code, C+1.
922       (if (or (<= c #x012e)
923               (and (>= c #x014a) (<= c #x0177)))
924           (set-case-syntax-pair
925            (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
926       ;; Even C whose case partner is the preceding code, C-1.
927       (if (and (>= c #x013a) (<= c #x0148))
928           (set-case-syntax-pair
929            (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl)))
930     (setq c (1+ c)))
931
932
933 ;; In some languages, such as Turkish, U+0049 LATIN CAPITAL LETTER I
934 ;; and U+0131 LATIN SMALL LETTER DOTLESS I make a case pair, and so
935 ;; do U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN
936 ;; SMALL LETTER I.
937
938 ;; We used to set up half of those correspondences unconditionally,
939 ;; but that makes searches slow. So now we don't set up either half
940 ;; of these correspondences by default.
941
942 ;; (set-downcase-syntax ?\e$,1 P\e(B ?i tbl)
943 ;; (set-upcase-syntax ?I ?\e$,1 Q\e(B tbl)
944
945 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
946 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
947 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
948 (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl)
949 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
950 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
951 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
952
953 ;; Latin Extended-B
954 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
955 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
956 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
957 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
958 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
959 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
960 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
961 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
962 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
963 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
964 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
965 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
966 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
967 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
968 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
969 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
970 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
971 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
972 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
973 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
974 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
975 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
976 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
977 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
978 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
979 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
980 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
981 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
982 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
983 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
984 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
985 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
986 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
987 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
988 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
989 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
990 (set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
991 (set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
992 (set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
993 (set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
994 (set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
995 (set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
996 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
997 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
998 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
999 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
1000 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
1001 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
1002 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
1003 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
1004 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
1005 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
1006 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
1007 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
1008 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
1009 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
1010 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
1011 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
1012 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
1013 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
1014 (set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
1015 (set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
1016 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
1017 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
1018 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
1019 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
1020 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
1021 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
1022 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
1023 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
1024 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
1025 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
1026 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
1027 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
1028 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
1029 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
1030 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
1031 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
1032 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
1033 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
1034 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
1035 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
1036 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
1037 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
1038 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
1039 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
1040 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
1041 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
1042 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
1043 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
1044 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
1045 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
1046 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
1047 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
1048
1049   ;; Latin Extended Additional
1050   (setq c #x1e00)
1051   (while (<= c #x1ef9)
1052     (modify-category-entry (decode-char 'ucs c) ?l)
1053     ;; Even codes pair with the next code, except for the codes lying
1054     ;; strictly between #x1e94 and #x1ea0.
1055     (unless (or (/= (% c 2) 0) (and (> c #x1e94) (< c #x1ea0)))
1056       (set-case-syntax-pair (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1057     (setq c (1+ c)))
1058
1059   ;; Greek
1060   (setq c #x0370)
1061   (while (<= c #x03ff)
1062     (modify-category-entry (decode-char 'ucs c) ?g)
1063     (cond
1064      ;; #x0391 through #x03ab, leaving out #x03a2: partner is C+32.
1065      ((and (>= c #x0391) (<= c #x03ab) (/= c #x03a2))
1066       (set-case-syntax-pair
1067        (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
1068      ;; Even codes in #x03da through #x03ee: partner is C+1.
1069      ((and (>= c #x03da) (<= c #x03ee) (zerop (% c 2)))
1070       (set-case-syntax-pair
1071        (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)))
1072     (setq c (1+ c)))
1073 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
1074 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
1075 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
1076 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
1077 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
1078 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
1079 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
1080
1081   ;; Armenian
1082   (setq c #x531)
1083   (while (< c #x557)
1084     (set-case-syntax-pair (decode-char 'ucs c) (decode-char 'ucs (+ #x30 c)) tbl)
1085     ;; On to the next code; the loop covers #x531 through #x556.
1086     (setq c (+ c 1)))
1087
1088   ;; Greek Extended: a code C with low nibble 0-7 is paired with C+8, which is passed as the upper-case member
1089   (setq c #x1f00)
1090   (while (<= c #x1fff)
1091     (modify-category-entry (decode-char 'ucs c) ?g)
1092     (and (<= (logand c #x000f) 7)
1093          (<= c #x1fa7)
1094          (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
1095          (/= (logand c #x00f0) #x70) ; skip #x1f70-#x1f77; the old comparison with 7 could never be false for a value masked by #xf0
1096          (set-case-syntax-pair
1097           (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
1098     (setq c (1+ c)))
1099 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
1100 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1101 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1102 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1103 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1104 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1105 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1106 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1107 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1108 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1109 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1110 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1111 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1112 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1113 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1114 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1115 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1116 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1117 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1118 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1119 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1120 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1121 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1122 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1123
1124   ;; Cyrillic
1125   (setq c #x0400)
1126   (while (<= c #x04ff)
1127     (modify-category-entry (decode-char 'ucs c) ?y)
1128     ;; DELTA is the distance from C to its case partner, or nil when
1129     ;; C does not start a pair handled by this loop.
1130     (let ((delta
1131            (cond ((<= c #x040f) 80)   ; C is never below #x0400 here
1132                  ((<= c #x042f) 32)
1133                  ((/= (% c 2) 0) nil)
1134                  ((or (and (>= c #x0460) (<= c #x0480))
1135                       (and (>= c #x048c) (<= c #x04be))
1136                       (and (>= c #x04d0) (<= c #x04f4)))
1137                   1))))
1138       (if delta
1139           (set-case-syntax-pair
1140            (decode-char 'ucs c) (decode-char 'ucs (+ c delta)) tbl)))
1141     ;; Advance to the next code.
1142     (setq c (1+ c)))
1143 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
1144 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
1145 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
1146 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
1147 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1148
1149   ;; General punctuation.
1150   ;; Each entry below is (FIRST LAST DESCRIPTOR): every UCS code from
1151   ;; FIRST through LAST gets the syntax DESCRIPTOR.  The ranges are
1152   ;; processed in order, and C ends up one past the final LAST.
1153   (dolist (range '((#x2000 #x200b " ")
1154                    (#x2010 #x2016 "_")
1155                    ;; Punctuation syntax for quotation marks (like `)
1156                    (#x2017 #x201f ".")
1157                    (#x2020 #x2027 "_")))
1158     ;; Unpack the entry once, then walk the range with C.
1159     (let ((last-code (nth 1 range))
1160           (descriptor (nth 2 range)))
1161       (setq c (car range))
1162       (while (<= c last-code)
1163         (set-case-syntax (decode-char 'ucs c) descriptor tbl)
1164         (setq c (1+ c)))))
1165
1166   ;; Roman numerals: C is paired with C+#x10.
1167   (setq c #x2160)
1168   (while (< c #x2170)
1169     (set-case-syntax-pair
1170      (decode-char 'ucs c) (decode-char 'ucs (+ #x10 c)) tbl)
1171     (setq c (+ c 1)))
1172
1173   ;; Circled Latin: C is paired with C+26; both get category ?l.
1174   (setq c #x24b6)
1175   (while (<= c #x24cf)
1176     (let ((capital (decode-char 'ucs c))
1177           (small (decode-char 'ucs (+ c 26))))
1178       (set-case-syntax-pair capital small tbl)
1179       (dolist (ch (list capital small)) (modify-category-entry ch ?l)))
1180     (setq c (1+ c)))
1181
1182   ;; Fullwidth Latin: C is paired with C+#x20; both get category ?l.
1183   (setq c #xff21)
1184   (while (<= c #xff3a)
1185     (let ((capital (decode-char 'ucs c))
1186           (small (decode-char 'ucs (+ c #x20))))
1187       (set-case-syntax-pair capital small tbl)
1188       (dolist (ch (list capital small)) (modify-category-entry ch ?l)))
1189     (setq c (1+ c)))
1190
1191   ;; Combining characters.  Every code in each (FIRST . LAST) range
1192   ;; below is put into category ?^:
1193   ;;   #x300 - #x362    combining diacritics
1194   ;;   #x20d0 - #x20e3  combining marks
1195   (dolist (range '((#x300 . #x362)
1196                    (#x20d0 . #x20e3)))
1197     (setq c (car range))
1198     (while (<= c (cdr range))
1199       (modify-category-entry (decode-char 'ucs c) ?^)
1200       ;; Next code in the current range.
1201       (setq c (1+ c))))
1202
1203 ;; Fixme: syntax for symbols &c
1204 )
1205
1206 (let ((pairs ; each string holds two characters: an opening delimiter, then its closing partner
1207 '("\e$,1sEsF\e(B" ; U+2045 U+2046
1208 "\e$,1s}s~\e(B" ; U+207D U+207E
1209 "\e$,1t-t.\e(B" ; U+208D U+208E
1210 "\e$,1{){*\e(B" ; U+2329 U+232A
1211 "\e$,1|T|U\e(B" ; U+23B4 U+23B5
1212 "\e$,2&H&I\e(B" ; U+2768 U+2769
1213 "\e$,2&J&K\e(B" ; U+276A U+276B
1214 "\e$,2&L&M\e(B" ; U+276C U+276D
1215 "\e$,2&P&Q\e(B" ; U+2770 U+2771
1216 "\e$,2&R&S\e(B" ; U+2772 U+2773
1217 "\e$,2&T&U\e(B" ; U+2774 U+2775
1218 "\e$,2'f'g\e(B" ; U+27E6 U+27E7
1219 "\e$,2'h'i\e(B" ; U+27E8 U+27E9
1220 "\e$,2'j'k\e(B" ; U+27EA U+27EB
1221 "\e$,2,#,$\e(B" ; U+2983 U+2984
1222 "\e$,2,%,&\e(B" ; U+2985 U+2986
1223 "\e$,2,',(\e(B" ; U+2987 U+2988
1224 "\e$,2,),*\e(B" ; U+2989 U+298A
1225 "\e$,2,+,,\e(B" ; U+298B U+298C
1226 "\e$,2,-,.\e(B" ; U+298D U+298E
1227 "\e$,2,/,0\e(B" ; U+298F U+2990
1228 "\e$,2,1,2\e(B" ; U+2991 U+2992
1229 "\e$,2,3,4\e(B" ; U+2993 U+2994
1230 "\e$,2,5,6\e(B" ; U+2995 U+2996
1231 "\e$,2,7,8\e(B" ; U+2997 U+2998
1232 "\e$,2-<-=\e(B" ; U+29FC U+29FD
1233 "\e$,2=H=I\e(B" ; U+3008 U+3009
1234 "\e$,2=J=K\e(B" ; U+300A U+300B
1235 "\e$,2=L=M\e(B" ; U+300C U+300D
1236 "\e$,2=N=O\e(B" ; U+300E U+300F
1237 "\e$,2=P=Q\e(B" ; U+3010 U+3011
1238 "\e$,2=T=U\e(B" ; U+3014 U+3015
1239 "\e$,2=V=W\e(B" ; U+3016 U+3017
1240 "\e$,2=X=Y\e(B" ; U+3018 U+3019
1241 "\e$,2=Z=[\e(B" ; U+301A U+301B
1242 "\e$,3m~m\7f\e(B" ; U+FD3E U+FD3F
1243 "\e$,3pUpV\e(B" ; U+FE35 U+FE36
1244 "\e$,3pWpX\e(B" ; U+FE37 U+FE38
1245 "\e$,3pYpZ\e(B" ; U+FE39 U+FE3A
1246 "\e$,3p[p\\e(B" ; U+FE3B U+FE3C
1247 "\e$,3p]p^\e(B" ; U+FE3D U+FE3E
1248 "\e$,3p_p`\e(B" ; U+FE3F U+FE40
1249 "\e$,3papb\e(B" ; U+FE41 U+FE42
1250 "\e$,3pcpd\e(B" ; U+FE43 U+FE44
1251 "\e$,3pypz\e(B" ; U+FE59 U+FE5A
1252 "\e$,3p{p|\e(B" ; U+FE5B U+FE5C
1253 "\e$,3p}p~\e(B" ; U+FE5D U+FE5E
1254 "\e$,3rhri\e(B" ; U+FF08 U+FF09
1255 "\e$,3s;s=\e(B" ; U+FF3B U+FF3D
1256 "\e$,3s[s]\e(B" ; U+FF5B U+FF5D
1257 "\e$,3s_s`\e(B" ; U+FF5F U+FF60
1258 "\e$,3sbsc\e(B" ; U+FF62 U+FF63
1259 )))
1260 (dolist (elt pairs) ; ELT is one two-character string from the list above
1261 (modify-syntax-entry (aref elt 0) (string ?\( (aref elt 1))) ; first char: open-paren syntax, matched by the second char
1262 (modify-syntax-entry (aref elt 1) (string ?\) (aref elt 0))))) ; second char: close-paren syntax, matched by the first char
1263
1264 \f
1265 ;;; Setting word boundary.
1266
1267 (setq word-combining-categories
1268       (list (cons ?l ?l)
1269             (cons ?C ?C)
1270             (cons ?C ?H)
1271             (cons ?C ?K)))
1272
1273 (setq word-separating-categories ; (2-byte character sets)
1274       (list (cons ?A ?K)           ; Alpha numeric - Katakana
1275             (cons ?A ?C)           ; Alpha numeric - Chinese
1276             (cons ?H ?A)           ; Hiragana - Alpha numeric
1277             (cons ?H ?K)           ; Hiragana - Katakana
1278             (cons ?H ?C)           ; Hiragana - Chinese
1279             (cons ?K ?A)           ; Katakana - Alpha numeric
1280             (cons ?K ?C)           ; Katakana - Chinese
1281             (cons ?C ?A)           ; Chinese - Alpha numeric
1282             (cons ?C ?K)           ; Chinese - Katakana
1283             ))
1284
1285 \f
1286 ;; For each character set, put the information of the most proper
1287 ;; coding system to encode it by `preferred-coding-system' property.
1288
1289 (let ((preferred '((latin-iso8859-1 . iso-latin-1)
1290                    (latin-iso8859-2 . iso-latin-2)
1291                    (latin-iso8859-3 . iso-latin-3)
1292                    (latin-iso8859-4 . iso-latin-4)
1293                    (thai-tis620 . thai-tis620)
1294                    (greek-iso8859-7 . greek-iso-8bit)
1295                    (arabic-iso8859-6 . iso-2022-7bit)
1296                    (hebrew-iso8859-8 . hebrew-iso-8bit)
1297                    (katakana-jisx0201 . japanese-shift-jis)
1298                    (latin-jisx0201 . japanese-shift-jis)
1299                    (cyrillic-iso8859-5 . cyrillic-iso-8bit)
1300                    (latin-iso8859-9 . iso-latin-5)
1301                    (japanese-jisx0208-1978 . iso-2022-jp)
1302                    (chinese-gb2312 . cn-gb-2312)
1303                    (japanese-jisx0208 . iso-2022-jp)
1304                    (korean-ksc5601 . iso-2022-kr)
1305                    (japanese-jisx0212 . iso-2022-jp)
1306                    (chinese-cns11643-1 . iso-2022-cn)
1307                    (chinese-cns11643-2 . iso-2022-cn)
1308                    (chinese-big5-1 . chinese-big5)
1309                    (chinese-big5-2 . chinese-big5)
1310                    (chinese-sisheng . iso-2022-7bit)
1311                    (ipa . iso-2022-7bit)
1312                    (vietnamese-viscii-lower . vietnamese-viscii)
1313                    (vietnamese-viscii-upper . vietnamese-viscii)
1314                    (arabic-digit . iso-2022-7bit)
1315                    (arabic-1-column . iso-2022-7bit)
1316                    (ascii-right-to-left . iso-2022-7bit)
1317                    (lao . lao)
1318                    (arabic-2-column . iso-2022-7bit)
1319                    (indian-is13194 . devanagari)
1320                    (indian-glyph . devanagari)
1321                    (tibetan-1-column . tibetan)
1322                    (ethiopic . iso-2022-7bit)
1323                    (chinese-cns11643-3 . iso-2022-cn)
1324                    (chinese-cns11643-4 . iso-2022-cn)
1325                    (chinese-cns11643-5 . iso-2022-cn)
1326                    (chinese-cns11643-6 . iso-2022-cn)
1327                    (chinese-cns11643-7 . iso-2022-cn)
1328                    (indian-2-column . devanagari)
1329                    (tibetan . tibetan)
1330                    (latin-iso8859-14 . iso-latin-8)
1331                    (latin-iso8859-15 . iso-latin-9))))
1332   ;; Each entry is (CHARSET . CODING-SYSTEM).
1333   (dolist (entry preferred)
1334     (put-charset-property (car entry) 'preferred-coding-system (cdr entry))))
1335
1336 \f
1337 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
1338 ;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
1339 ;; property on the charsets.
1340 (dolist (charset '(katakana-jisx0201
1341                    japanese-jisx0208 japanese-jisx0212
1342                    chinese-gb2312 chinese-big5-1 chinese-big5-2))
1343   ;; Flag the charset in `auto-fill-chars' ...
1344   (aset auto-fill-chars (make-char charset) t)
1345   ;; ... and record that its words are written without spaces.
1346   (put-charset-property charset 'nospace-between-words t))
1347
1348 \f
1349 (setq utf-translate-cjk-mode saved-utf-translate-cjk-mode) ; put back the value saved at the top of this file
1350 (makunbound 'saved-utf-translate-cjk-mode) ; the temporary holder is no longer needed
1351
1352 ;;; Local Variables:
1353 ;;; coding: iso-2022-7bit
1354 ;;; End:
1355
1356 ;;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
1357 ;;; characters.el ends here