code.delx.au - gnu-emacs/blob - lisp/international/characters.el
Setup categories for Indian characters.
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6
7 ;; Keywords: multibyte character, character set, syntax, category
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;; This file contains multibyte characters. Save this file always in
29 ;; the coding system `iso-2022-7bit'.
30
31 ;; This file does not define the syntax for Latin-N character sets;
32 ;; those are defined by the files latin-N.el.
33
34 ;;; Code:
35
36 ;;; Predefined categories.
37
38 ;; One category per script or character set; an entry is (MNEMONIC . DOCSTRING).
39 (dolist (spec '((?a . "ASCII")
40                 (?l . "Latin")
41                 (?t . "Thai")
42                 (?g . "Greek")
43                 (?b . "Arabic")
44                 (?w . "Hebrew")
45                 (?y . "Cyrillic")
46                 (?k . "Japanese katakana")
47                 (?r . "Japanese roman")
48                 (?c . "Chinese")
49                 (?j . "Japanese")
50                 (?h . "Korean")
51                 (?e . "Ethiopic (Ge'ez)")
52                 (?v . "Vietnamese")
53                 (?i . "Indian")
54                 (?o . "Lao")
55                 (?q . "Tibetan")))
56   (define-category (car spec) (cdr spec)))
57
58 ;; One category per group (row) of the 2-byte character sets.
59 (dolist (spec '((?A . "Alpha-numeric characters of 2-byte character sets")
60                 (?C . "Chinese (Han) characters of 2-byte character sets")
61                 (?G . "Greek characters of 2-byte character sets")
62                 (?H . "Japanese Hiragana characters of 2-byte character sets")
63                 (?K . "Japanese Katakana characters of 2-byte character sets")
64                 (?N . "Korean Hangul characters of 2-byte character sets")
65                 (?Y . "Cyrillic characters of 2-byte character sets")
66                 (?I . "Indian Glyphs")))
67   (define-category (car spec) (cdr spec)))
68
69 ;; Phonetic classifications, keyed by the digit mnemonics 0 through 9.
70 (dolist (spec '((?0 . "consonant")
71                 (?1 . "base (independent) vowel")
72                 (?2 . "upper diacritical mark (including upper vowel)")
73                 (?3 . "lower diacritical mark (including lower vowel)")
74                 (?4 . "tone mark")
75                 (?5 . "symbol")
76                 (?6 . "digit")
77                 (?7 . "vowel-modifying diacritical mark")
78                 (?8 . "vowel-signs")
79                 (?9 . "semivowel lower")))
80   (define-category (car spec) (cdr spec)))
81
82 ;; For filling.
83 (define-category ?| "While filling, we can break a line at this character.")
84
85 ;; For indentation calculation.
86 (define-category ?\s
87 "This character counts as a space for indentation purposes.")
88
89 ;; Keep the following for `kinsoku' processing. See comments in
90 ;; kinsoku.el.
91 (define-category ?> "A character which can't be placed at beginning of line.")
92 (define-category ?< "A character which can't be placed at end of line.")
93
94 ;; Combining
95 (define-category ?^ "Combining diacritic or mark")
96 \f
97 ;;; Setting syntax and category.
98
99 ;; ASCII
100
101 ;; Codes 32 through 126 join both category `a' (ASCII) and
102 ;; category `l' (Latin).
103 (dotimes (offset (- 127 32))
104   (dolist (category '(?a ?l))
105     (modify-category-entry (+ 32 offset) category)))
106
107 ;; Arabic character set
108
109 ;; Arabic characters carry category `b'.  First tag the generic
110 ;; character of every legacy Arabic charset.
111 (dolist (charset '(arabic-iso8859-6
112                    arabic-digit
113                    arabic-1-column
114                    arabic-2-column))
115   ;; (modify-syntax-entry (make-char charset) "w")
116   (modify-category-entry (make-char charset) ?b))
117 ;; Then tag each character in the Unicode ranges below.  An entry is
118 ;; (FIRST . LAST) and both end points are included.
119 (dolist (range '((#x600 . #x6ff)
120                  (#xfb50 . #xfdff)
121                  (#xfe70 . #xfefe)))
122   (let ((code (car range))
123         (limit (cdr range)))
124     (while (<= code limit)
125       (modify-category-entry (decode-char 'ucs code) ?b)
126       (setq code (1+ code)))))
129
130 ;; Chinese character set (GB2312)
131
132 ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
133 (modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
134 (modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
135 (modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
136 (modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
137 (modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
138 (modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
139 (modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
140 (modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
141 (modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
142 (modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
143 (modify-syntax-entry ?\\e$A#(\e(B "(\e$A#)\e(B")
144 (modify-syntax-entry ?\\e$A#{\e(B "(\e$A#}\e(B")
145 (modify-syntax-entry ?\\e$A#[\e(B "(\e$A#]\e(B")
146 (modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
147 (modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
148 (modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
149 (modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
150 (modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
151 (modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
152 (modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
153 (modify-syntax-entry ?\\e$A#)\e(B ")\e$A#(\e(B")
154 (modify-syntax-entry ?\\e$A#}\e(B ")\e$A#{\e(B")
155 (modify-syntax-entry ?\\e$A#]\e(B ")\e$A#[\e(B")
156 ;; Unicode equivalents of above
157 (modify-syntax-entry ?\\e$,2=T\e(B "(\e$,2=U\e(B")
158 (modify-syntax-entry ?\\e$,2=H\e(B "(\e$,2=I\e(B")
159 (modify-syntax-entry ?\\e$,2=J\e(B "(\e$,2=K\e(B")
160 (modify-syntax-entry ?\\e$,2=L\e(B "(\e$,2=M\e(B")
161 (modify-syntax-entry ?\\e$,2=N\e(B "(\e$,2=O\e(B")
162 (modify-syntax-entry ?\\e$,2=V\e(B "(\e$,2=W\e(B")
163 (modify-syntax-entry ?\\e$,2=P\e(B "(\e$,2=Q\e(B")
164 (modify-syntax-entry ?\\e$,2=U\e(B ")\e$,2=T\e(B")
165 (modify-syntax-entry ?\\e$,2=I\e(B ")\e$,2=H\e(B")
166 (modify-syntax-entry ?\\e$,2=K\e(B ")\e$,2=J\e(B")
167 (modify-syntax-entry ?\\e$,2=M\e(B ")\e$,2=L\e(B")
168 (modify-syntax-entry ?\\e$,2=O\e(B ")\e$,2=N\e(B")
169 (modify-syntax-entry ?\\e$,2=W\e(B ")\e$,2=V\e(B")
170 (modify-syntax-entry ?\\e$,2=Q\e(B ")\e$,2=P\e(B")
171
172 (let ((chars "\e$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d\e(B"))
173 (dotimes (i (length chars))
174 (modify-syntax-entry (aref chars i) ".")))
175
176 ;; The charset as a whole is Chinese (`c') and breakable when filling (`|').
177 (dolist (category '(?c ?\|))
178   (modify-category-entry (make-char 'chinese-gb2312) category))
179 ;; Extra per-row categories; an entry is (ROW . CATEGORY).
180 (dolist (entry '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
181   (modify-category-entry (make-char 'chinese-gb2312 (car entry)) (cdr entry)))
182 ;; Rows 48 through 126 get category `C'.
183 (dotimes (n (- 127 48))
184   (modify-category-entry (make-char 'chinese-gb2312 (+ 48 n)) ?C))
187
188 ;; Chinese character set (BIG5)
189
190
191
192 ;; Syntax for runs of Big5 codes.  An entry is (FROM TO SYNTAX): the run
193 ;; starts at the character decoded from FROM and stops just before the
194 ;; character decoded from TO.
195 (dolist (run '((#xA141 #xA15D ".")
196                (#xA1A5 #xA1AD ".")
197                (#xA1AD #xA2AF "_")))
198   (let ((ch (decode-big5-char (nth 0 run)))
199         (end (decode-big5-char (nth 1 run)))
200         (syntax (nth 2 run)))
201     (while (< ch end)
202       (modify-syntax-entry ch syntax)
203       (setq ch (1+ ch)))))
207
208 (let ((parens "\e$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B") ; bracket characters stored as adjacent open/close pairs
209 open close)
210 (dotimes (i (/ (length parens) 2)) ; one iteration per pair
211 (setq open (aref parens (* i 2)) ; even index: the opening character
212 close (aref parens (1+ (* i 2)))) ; odd index: its closing partner
213 (modify-syntax-entry open (format "(%c" close)) ; OPEN becomes an open paren matching CLOSE
214 (modify-syntax-entry close (format ")%c" open)))) ; CLOSE becomes a close paren matching OPEN
215
216 ;; Both Big5 charsets receive the same three categories: `c', `C'
217 ;; and `|' (a line may be broken at the character when filling).
218 (dolist (category '(?c ?C ?\|))
219   (dolist (charset '(chinese-big5-1 chinese-big5-2))
220     ;; (modify-syntax-entry (make-char charset) "w")
221     (modify-category-entry (make-char charset) category)))
229
230
231 ;; Chinese character set (CNS11643)
232
233 ;; All seven CNS 11643 charsets are categorized alike: `c', `C' and `|'.
234 (dolist (plane '(chinese-cns11643-1
235                  chinese-cns11643-2
236                  chinese-cns11643-3
237                  chinese-cns11643-4
238                  chinese-cns11643-5
239                  chinese-cns11643-6
240                  chinese-cns11643-7))
241   (let ((generic (make-char plane)))
242     ;; (modify-syntax-entry generic "w")
243     (dolist (category '(?c ?C ?\|))
244       (modify-category-entry generic category))))
248
249 ;; Cyrillic character set (ISO-8859-5)
250
251 (modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
252
253 (modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
254 (modify-syntax-entry ?\e,L-\e(B ".")
255 (modify-syntax-entry ?\e,Lp\e(B ".")
256 (modify-syntax-entry ?\e,L}\e(B ".")
257 (let ((tbl (standard-case-table)))
258 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
259 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
260 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
261 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
262 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
263 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
264 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
265 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
266 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
267 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
268 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
269 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
270 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
271 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
272 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
273 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
274 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
275 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
276 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
277 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
278 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
279 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
280 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
281 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
282 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
283 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
284 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
285 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
286 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
287 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
288 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
289 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
290 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
291 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
292 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
293 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
294 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
295 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
296 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
297 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
298 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
299 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
300 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
301 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
302 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
303 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl)
304 (set-case-syntax-pair ?\e$,1(!\e(B ?\e$,1(q\e(B tbl)
305 (set-case-syntax-pair ?\e$,1("\e(B ?\e$,1(r\e(B tbl)
306 (set-case-syntax-pair ?\e$,1(#\e(B ?\e$,1(s\e(B tbl)
307 (set-case-syntax-pair ?\e$,1($\e(B ?\e$,1(t\e(B tbl)
308 (set-case-syntax-pair ?\e$,1(%\e(B ?\e$,1(u\e(B tbl)
309 (set-case-syntax-pair ?\e$,1(&\e(B ?\e$,1(v\e(B tbl)
310 (set-case-syntax-pair ?\e$,1('\e(B ?\e$,1(w\e(B tbl)
311 (set-case-syntax-pair ?\e$,1((\e(B ?\e$,1(x\e(B tbl)
312 (set-case-syntax-pair ?\e$,1()\e(B ?\e$,1(y\e(B tbl)
313 (set-case-syntax-pair ?\e$,1(*\e(B ?\e$,1(z\e(B tbl)
314 (set-case-syntax-pair ?\e$,1(+\e(B ?\e$,1({\e(B tbl)
315 (set-case-syntax-pair ?\e$,1(,\e(B ?\e$,1(|\e(B tbl)
316 (set-case-syntax-pair ?\e$,1(.\e(B ?\e$,1(~\e(B tbl)
317 (set-case-syntax-pair ?\e$,1(/\e(B ?\e$,1(\7f\e(B tbl)
318 (set-case-syntax-pair ?\e$,1(0\e(B ?\e$,1(P\e(B tbl)
319 (set-case-syntax-pair ?\e$,1(1\e(B ?\e$,1(Q\e(B tbl)
320 (set-case-syntax-pair ?\e$,1(2\e(B ?\e$,1(R\e(B tbl)
321 (set-case-syntax-pair ?\e$,1(3\e(B ?\e$,1(S\e(B tbl)
322 (set-case-syntax-pair ?\e$,1(4\e(B ?\e$,1(T\e(B tbl)
323 (set-case-syntax-pair ?\e$,1(5\e(B ?\e$,1(U\e(B tbl)
324 (set-case-syntax-pair ?\e$,1(6\e(B ?\e$,1(V\e(B tbl)
325 (set-case-syntax-pair ?\e$,1(7\e(B ?\e$,1(W\e(B tbl)
326 (set-case-syntax-pair ?\e$,1(8\e(B ?\e$,1(X\e(B tbl)
327 (set-case-syntax-pair ?\e$,1(9\e(B ?\e$,1(Y\e(B tbl)
328 (set-case-syntax-pair ?\e$,1(:\e(B ?\e$,1(Z\e(B tbl)
329 (set-case-syntax-pair ?\e$,1(;\e(B ?\e$,1([\e(B tbl)
330 (set-case-syntax-pair ?\e$,1(<\e(B ?\e$,1(\\e(B tbl)
331 (set-case-syntax-pair ?\e$,1(=\e(B ?\e$,1(]\e(B tbl)
332 (set-case-syntax-pair ?\e$,1(>\e(B ?\e$,1(^\e(B tbl)
333 (set-case-syntax-pair ?\e$,1(?\e(B ?\e$,1(_\e(B tbl)
334 (set-case-syntax-pair ?\e$,1(@\e(B ?\e$,1(`\e(B tbl)
335 (set-case-syntax-pair ?\e$,1(A\e(B ?\e$,1(a\e(B tbl)
336 (set-case-syntax-pair ?\e$,1(B\e(B ?\e$,1(b\e(B tbl)
337 (set-case-syntax-pair ?\e$,1(C\e(B ?\e$,1(c\e(B tbl)
338 (set-case-syntax-pair ?\e$,1(D\e(B ?\e$,1(d\e(B tbl)
339 (set-case-syntax-pair ?\e$,1(E\e(B ?\e$,1(e\e(B tbl)
340 (set-case-syntax-pair ?\e$,1(F\e(B ?\e$,1(f\e(B tbl)
341 (set-case-syntax-pair ?\e$,1(G\e(B ?\e$,1(g\e(B tbl)
342 (set-case-syntax-pair ?\e$,1(H\e(B ?\e$,1(h\e(B tbl)
343 (set-case-syntax-pair ?\e$,1(I\e(B ?\e$,1(i\e(B tbl)
344 (set-case-syntax-pair ?\e$,1(J\e(B ?\e$,1(j\e(B tbl)
345 (set-case-syntax-pair ?\e$,1(K\e(B ?\e$,1(k\e(B tbl)
346 (set-case-syntax-pair ?\e$,1(L\e(B ?\e$,1(l\e(B tbl)
347 (set-case-syntax-pair ?\e$,1(M\e(B ?\e$,1(m\e(B tbl)
348 (set-case-syntax-pair ?\e$,1(N\e(B ?\e$,1(n\e(B tbl)
349 (set-case-syntax-pair ?\e$,1(O\e(B ?\e$,1(o\e(B tbl))
350
351 ;; Devanagari character set
352
353 ;;; Commented out since the categories appear not to be used anywhere
354 ;;; and word syntax is the default.
355 ;; (let ((deflist '(;; chars syntax category
356 ;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
357 ;; ; chandrabindu, anuswar, visarga
358 ;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
359 ;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
360 ;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
361 ;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
362 ;; ;; Unicode equivalents
363 ;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
364 ;; ; chandrabindu, anuswar, visarga
365 ;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
366 ;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
367 ;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
368 ;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
369 ;; ))
370 ;; elm chars len syntax category to ch i)
371 ;; (while deflist
372 ;; (setq elm (car deflist))
373 ;; (setq chars (car elm)
374 ;; len (length chars)
375 ;; syntax (nth 1 elm)
376 ;; category (nth 2 elm)
377 ;; i 0)
378 ;; (while (< i len)
379 ;; (if (= (aref chars i) ?-)
380 ;; (setq i (1+ i)
381 ;; to (aref chars i))
382 ;; (setq ch (aref chars i)
383 ;; to ch))
384 ;; (while (<= ch to)
385 ;; (modify-syntax-entry ch syntax)
386 ;; (modify-category-entry ch category)
387 ;; (setq ch (1+ ch)))
388 ;; (setq i (1+ i)))
389 ;; (setq deflist (cdr deflist))))
390
391 ;; Ethiopic character set
392
393 (modify-category-entry (make-char 'ethiopic) ?e)
394 ;; (modify-syntax-entry (make-char 'ethiopic) "w")
395 (dotimes (i (1+ (- #x137c #x1200)))
396 (modify-category-entry (decode-char 'ucs (+ #x1200 i)) ?e))
397 (let ((chars '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
398 ;; Unicode equivalents of the above:
399 ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B)))
400 (while chars
401 (modify-syntax-entry (car chars) ".")
402 (setq chars (cdr chars))))
403
404 ;; Greek character set (ISO-8859-7)
405
406 ;; Category `g': the ISO-8859-7 charset as a whole, then every code
407 ;; point from U+0370 through U+03FF.
408 (modify-category-entry (make-char 'greek-iso8859-7) ?g)
409 (dotimes (n (1+ (- #x3ff #x370)))
410   (modify-category-entry (decode-char 'ucs (+ #x370 n)) ?g))
411
412 ;; (let ((c 182))
413 ;; (while (< c 255)
414 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
415 ;; (setq c (1+ c))))
416 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
417 (modify-syntax-entry ?\e,F7\e(B ".")
418 (modify-syntax-entry ?\e,F;\e(B ".")
419 (modify-syntax-entry ?\e,F=\e(B ".")
420 (let ((tbl (standard-case-table))) ; every setting below is made through TBL
421 ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
422 ;; in several cases.
423 (set-case-syntax ?\e,F!\e(B "." tbl)
424 (set-case-syntax ?\e,F"\e(B "." tbl)
425 (set-case-syntax ?\e,F&\e(B "." tbl) ; NOTE(review): the next line sets the same character again, so this setting presumably has no lasting effect -- possibly meant for a different character; confirm
426 (set-case-syntax ?\e,F&\e(B "_" tbl) ; second setting for the character on the previous line
427 (set-case-syntax ?\e,F'\e(B "." tbl)
428 (set-case-syntax ?\e,F)\e(B "_" tbl)
429 (set-case-syntax ?\e,F+\e(B "." tbl)
430 (set-case-syntax ?\e,F,\e(B "_" tbl)
431 (set-case-syntax ?\e,F-\e(B "_" tbl)
432 (set-case-syntax ?\e,F/\e(B "." tbl)
433 (set-case-syntax ?\e,F0\e(B "_" tbl)
434 (set-case-syntax ?\e,F1\e(B "_" tbl)
435 ;; (set-case-syntax ?\e,F7\e(B "_" tbl)
436 ;; (set-case-syntax ?\e,F=\e(B "_" tbl)
437 (set-case-syntax-pair ?\e,FA\e(B ?\e,Fa\e(B tbl) ; case pairs for the ISO-8859-7 letters (argument order assumed UPPER LOWER TABLE -- confirm)
438 (set-case-syntax-pair ?\e,FB\e(B ?\e,Fb\e(B tbl)
439 (set-case-syntax-pair ?\e,FC\e(B ?\e,Fc\e(B tbl)
440 (set-case-syntax-pair ?\e,FD\e(B ?\e,Fd\e(B tbl)
441 (set-case-syntax-pair ?\e,FE\e(B ?\e,Fe\e(B tbl)
442 (set-case-syntax-pair ?\e,FF\e(B ?\e,Ff\e(B tbl)
443 (set-case-syntax-pair ?\e,FG\e(B ?\e,Fg\e(B tbl)
444 (set-case-syntax-pair ?\e,FH\e(B ?\e,Fh\e(B tbl)
445 (set-case-syntax-pair ?\e,FI\e(B ?\e,Fi\e(B tbl)
446 (set-case-syntax-pair ?\e,FJ\e(B ?\e,Fj\e(B tbl)
447 (set-case-syntax-pair ?\e,FK\e(B ?\e,Fk\e(B tbl)
448 (set-case-syntax-pair ?\e,FL\e(B ?\e,Fl\e(B tbl)
449 (set-case-syntax-pair ?\e,FM\e(B ?\e,Fm\e(B tbl)
450 (set-case-syntax-pair ?\e,FN\e(B ?\e,Fn\e(B tbl)
451 (set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
452 (set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
453 (set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
454 (set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
455 (set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
456 (set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
457 (set-case-syntax-pair ?\e,FV\e(B ?\e,Fv\e(B tbl)
458 (set-case-syntax-pair ?\e,FW\e(B ?\e,Fw\e(B tbl)
459 (set-case-syntax-pair ?\e,FX\e(B ?\e,Fx\e(B tbl)
460 (set-case-syntax-pair ?\e,FY\e(B ?\e,Fy\e(B tbl)
461 (set-case-syntax-pair ?\e,FZ\e(B ?\e,Fz\e(B tbl)
462 (set-case-syntax-pair ?\e,F[\e(B ?\e,F{\e(B tbl)
463 (set-case-syntax-pair ?\e,F?\e(B ?\e,F~\e(B tbl)
464 (set-case-syntax-pair ?\e,F>\e(B ?\e,F}\e(B tbl)
465 (set-case-syntax-pair ?\e,F<\e(B ?\e,F|\e(B tbl)
466 (set-case-syntax-pair ?\e,F6\e(B ?\e,F\\e(B tbl)
467 (set-case-syntax-pair ?\e,F8\e(B ?\e,F]\e(B tbl)
468 (set-case-syntax-pair ?\e,F9\e(B ?\e,F^\e(B tbl)
469 (set-case-syntax-pair ?\e,F:\e(B ?\e,F_\e(B tbl)
470 ;; Unicode equivalents of the letter pairs above
471 (set-case-syntax-pair ?\e$,1&q\e(B ?\e$,1'1\e(B tbl)
472 (set-case-syntax-pair ?\e$,1&r\e(B ?\e$,1'2\e(B tbl)
473 (set-case-syntax-pair ?\e$,1&s\e(B ?\e$,1'3\e(B tbl)
474 (set-case-syntax-pair ?\e$,1&t\e(B ?\e$,1'4\e(B tbl)
475 (set-case-syntax-pair ?\e$,1&u\e(B ?\e$,1'5\e(B tbl)
476 (set-case-syntax-pair ?\e$,1&v\e(B ?\e$,1'6\e(B tbl)
477 (set-case-syntax-pair ?\e$,1&w\e(B ?\e$,1'7\e(B tbl)
478 (set-case-syntax-pair ?\e$,1&x\e(B ?\e$,1'8\e(B tbl)
479 (set-case-syntax-pair ?\e$,1&y\e(B ?\e$,1'9\e(B tbl)
480 (set-case-syntax-pair ?\e$,1&z\e(B ?\e$,1':\e(B tbl)
481 (set-case-syntax-pair ?\e$,1&{\e(B ?\e$,1';\e(B tbl)
482 (set-case-syntax-pair ?\e$,1&|\e(B ?\e$,1'<\e(B tbl)
483 (set-case-syntax-pair ?\e$,1&}\e(B ?\e$,1'=\e(B tbl)
484 (set-case-syntax-pair ?\e$,1&~\e(B ?\e$,1'>\e(B tbl)
485 (set-case-syntax-pair ?\e$,1&\7f\e(B ?\e$,1'?\e(B tbl)
486 (set-case-syntax-pair ?\e$,1' \e(B ?\e$,1'@\e(B tbl)
487 (set-case-syntax-pair ?\e$,1'!\e(B ?\e$,1'A\e(B tbl)
488 (set-case-syntax-pair ?\e$,1'#\e(B ?\e$,1'C\e(B tbl)
489 (set-case-syntax-pair ?\e$,1'$\e(B ?\e$,1'D\e(B tbl)
490 (set-case-syntax-pair ?\e$,1'%\e(B ?\e$,1'E\e(B tbl)
491 (set-case-syntax-pair ?\e$,1'&\e(B ?\e$,1'F\e(B tbl)
492 (set-case-syntax-pair ?\e$,1''\e(B ?\e$,1'G\e(B tbl)
493 (set-case-syntax-pair ?\e$,1'(\e(B ?\e$,1'H\e(B tbl)
494 (set-case-syntax-pair ?\e$,1')\e(B ?\e$,1'I\e(B tbl)
495 (set-case-syntax-pair ?\e$,1'*\e(B ?\e$,1'J\e(B tbl)
496 (set-case-syntax-pair ?\e$,1'+\e(B ?\e$,1'K\e(B tbl)
497 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
498 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
499 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
500 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
501 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
502 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
503 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl))
504
505 ;; Hebrew character set (ISO-8859-8)
506
507 ;; Category `w': the ISO-8859-8 charset, then U+0591 through U+05F4.
508 (modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
509 (dotimes (n (1+ (- #x5f4 #x591)))
510   (modify-category-entry (decode-char 'ucs (+ #x591 n)) ?w))
512
513 ;; Hebrew punctuation gets "." syntax.  First two ISO-8859-8 codes
514 ;; (208 PASEQ, 211 SOF PASUQ), then the Unicode code points U+05BE MAQAF,
515 ;; U+05C0 PASEQ, U+05C3 SOF PASUQ, U+05F3 GERESH and U+05F4 GERSHAYIM.
516 (dolist (code '(208 211))
517   (modify-syntax-entry (make-char 'hebrew-iso8859-8 code) "."))
518 (dolist (code '(#x5be #x5c0 #x5c3 #x5f3 #x5f4))
519   (modify-syntax-entry (decode-char 'ucs code) "."))
520
521 ;; (let ((c 224))
522 ;; (while (< c 251)
523 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
524 ;; (setq c (1+ c))))
525 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
526
527 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
528
529 ;; Legacy charsets: IS 13194 is `i'; the two glyph charsets are `I'.
530 (dolist (entry '((indian-is13194 . ?i) (indian-2-column . ?I) (indian-glyph . ?I)))
531   (modify-category-entry (make-char (car entry)) (cdr entry)))
532 ;; Unicode Devanagari block: U+0901 through U+0970 are `i' as well.
533 (dotimes (n (1+ (- #x970 #x901)))
534   (modify-category-entry (decode-char 'ucs (+ #x901 n)) ?i))
537
538 ;; Phonetic categories for the Indic Unicode blocks.  A LAYOUT row is
539 ;; (FIRST LAST CATEGORY), with offsets counted from a block base.
540 (let ((bases '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00))
541       (layout '((#x01 #x03 ?7)    ; vowel modifier
542                 (#x05 #x14 ?1)    ; base vowel
543                 (#x15 #x39 ?0)    ; consonants
544                 (#x3e #x4d ?8)    ; vowel modifier
545                 (#x51 #x54 ?4)    ; stress/tone mark
546                 (#x58 #x5f ?0)    ; consonants
547                 (#x60 #x61 ?1)    ; base vowel
548                 (#x62 #x63 ?8)    ; vowel modifier
549                 (#x66 #x6f ?6)))) ; digits
550   (dolist (base bases)
551     (dolist (row layout)
552       (let ((offset (car row)))
553         (while (<= offset (nth 1 row))
554           (modify-category-entry (decode-char 'ucs (+ base offset)) (nth 2 row))
555           (setq offset (1+ offset)))))))
557
558 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
559
560 ;; Charset-wide categories; an entry is (CHARSET CATEGORY...).
561 (dolist (entry '((katakana-jisx0201 ?k ?j ?\|)
562                  (latin-jisx0201 ?r)
563                  (japanese-jisx0208 ?j ?\|)
564                  (japanese-jisx0212 ?j ?\|)))
565   (dolist (category (cdr entry))
566     (modify-category-entry (make-char (car entry)) category)))
568
569 ;; Unicode equivalents of JISX0201-kana, U+FF61 through U+FF9F:
570 ;; categories `k', `j' and `|' for each code point.
571 (dotimes (n (1+ (- #xff9f #xff61)))
572   (let ((kana (decode-char 'ucs (+ #xff61 n))))
573     (dolist (category '(?k ?j ?\|))
574       (modify-category-entry kana category))))
576
577 ;; Katakana block, U+30A0 through U+30FF.
578 ;; ?K is double width, ?k isn't specified
579 (dotimes (n (1+ (- #x30ff #x30a0)))
580   (let ((kana (decode-char 'ucs (+ #x30a0 n))))
581     (dolist (category '(?k ?j ?\|))
582       (modify-category-entry kana category))))
585
586 ;; Hiragana block, U+3040 through U+309F.
587 ;; ?H is actually defined to be double width
588 (dotimes (n (1+ (- #x309f #x3040)))
589   (let ((kana (decode-char 'ucs (+ #x3040 n))))
590     (modify-category-entry kana ?H)
591     ;;(modify-category-entry kana ?j)
592     (modify-category-entry kana ?\|)))
594
595 ;; JISX0208
596 ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
597 (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
598 (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
599 (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
600 (let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
601 (while chars
602 (modify-syntax-entry (car chars) "w")
603 (setq chars (cdr chars))))
604 (modify-syntax-entry ?\\e$B!J\e(B "(\e$B!K\e(B")
605 (modify-syntax-entry ?\\e$B!N\e(B "(\e$B!O\e(B")
606 (modify-syntax-entry ?\\e$B!P\e(B "(\e$B!Q\e(B")
607 (modify-syntax-entry ?\\e$B!V\e(B "(\e$B!W\e(B")
608 (modify-syntax-entry ?\\e$B!X\e(B "(\e$B!Y\e(B")
609 (modify-syntax-entry ?\\e$B!K\e(B ")\e$B!J\e(B")
610 (modify-syntax-entry ?\\e$B!O\e(B ")\e$B!N\e(B")
611 (modify-syntax-entry ?\\e$B!Q\e(B ")\e$B!P\e(B")
612 (modify-syntax-entry ?\\e$B!W\e(B ")\e$B!V\e(B")
613 (modify-syntax-entry ?\\e$B!Y\e(B ")\e$B!X\e(B")
614
615 (modify-category-entry (make-char 'japanese-jisx0208 35) ?A)
616 (modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
617 (modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
618 (modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
619 (modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
620 (let ((row 48))
621 (while (< row 127)
622 (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
623 (setq row (1+ row))))
624 (modify-category-entry ?\e$B!<\e(B ?K)
625 (let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B)))
626 (while chars
627 (modify-category-entry (car chars) ?K)
628 (modify-category-entry (car chars) ?H)
629 (setq chars (cdr chars))))
630 (let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
631 (while chars
632 (modify-category-entry (car chars) ?C)
633 (setq chars (cdr chars))))
634
635 ;; JISX0212
636 ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
637 (modify-syntax-entry (make-char 'japanese-jisx0212 33) "_")
638 (modify-syntax-entry (make-char 'japanese-jisx0212 34) "_")
639 (modify-syntax-entry (make-char 'japanese-jisx0212 35) "_")
640
641 (modify-category-entry (make-char 'japanese-jisx0212 ) ?C)
642
643 ;; JISX0201-Kana
644 ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
645 (let ((chars '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
646 ;; Unicode:
647 ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B)))
648 (while chars
649 (modify-syntax-entry (car chars) ".")
650 (setq chars (cdr chars))))
651 ;; This pair of bracket characters matches each other: the first is the opener, the second the closer.
652 (modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
653 (modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B")
654
655 ;; Korean character set (KSC5601)
656
657 ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
658 ;; Rows of KSC5601 whose characters get symbol syntax ("_").
659 (dolist (row '(33 34 38 39 40 41))
660   (modify-syntax-entry (make-char 'korean-ksc5601 row) "_"))
661
662 ;; The charset as a whole is `h' (Korean); some rows get an extra
663 ;; category, listed as (ROW . CATEGORY).
664 (modify-category-entry (make-char 'korean-ksc5601) ?h)
665 (dolist (entry '((35 . ?A) (37 . ?G) (42 . ?H) (43 . ?K) (44 . ?Y)))
666   (modify-category-entry (make-char 'korean-ksc5601 (car entry)) (cdr entry)))
671
672 ;; Latin character set (latin-1,2,3,4,5,8,9)
673
674 ;; Every Latin-N charset is category `l' (Latin), and the character at
675 ;; code 160 of each one is put in category ` ' (counts as a space for
676 ;; indentation purposes).
677 (dolist (charset '(latin-iso8859-1 latin-iso8859-2 latin-iso8859-3
678                    latin-iso8859-4 latin-iso8859-9 latin-iso8859-14
679                    latin-iso8859-15))
680   (modify-category-entry (make-char charset) ?l)
681   (modify-category-entry (make-char charset 160) ?\ ))
689
690 ;; Lao character set
691
692 (modify-category-entry (make-char 'lao) ?o) ; the whole Lao charset is category `o'
693 (dotimes (i (1+ (- #xeff #xe80))) ; ... and so is every code point U+0E80 through U+0EFF
694 (modify-category-entry (decode-char 'ucs (+ i #xe80)) ?o))
695 ;; Per-character syntax and phonetic category.  Inside a CHARS string, a `-' between two characters stands for the inclusive range between them.
696 (let ((deflist '(;; chars syntax category
697 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
698 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
699 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
700 ("\e(1XY\e(B" "w" ?3) ; vowel lower
701 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark -- NOTE(review): this range ends at `l' while the Unicode row below ends at `k'; confirm which is intended
702 ("\e(1\\e(B" "w" ?9) ; semivowel lower
703 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
704 ("\e(1Of\e(B" "_" ?5) ; symbol
705 ;; Unicode equivalents
706 ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
707 ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
708 ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
709 ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
710 ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark
711 ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower -- NOTE(review): two characters here versus one in the Lao-charset row above, and `D]' also appears in the vowel-base row; confirm
712 ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
713 ("\e$,1DODf\e(B" "_" ?5) ; symbol
714 ))
715 elm chars len syntax category to ch i) ; loop scratch variables, initially nil
716 (while deflist ; one pass per deflist row
717 (setq elm (car deflist))
718 (setq chars (car elm)
719 len (length chars)
720 syntax (nth 1 elm)
721 category (nth 2 elm)
722 i 0)
723 (while (< i len) ; walk the CHARS string
724 (if (= (aref chars i) ?-) ; a `-' continues a range
725 (setq i (1+ i)
726 to (aref chars i)) ; range ends at the character after the `-'; CH is already one past the previously handled character
727 (setq ch (aref chars i) ; otherwise a single character: a range of one
728 to ch))
729 (while (<= ch to) ; apply the row to every character from CH through TO
730 (unless (string-equal syntax "w") ; rows whose syntax is "w" leave the syntax table untouched
731 (modify-syntax-entry ch syntax))
732 (modify-category-entry ch category)
733 (setq ch (1+ ch)))
734 (setq i (1+ i)))
735 (setq deflist (cdr deflist))))
736
737 ;; Thai character set (TIS620)
738
739 (modify-category-entry (make-char 'thai-tis620) ?t)
740 (dotimes (i (1+ (- #xe7f #xe00)))
741 (modify-category-entry (decode-char 'ucs (+ i #xe00)) ?t))
742
743 (let ((deflist '(;; chars syntax category; a - inside chars spans a range
744 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
745 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
746 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
747 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
748 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
749 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
750 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
751 ;; Unicode equivalents
752 ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
753 ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
754 ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
755 ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
756 ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
757 ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
758 ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
759 ))
760 elm chars len syntax category to ch i) ; scratch variables used by the loops below
761 (while deflist ; one pass per deflist entry
762 (setq elm (car deflist))
763 (setq chars (car elm)
764 len (length chars)
765 syntax (nth 1 elm)
766 category (nth 2 elm)
767 i 0)
768 (while (< i len) ; walk the chars string by index
769 (if (= (aref chars i) ?-)
770 (setq i (1+ i) ; on a hyphen, step over it and take the next character
771 to (aref chars i)) ; as the range end; ch is already one past the range start
772 (setq ch (aref chars i) ; otherwise handle just this one character
773 to ch))
774 (while (<= ch to) ; apply the entry to every code from ch through to
775 (unless (string-equal syntax "w") ; entries whose syntax is w make no syntax-table change
776 (modify-syntax-entry ch syntax))
777 (modify-category-entry ch category)
778 (setq ch (1+ ch)))
779 (setq i (1+ i)))
780 (setq deflist (cdr deflist))))
781
782 ;; Tibetan character set
783
784 (modify-category-entry (make-char 'tibetan) ?q) ; category ?q for the character built from charset `tibetan' alone
785 (modify-category-entry (make-char 'tibetan-1-column) ?q) ; and likewise for `tibetan-1-column'
786 (dotimes (i (1+ (- #xfff #xf00))) ; i runs 0..#xff, i.e. codes #xf00..#xfff inclusive
787 (modify-category-entry (decode-char 'ucs (+ i #xf00)) ?q)) ; same category for each UCS-decoded code
788
789 (let ((deflist '(;; chars syntax category; a - inside chars spans a range
790 ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
791 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ; more category ?0 characters
792 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ; more category ?0 characters
793 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ; more category ?0 characters
794 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
795 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
796 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
797 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
798 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
799 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ; same category, written with the other escape designation
800 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
801 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ; same category, written with the other escape designation
802 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
803 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
804
805 ;; Unicode version (not complete)
806 ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
807 ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ; more category ?0 characters
808 ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
809 ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
810 ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lower vowel/modifier
811 ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
812 ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
813 ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
814 ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
815 ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
816 ))
817 elm chars len syntax category to ch i) ; scratch variables used by the loops below
818 (while deflist ; one pass per deflist entry
819 (setq elm (car deflist))
820 (setq chars (car elm)
821 len (length chars)
822 syntax (nth 1 elm)
823 category (nth 2 elm)
824 i 0)
825 (while (< i len) ; walk the chars string by index
826 (if (= (aref chars i) ?-)
827 (setq i (1+ i) ; on a hyphen, step over it and take the next character
828 to (aref chars i)) ; as the range end; ch is already one past the range start
829 (setq ch (aref chars i) ; otherwise handle just this one character
830 to ch))
831 (while (<= ch to) ; apply the entry to every code from ch through to
832 (unless (string-equal syntax "w") ; entries whose syntax is w make no syntax-table change
833 (modify-syntax-entry ch syntax))
834 (modify-category-entry ch category)
835 (setq ch (1+ ch)))
836 (setq i (1+ i)))
837 (setq deflist (cdr deflist))))
838
839 ;; Vietnamese character set
840
841 (let ((lower (make-char 'vietnamese-viscii-lower)) ; character built from the lower charset alone
842 (upper (make-char 'vietnamese-viscii-upper))) ; character built from the upper charset alone
843 ;; (modify-syntax-entry lower "w")
844 ;; (modify-syntax-entry upper "w")
845 (modify-category-entry lower ?v) ; both charsets get category ?v ...
846 (modify-category-entry upper ?v)
847 (modify-category-entry lower ?l) ; ... and also ?l, to make a word with
848 (modify-category-entry upper ?l) ; latin characters.
849 )
850
851 (let ((case-tbl (standard-case-table)) ; pairs are recorded in the standard case table
852 (code 32)) ; position within each VISCII charset, 32..127
853 (while (<= code 127)
854 (let ((upper (make-char 'vietnamese-viscii-upper code)) ; same position in the upper charset
855 (lower (make-char 'vietnamese-viscii-lower code))) ; and in the lower charset
856 (set-case-syntax-pair upper lower case-tbl))
857 (setq code (1+ code))))
858
859 ;; Unicode (mule-unicode-0100-24ff)
860
861 (let ((tbl (standard-case-table)) c)
862
863 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
864 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
865 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
866 ;; Thus we have to check language-environment to handle casing
867 ;; correctly. Currently only I<->i is available.
868
869 ;; Latin Extended-A, Latin Extended-B
870 (setq c #x0100) ; c is a UCS code; decode-char converts it at each use
871 (while (<= c #x0233)
872 (modify-category-entry (decode-char 'ucs c) ?l) ; every code in the range gets category ?l
873 (and (or (<= c #x012e)
874 (and (>= c #x014a) (<= c #x0177)))
875 (zerop (% c 2)) ; in these two ranges an even code is paired with the next (odd) code
876 (set-case-syntax-pair
877 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
878 (and (>= c #x013a)
879 (<= c #x0148)
880 (zerop (% c 2)) ; here the even code is the second argument, paired with the odd code before it
881 (set-case-syntax-pair
882 (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))
883 (setq c (1+ c)))
884 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
885 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
886 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
887 ;;; (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl) ; these two have different length!
888 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
889 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
890 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
891
892 ;; Latin Extended-B: pairs listed one by one (first argument, second argument, table)
893 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
894 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
895 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
896 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
897 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
898 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
899 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
900 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
901 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
902 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
903 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
904 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
905 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
906 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
907 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
908 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
909 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
910 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
911 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
912 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
913 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
914 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
915 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
916 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
917 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
918 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
919 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
920 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
921 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
922 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
923 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
924 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
925 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
926 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
927 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
928 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
929 (set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
930 (set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
931 (set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
932 (set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
933 (set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
934 (set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
935 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
936 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
937 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
938 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
939 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
940 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
941 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
942 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
943 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
944 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
945 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
946 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
947 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
948 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
949 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
950 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
951 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
952 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON (noted here only; no pair is set for it)
953 (set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
954 (set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
955 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
956 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
957 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
958 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
959 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
960 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
961 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
962 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
963 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
964 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
965 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
966 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
967 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
968 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
969 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
970 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
971 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
972 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
973 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
974 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
975 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
976 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
977 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
978 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
979 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
980 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
981 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
982 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
983 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
984 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
985 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
986 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
987
988 ;; Latin Extended Additional
989 (setq c #x1e00) ; c is a UCS code; decode-char converts it at each use
990 (while (<= c #x1ef9)
991 (modify-category-entry (decode-char 'ucs c) ?l) ; every code in the range gets category ?l
992 (and (zerop (% c 2)) ; even codes are paired with the following odd code ...
993 (or (<= c #x1e94) (>= c #x1ea0)) ; ... except in the gap between #x1e94 and #x1ea0
994 (set-case-syntax-pair
995 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
996 (setq c (1+ c)))
997
998 ;; Greek
999 (setq c #x0370) ; c is a UCS code; decode-char converts it at each use
1000 (while (<= c #x03ff)
1001 (modify-category-entry (decode-char 'ucs c) ?g) ; every code in the range gets category ?g
1002 (if (or (and (>= c #x0391) (<= c #x03a1)) ; two runs, with #x03a2 left out between them
1003 (and (>= c #x03a3) (<= c #x03ab)))
1004 (set-case-syntax-pair
1005 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl)) ; partner is 32 codes higher
1006 (and (>= c #x03da)
1007 (<= c #x03ee)
1008 (zerop (% c 2)) ; in this range an even code is paired with the next (odd) code
1009 (set-case-syntax-pair
1010 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1011 (setq c (1+ c)))
1012 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
1013 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
1014 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
1015 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
1016 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
1017 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
1018 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
1019
1020 ;; Armenian
1021 (setq c #x531) ; c is a UCS code; decode-char converts it at each use
1022 (while (<= c #x556)
1023 (set-case-syntax-pair (decode-char 'ucs c)
1024 (decode-char 'ucs (+ c #x30)) tbl) ; partner is #x30 codes higher
1025 (setq c (1+ c)))
1026
1027 ;; Greek Extended
1028 (setq c #x1f00) ; c is a UCS code; decode-char converts it at each use
1029 (while (<= c #x1fff)
1030 (modify-category-entry (decode-char 'ucs c) ?g) ; every code in the block gets category ?g
1031 (and (<= (logand c #x000f) 7) ; low nibble 0..7: the partner is the code 8 higher
1032 (<= c #x1fa7)
1033 (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56))) ; codes left out of the +8 pairing
1034 (/= (logand c #x00f0) #x70) ; skip row #x1f70..#x1f7f; the mask keeps only the high nibble, so test against #x70 (a test against 7 never fails)
1035 (set-case-syntax-pair
1036 (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
1037 (setq c (1+ c)))
1038 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl) ; from here on, pairs are listed one by one
1039 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1040 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1041 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1042 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1043 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1044 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1045 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1046 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1047 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1048 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1049 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1050 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1051 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1052 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1053 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1054 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1055 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1056 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1057 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1058 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1059 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1060 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1061 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1062
1063 ;; cyrillic
1064 (setq c #x0400) ; c is a UCS code; decode-char converts it at each use
1065 (while (<= c #x04ff)
1066 (modify-category-entry (decode-char 'ucs c) ?y) ; every code in the range gets category ?y
1067 (and (>= c #x0400)
1068 (<= c #x040f)
1069 (set-case-syntax-pair
1070 (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl)) ; partner is 80 (#x50) codes higher
1071 (and (>= c #x0410)
1072 (<= c #x042f)
1073 (set-case-syntax-pair
1074 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl)) ; partner is 32 codes higher
1075 (and (zerop (% c 2)) ; in the three ranges below an even code is paired with the next (odd) code
1076 (or (and (>= c #x0460) (<= c #x0480))
1077 (and (>= c #x048c) (<= c #x04be))
1078 (and (>= c #x04d0) (<= c #x04f4)))
1079 (set-case-syntax-pair
1080 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1081 (setq c (1+ c)))
1082 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
1083 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
1084 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
1085 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
1086 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1087
1088 ;; general punctuation
1089 (setq c #x2000) ; codes #x2000..#x200b get syntax " " (whitespace)
1090 (while (<= c #x200b)
1091 (set-case-syntax (decode-char 'ucs c) " " tbl)
1092 (setq c (1+ c))) ; c must stay a UCS code: storing the decoded character here would break the range test
1093 (setq c #x2010) ; codes #x2010..#x2027 get syntax "_" (symbol)
1094 (while (<= c #x2027)
1095 (set-case-syntax (decode-char 'ucs c) "_" tbl)
1096 (setq c (1+ c))) ; same as above: advance the code, decode only where the character is needed
1097
1098 ;; Roman numerals
1099 (setq c #x2160) ; c is a UCS code; decode-char converts it at each use
1100 (while (<= c #x216f)
1101 (set-case-syntax-pair (decode-char 'ucs c)
1102 (decode-char 'ucs (+ c #x10)) tbl) ; partner is #x10 codes higher
1103 (setq c (1+ c)))
1104
1105 ;; Circled Latin
1106 (setq c #x24b6) ; c is a UCS code; decode-char converts it at each use
1107 (while (<= c #x24cf)
1108 (set-case-syntax-pair (decode-char 'ucs c)
1109 (decode-char 'ucs (+ c 26)) tbl) ; partner is 26 codes higher
1110 (modify-category-entry (decode-char 'ucs c) ?l) ; both members of the pair get category ?l
1111 (modify-category-entry (decode-char 'ucs (+ c 26)) ?l)
1112 (setq c (1+ c)))
1113
1114 ;; Fullwidth Latin
1115 (setq c #xff21) ; c is a UCS code; decode-char converts it at each use
1116 (while (<= c #xff3a)
1117 (set-case-syntax-pair (decode-char 'ucs c)
1118 (decode-char 'ucs (+ c #x20)) tbl) ; partner is #x20 codes higher
1119 (modify-category-entry (decode-char 'ucs c) ?l) ; both members of the pair get category ?l
1120 (modify-category-entry (decode-char 'ucs (+ c #x20)) ?l)
1121 (setq c (1+ c)))
1122
1123 ;; Combining diacritics
1124 (setq c #x300) ; c is a UCS code; decode-char converts it at each use
1125 (while (<= c #x362)
1126 (modify-category-entry (decode-char 'ucs c) ?^) ; category ?^ for each code #x300..#x362
1127 (setq c (1+ c)))
1128
1129 ;; Combining marks
1130 (setq c #x20d0)
1131 (while (<= c #x20e3)
1132 (modify-category-entry (decode-char 'ucs c) ?^) ; category ?^ for each code #x20d0..#x20e3
1133 (setq c (1+ c)))
1134
1135 ;; Fixme: syntax for symbols &c
1136 )
1137 \f
1138 ;;; Setting word boundary.
1139
1140 (setq word-combining-categories
1141 '((?l . ?l))) ; a single pair: category ?l next to category ?l
1142
1143 (setq word-separating-categories ; (2-byte character sets); pairs are ordered, e.g. A-K and K-A are separate entries
1144 '((?A . ?K) ; Alpha numeric - Katakana
1145 (?A . ?C) ; Alpha numeric - Chinese
1146 (?H . ?A) ; Hiragana - Alpha numeric
1147 (?H . ?K) ; Hiragana - Katakana
1148 (?H . ?C) ; Hiragana - Chinese
1149 (?K . ?A) ; Katakana - Alpha numeric
1150 (?K . ?C) ; Katakana - Chinese
1151 (?C . ?A) ; Chinese - Alpha numeric
1152 (?C . ?K) ; Chinese - Katakana
1153 ))
1154
1155 \f
1156 ;; For each character set, record the coding system best suited to
1157 ;; encoding it in the charset's `preferred-coding-system' property.
1158
1159 (let ((l '((latin-iso8859-1 . iso-latin-1) ; alist of (CHARSET . CODING-SYSTEM)
1160 (latin-iso8859-2 . iso-latin-2)
1161 (latin-iso8859-3 . iso-latin-3)
1162 (latin-iso8859-4 . iso-latin-4)
1163 (thai-tis620 . thai-tis620)
1164 (greek-iso8859-7 . greek-iso-8bit)
1165 (arabic-iso8859-6 . iso-2022-7bit)
1166 (hebrew-iso8859-8 . hebrew-iso-8bit)
1167 (katakana-jisx0201 . japanese-shift-jis)
1168 (latin-jisx0201 . japanese-shift-jis)
1169 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
1170 (latin-iso8859-9 . iso-latin-5)
1171 (japanese-jisx0208-1978 . iso-2022-jp)
1172 (chinese-gb2312 . cn-gb-2312)
1173 (japanese-jisx0208 . iso-2022-jp)
1174 (korean-ksc5601 . iso-2022-kr)
1175 (japanese-jisx0212 . iso-2022-jp)
1176 (chinese-cns11643-1 . iso-2022-cn)
1177 (chinese-cns11643-2 . iso-2022-cn)
1178 (chinese-big5-1 . chinese-big5)
1179 (chinese-big5-2 . chinese-big5)
1180 (chinese-sisheng . iso-2022-7bit)
1181 (ipa . iso-2022-7bit)
1182 (vietnamese-viscii-lower . vietnamese-viscii)
1183 (vietnamese-viscii-upper . vietnamese-viscii)
1184 (arabic-digit . iso-2022-7bit)
1185 (arabic-1-column . iso-2022-7bit)
1186 (ascii-right-to-left . iso-2022-7bit)
1187 (lao . lao)
1188 (arabic-2-column . iso-2022-7bit)
1189 (indian-is13194 . devanagari)
1190 (indian-glyph . devanagari)
1191 (tibetan-1-column . tibetan)
1192 (ethiopic . iso-2022-7bit)
1193 (chinese-cns11643-3 . iso-2022-cn)
1194 (chinese-cns11643-4 . iso-2022-cn)
1195 (chinese-cns11643-5 . iso-2022-cn)
1196 (chinese-cns11643-6 . iso-2022-cn)
1197 (chinese-cns11643-7 . iso-2022-cn)
1198 (indian-2-column . devanagari)
1199 (tibetan . tibetan)
1200 (latin-iso8859-14 . iso-latin-8)
1201 (latin-iso8859-15 . iso-latin-9))))
1202 (while l ; walk the alist front to back
1203 (put-charset-property (car (car l)) 'preferred-coding-system (cdr (car l))) ; store the cdr on the charset named by the car
1204 (setq l (cdr l))))
1205
1206 \f
1207 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
1208 ;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
1209 ;; property on the charsets.
1210 (dolist (charset '(katakana-jisx0201
1211 japanese-jisx0208 japanese-jisx0212
1212 chinese-gb2312 chinese-big5-1 chinese-big5-2))
1213 ;; Switch on the auto-fill-chars slot indexed by the charset's character.
1214 (aset auto-fill-chars (make-char charset) t)
1215 ;; Set the `nospace-between-words' property on the charset itself.
1216 (put-charset-property charset 'nospace-between-words t))
1217
1218 ;;; Local Variables:
1219 ;;; coding: iso-2022-7bit
1220 ;;; End:
1221
1222 ;;; characters.el ends here