code.delx.au - gnu-emacs/blob - lisp/international/characters.el
Fix syntax (open/close) of CJK characters.
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6
7 ;; Keywords: multibyte character, character set, syntax, category
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;; This file contains multibyte characters. Save this file always in
29 ;; the coding system `iso-2022-7bit'.
30
31 ;; This file does not define the syntax for Latin-N character sets;
32 ;; those are defined by the files latin-N.el.
33
34 ;;; Code:
35
36 ;;; Predefined categories.
37
38 ;; For each character set.
39
;; One category mnemonic per script / character set.  Each entry is
;; (MNEMONIC . DOCSTRING); they are registered in the order listed.
(dolist (spec '((?a . "ASCII")
                (?l . "Latin")
                (?t . "Thai")
                (?g . "Greek")
                (?b . "Arabic")
                (?w . "Hebrew")
                (?y . "Cyrillic")
                (?k . "Japanese katakana")
                (?r . "Japanese roman")
                (?c . "Chinese")
                (?j . "Japanese")
                (?h . "Korean")
                (?e . "Ethiopic (Ge'ez)")
                (?v . "Vietnamese")
                (?i . "Indian")
                (?o . "Lao")
                (?q . "Tibetan")))
  (define-category (car spec) (cdr spec)))
57
58 ;; For each group (row) of 2-byte character sets.
59
;; Categories naming a group (row) inside the 2-byte character sets,
;; registered from a (MNEMONIC . DOCSTRING) table.
(dolist (spec '((?A . "Alpha-numeric characters of 2-byte character sets")
                (?C . "Chinese (Han) characters of 2-byte character sets")
                (?G . "Greek characters of 2-byte character sets")
                (?H . "Japanese Hiragana characters of 2-byte character sets")
                (?K . "Japanese Katakana characters of 2-byte character sets")
                (?N . "Korean Hangul characters of 2-byte character sets")
                (?Y . "Cyrillic characters of 2-byte character sets")
                (?I . "Indian Glyphs")))
  (define-category (car spec) (cdr spec)))
68
69 ;; For phonetic classifications.
70
;; Digit mnemonics ?0..?9 carry the phonetic classifications.
(dolist (spec '((?0 . "consonant")
                (?1 . "base (independent) vowel")
                (?2 . "upper diacritical mark (including upper vowel)")
                (?3 . "lower diacritical mark (including lower vowel)")
                (?4 . "tone mark")
                (?5 . "symbol")
                (?6 . "digit")
                (?7 . "vowel-modifying diacritical mark")
                (?8 . "vowel-signs")
                (?9 . "semivowel lower")))
  (define-category (car spec) (cdr spec)))
81
82 ;; For filling.
;; Remaining special-purpose categories, registered in table order.
(dolist (spec '(;; Line filling.
                (?| . "While filling, we can break a line at this character.")
                ;; Indentation calculation.
                (?\s . "This character counts as a space for indentation purposes.")
                ;; Keep the next two for `kinsoku' processing.  See
                ;; comments in kinsoku.el.
                (?> . "A character which can't be placed at beginning of line.")
                (?< . "A character which can't be placed at end of line.")
                ;; Combining
                (?^ . "Combining diacritic or mark")))
  (define-category (car spec) (cdr spec)))
96 \f
97 ;;; Setting syntax and category.
98
99 ;; ASCII
100
;; Codes 32 through 126 get both category `a' (ASCII) and `l' (Latin).
(dotimes (offset (- 127 32))
  (let ((code (+ 32 offset)))
    (modify-category-entry code ?a)
    (modify-category-entry code ?l)))
106
107 ;; Arabic character set
108
;; Put the generic character of each Arabic charset in category `b'.
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  ;; (modify-syntax-entry (make-char charset) "w")
  (modify-category-entry (make-char charset) ?b))

;; Do the same for three UCS ranges, given as inclusive
;; (FIRST . LAST) code points.
(dolist (range '((#x600 . #x6ff)
                 (#xfb50 . #xfdff)
                 (#xfe70 . #xfefe)))
  (let ((code (car range))
        (end (cdr range)))
    (while (<= code end)
      (modify-category-entry (decode-char 'ucs code) ?b)
      (setq code (1+ code)))))
129
130 ;; Chinese character set (GB2312)
131
132 ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
;; Rows 33, 34 and 41 of GB2312 get symbol syntax as a whole ...
(dolist (row '(33 34 41))
  (modify-syntax-entry (make-char 'chinese-gb2312 row) "_"))

;; ... then individual characters are overridden with paired
;; delimiter syntax.  Each entry is (CHAR . SYNTAX-DESCRIPTOR): the
;; openers come first, followed by the matching closers.
(dolist (entry '((?\\e$A!2\e(B . "(\e$A!3\e(B")
                 (?\\e$A!4\e(B . "(\e$A!5\e(B")
                 (?\\e$A!6\e(B . "(\e$A!7\e(B")
                 (?\\e$A!8\e(B . "(\e$A!9\e(B")
                 (?\\e$A!:\e(B . "(\e$A!;\e(B")
                 (?\\e$A!<\e(B . "(\e$A!=\e(B")
                 (?\\e$A!>\e(B . "(\e$A!?\e(B")
                 (?\\e$A#(\e(B . "(\e$A#)\e(B")
                 (?\\e$A#{\e(B . "(\e$A#}\e(B")
                 (?\\e$A#[\e(B . "(\e$A#]\e(B")
                 (?\\e$A!3\e(B . ")\e$A!2\e(B")
                 (?\\e$A!5\e(B . ")\e$A!4\e(B")
                 (?\\e$A!7\e(B . ")\e$A!6\e(B")
                 (?\\e$A!9\e(B . ")\e$A!8\e(B")
                 (?\\e$A!;\e(B . ")\e$A!:\e(B")
                 (?\\e$A!=\e(B . ")\e$A!<\e(B")
                 (?\\e$A!?\e(B . ")\e$A!>\e(B")
                 (?\\e$A#)\e(B . ")\e$A#(\e(B")
                 (?\\e$A#}\e(B . ")\e$A#{\e(B")
                 (?\\e$A#]\e(B . ")\e$A#[\e(B")))
  (modify-syntax-entry (car entry) (cdr entry)))
156
;; Every character in this string receives punctuation syntax.
(let* ((punct "\e$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d\e(B")
       (count (length punct))
       (idx 0))
  (while (< idx count)
    (modify-syntax-entry (aref punct idx) ".")
    (setq idx (1+ idx))))
160
;; The generic GB2312 character is Chinese (`c') and a legal
;; line-break position (`|').
(let ((generic (make-char 'chinese-gb2312)))
  (modify-category-entry generic ?c)
  (modify-category-entry generic ?\|))

;; Per-row categories, as (ROW . CATEGORY).
(dolist (row-cat '((35 . ?A)
                   (36 . ?H)
                   (37 . ?K)
                   (38 . ?G)
                   (39 . ?Y)))
  (modify-category-entry (make-char 'chinese-gb2312 (car row-cat))
                         (cdr row-cat)))

;; Rows 48 through 126 are category `C'.
(dotimes (offset (- 127 48))
  (modify-category-entry (make-char 'chinese-gb2312 (+ 48 offset)) ?C))
172
173 ;; Chinese character set (BIG5)
174
;; Each entry is (FIRST LIMIT SYNTAX) in Big5 code values.  Characters
;; from FIRST up to, but not including, LIMIT receive SYNTAX.
(dolist (span '((#xA141 #xA15D ".")
                (#xA1A5 #xA1AD ".")
                (#xA1AD #xA2AF "_")))
  (let ((ch (decode-big5-char (nth 0 span)))
        (limit (decode-big5-char (nth 1 span)))
        (descriptor (nth 2 span)))
    (while (< ch limit)
      (modify-syntax-entry ch descriptor)
      (setq ch (1+ ch)))))
190
;; The string lists delimiters as consecutive (opener, closer) pairs.
;; Each opener gets "(" syntax matched to its closer and vice versa;
;; a trailing unpaired character, if any, is ignored.
(let* ((pairs "\e$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
       (count (length pairs))
       (idx 0)
       opener closer)
  (while (< (1+ idx) count)
    (setq opener (aref pairs idx)
          closer (aref pairs (1+ idx)))
    (modify-syntax-entry opener (format "(%c" closer))
    (modify-syntax-entry closer (format ")%c" opener))
    (setq idx (+ idx 2))))
198
;; Both Big5 charsets get categories `c', `C' and `|' on their generic
;; character.  The category loop is outermost so the calls happen in
;; the order c/c, C/C, |/|.
(let ((generics (list (make-char 'chinese-big5-1)
                      (make-char 'chinese-big5-2))))
  ;; Word syntax is not set explicitly here:
  ;; (modify-syntax-entry generic-big5-1-char "w")
  ;; (modify-syntax-entry generic-big5-2-char "w")
  (dolist (category '(?c ?C ?\|))
    (dolist (generic generics)
      (modify-category-entry generic category))))
212
213
214 ;; Chinese character set (CNS11643)
215
;; The generic character of every CNS11643 plane gets categories
;; `c', `C' and `|'.
(dolist (plane '(chinese-cns11643-1
                 chinese-cns11643-2
                 chinese-cns11643-3
                 chinese-cns11643-4
                 chinese-cns11643-5
                 chinese-cns11643-6
                 chinese-cns11643-7))
  (let ((generic (make-char plane)))
    ;; (modify-syntax-entry generic "w")
    (modify-category-entry generic ?c)
    (modify-category-entry generic ?C)
    (modify-category-entry generic ?|)))
231
;; Consecutive characters of the string form (opener, closer) pairs.
;; Walk them two at a time and cross-link their paren syntax; an
;; unpaired trailing character is left alone.
(let ((rest (append "\e$(G!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B" nil))
      opener closer)
  (while (cdr rest)
    (setq opener (car rest)
          closer (car (cdr rest)))
    (modify-syntax-entry opener (format "(%c" closer))
    (modify-syntax-entry closer (format ")%c" opener))
    (setq rest (cdr (cdr rest)))))
239
240 ;; Cyrillic character set (ISO-8859-5)
241
;; All of cyrillic-iso8859-5 is category `y'.
(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)

;; Code 160 gets whitespace syntax; three further characters get
;; punctuation syntax.
(modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
(dolist (ch '(?\e,L-\e(B ?\e,Lp\e(B ?\e,L}\e(B))
  (modify-syntax-entry ch "."))
;; Register upper/lower case pairs in the standard case table.  Each
;; entry is (UPPER . LOWER); the first group is written in
;; cyrillic-iso8859-5, the second repeats the same pairs in the
;; Unicode-based charset.
(let ((tbl (standard-case-table)))
  (dolist (pair '((?\e,L!\e(B . ?\e,Lq\e(B)
                  (?\e,L"\e(B . ?\e,Lr\e(B)
                  (?\e,L#\e(B . ?\e,Ls\e(B)
                  (?\e,L$\e(B . ?\e,Lt\e(B)
                  (?\e,L%\e(B . ?\e,Lu\e(B)
                  (?\e,L&\e(B . ?\e,Lv\e(B)
                  (?\e,L'\e(B . ?\e,Lw\e(B)
                  (?\e,L(\e(B . ?\e,Lx\e(B)
                  (?\e,L)\e(B . ?\e,Ly\e(B)
                  (?\e,L*\e(B . ?\e,Lz\e(B)
                  (?\e,L+\e(B . ?\e,L{\e(B)
                  (?\e,L,\e(B . ?\e,L|\e(B)
                  (?\e,L.\e(B . ?\e,L~\e(B)
                  (?\e,L/\e(B . ?\e,L\7f\e(B)
                  (?\e,L0\e(B . ?\e,LP\e(B)
                  (?\e,L1\e(B . ?\e,LQ\e(B)
                  (?\e,L2\e(B . ?\e,LR\e(B)
                  (?\e,L3\e(B . ?\e,LS\e(B)
                  (?\e,L4\e(B . ?\e,LT\e(B)
                  (?\e,L5\e(B . ?\e,LU\e(B)
                  (?\e,L6\e(B . ?\e,LV\e(B)
                  (?\e,L7\e(B . ?\e,LW\e(B)
                  (?\e,L8\e(B . ?\e,LX\e(B)
                  (?\e,L9\e(B . ?\e,LY\e(B)
                  (?\e,L:\e(B . ?\e,LZ\e(B)
                  (?\e,L;\e(B . ?\e,L[\e(B)
                  (?\e,L<\e(B . ?\e,L\\e(B)
                  (?\e,L=\e(B . ?\e,L]\e(B)
                  (?\e,L>\e(B . ?\e,L^\e(B)
                  (?\e,L?\e(B . ?\e,L_\e(B)
                  (?\e,L@\e(B . ?\e,L`\e(B)
                  (?\e,LA\e(B . ?\e,La\e(B)
                  (?\e,LB\e(B . ?\e,Lb\e(B)
                  (?\e,LC\e(B . ?\e,Lc\e(B)
                  (?\e,LD\e(B . ?\e,Ld\e(B)
                  (?\e,LE\e(B . ?\e,Le\e(B)
                  (?\e,LF\e(B . ?\e,Lf\e(B)
                  (?\e,LG\e(B . ?\e,Lg\e(B)
                  (?\e,LH\e(B . ?\e,Lh\e(B)
                  (?\e,LI\e(B . ?\e,Li\e(B)
                  (?\e,LJ\e(B . ?\e,Lj\e(B)
                  (?\e,LK\e(B . ?\e,Lk\e(B)
                  (?\e,LL\e(B . ?\e,Ll\e(B)
                  (?\e,LM\e(B . ?\e,Lm\e(B)
                  (?\e,LN\e(B . ?\e,Ln\e(B)
                  (?\e,LO\e(B . ?\e,Lo\e(B)
                  ;; Same pairs in the Unicode-based charset.
                  (?\e$,1(!\e(B . ?\e$,1(q\e(B)
                  (?\e$,1("\e(B . ?\e$,1(r\e(B)
                  (?\e$,1(#\e(B . ?\e$,1(s\e(B)
                  (?\e$,1($\e(B . ?\e$,1(t\e(B)
                  (?\e$,1(%\e(B . ?\e$,1(u\e(B)
                  (?\e$,1(&\e(B . ?\e$,1(v\e(B)
                  (?\e$,1('\e(B . ?\e$,1(w\e(B)
                  (?\e$,1((\e(B . ?\e$,1(x\e(B)
                  (?\e$,1()\e(B . ?\e$,1(y\e(B)
                  (?\e$,1(*\e(B . ?\e$,1(z\e(B)
                  (?\e$,1(+\e(B . ?\e$,1({\e(B)
                  (?\e$,1(,\e(B . ?\e$,1(|\e(B)
                  (?\e$,1(.\e(B . ?\e$,1(~\e(B)
                  (?\e$,1(/\e(B . ?\e$,1(\7f\e(B)
                  (?\e$,1(0\e(B . ?\e$,1(P\e(B)
                  (?\e$,1(1\e(B . ?\e$,1(Q\e(B)
                  (?\e$,1(2\e(B . ?\e$,1(R\e(B)
                  (?\e$,1(3\e(B . ?\e$,1(S\e(B)
                  (?\e$,1(4\e(B . ?\e$,1(T\e(B)
                  (?\e$,1(5\e(B . ?\e$,1(U\e(B)
                  (?\e$,1(6\e(B . ?\e$,1(V\e(B)
                  (?\e$,1(7\e(B . ?\e$,1(W\e(B)
                  (?\e$,1(8\e(B . ?\e$,1(X\e(B)
                  (?\e$,1(9\e(B . ?\e$,1(Y\e(B)
                  (?\e$,1(:\e(B . ?\e$,1(Z\e(B)
                  (?\e$,1(;\e(B . ?\e$,1([\e(B)
                  (?\e$,1(<\e(B . ?\e$,1(\\e(B)
                  (?\e$,1(=\e(B . ?\e$,1(]\e(B)
                  (?\e$,1(>\e(B . ?\e$,1(^\e(B)
                  (?\e$,1(?\e(B . ?\e$,1(_\e(B)
                  (?\e$,1(@\e(B . ?\e$,1(`\e(B)
                  (?\e$,1(A\e(B . ?\e$,1(a\e(B)
                  (?\e$,1(B\e(B . ?\e$,1(b\e(B)
                  (?\e$,1(C\e(B . ?\e$,1(c\e(B)
                  (?\e$,1(D\e(B . ?\e$,1(d\e(B)
                  (?\e$,1(E\e(B . ?\e$,1(e\e(B)
                  (?\e$,1(F\e(B . ?\e$,1(f\e(B)
                  (?\e$,1(G\e(B . ?\e$,1(g\e(B)
                  (?\e$,1(H\e(B . ?\e$,1(h\e(B)
                  (?\e$,1(I\e(B . ?\e$,1(i\e(B)
                  (?\e$,1(J\e(B . ?\e$,1(j\e(B)
                  (?\e$,1(K\e(B . ?\e$,1(k\e(B)
                  (?\e$,1(L\e(B . ?\e$,1(l\e(B)
                  (?\e$,1(M\e(B . ?\e$,1(m\e(B)
                  (?\e$,1(N\e(B . ?\e$,1(n\e(B)
                  (?\e$,1(O\e(B . ?\e$,1(o\e(B)))
    (set-case-syntax-pair (car pair) (cdr pair) tbl)))
341
342 ;; Devanagari character set
343
344 ;;; Commented out since the categories appear not to be used anywhere
345 ;;; and word syntax is the default.
346 ;; (let ((deflist '(;; chars syntax category
347 ;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
348 ;; ; chandrabindu, anuswar, visarga
349 ;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
350 ;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
351 ;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
352 ;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
353 ;; ;; Unicode equivalents
354 ;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
355 ;; ; chandrabindu, anuswar, visarga
356 ;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
357 ;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
358 ;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
359 ;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
360 ;; ))
361 ;; elm chars len syntax category to ch i)
362 ;; (while deflist
363 ;; (setq elm (car deflist))
364 ;; (setq chars (car elm)
365 ;; len (length chars)
366 ;; syntax (nth 1 elm)
367 ;; category (nth 2 elm)
368 ;; i 0)
369 ;; (while (< i len)
370 ;; (if (= (aref chars i) ?-)
371 ;; (setq i (1+ i)
372 ;; to (aref chars i))
373 ;; (setq ch (aref chars i)
374 ;; to ch))
375 ;; (while (<= ch to)
376 ;; (modify-syntax-entry ch syntax)
377 ;; (modify-category-entry ch category)
378 ;; (setq ch (1+ ch)))
379 ;; (setq i (1+ i)))
380 ;; (setq deflist (cdr deflist))))
381
382 ;; Ethiopic character set
383
;; Category `e' for the `ethiopic' charset and for UCS code points
;; #x1200 through #x137c.
(modify-category-entry (make-char 'ethiopic) ?e)
;; (modify-syntax-entry (make-char 'ethiopic) "w")
(let ((code #x1200))
  (while (<= code #x137c)
    (modify-category-entry (decode-char 'ucs code) ?e)
    (setq code (1+ code))))

;; Characters that receive punctuation syntax.
(dolist (ch '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
              ;; Unicode equivalents of the above:
              ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B))
  (modify-syntax-entry ch "."))
394
395 ;; Greek character set (ISO-8859-7)
396
;; Category `g' for greek-iso8859-7 and for UCS code points #x370
;; through #x3ff.
(modify-category-entry (make-char 'greek-iso8859-7) ?g)
(dotimes (offset (1+ (- #x3ff #x370)))
  (modify-category-entry (decode-char 'ucs (+ #x370 offset)) ?g))
402
403 ;; (let ((c 182))
404 ;; (while (< c 255)
405 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
406 ;; (setq c (1+ c))))
407 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
;; Three greek-iso8859-7 characters get punctuation syntax directly.
(dolist (ch '(?\e,F7\e(B ?\e,F;\e(B ?\e,F=\e(B))
  (modify-syntax-entry ch "."))

(let ((tbl (standard-case-table)))
  ;; Caseless characters, as (CHAR . SYNTAX).
  ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
  ;; in several cases.
  ;; NOTE(review): `\e,F&\e(B' appears twice in a row; the "." entry is
  ;; immediately replaced by the "_" entry.  Both calls are kept to
  ;; preserve behavior -- confirm which character the first one was
  ;; meant for.
  (dolist (entry '((?\e,F!\e(B . ".")
                   (?\e,F"\e(B . ".")
                   (?\e,F&\e(B . ".")
                   (?\e,F&\e(B . "_")
                   (?\e,F'\e(B . ".")
                   (?\e,F)\e(B . "_")
                   (?\e,F+\e(B . ".")
                   (?\e,F,\e(B . "_")
                   (?\e,F-\e(B . "_")
                   (?\e,F/\e(B . ".")
                   (?\e,F0\e(B . "_")
                   (?\e,F1\e(B . "_")))
    (set-case-syntax (car entry) (cdr entry) tbl))
  ;; (set-case-syntax ?\e,F7\e(B "_" tbl)
  ;; (set-case-syntax ?\e,F=\e(B "_" tbl)

  ;; Case pairs, as (UPPER . LOWER).
  (dolist (pair '((?\e,FA\e(B . ?\e,Fa\e(B)
                  (?\e,FB\e(B . ?\e,Fb\e(B)
                  (?\e,FC\e(B . ?\e,Fc\e(B)
                  (?\e,FD\e(B . ?\e,Fd\e(B)
                  (?\e,FE\e(B . ?\e,Fe\e(B)
                  (?\e,FF\e(B . ?\e,Ff\e(B)
                  (?\e,FG\e(B . ?\e,Fg\e(B)
                  (?\e,FH\e(B . ?\e,Fh\e(B)
                  (?\e,FI\e(B . ?\e,Fi\e(B)
                  (?\e,FJ\e(B . ?\e,Fj\e(B)
                  (?\e,FK\e(B . ?\e,Fk\e(B)
                  (?\e,FL\e(B . ?\e,Fl\e(B)
                  (?\e,FM\e(B . ?\e,Fm\e(B)
                  (?\e,FN\e(B . ?\e,Fn\e(B)
                  (?\e,FO\e(B . ?\e,Fo\e(B)
                  (?\e,FP\e(B . ?\e,Fp\e(B)
                  (?\e,FQ\e(B . ?\e,Fq\e(B)
                  (?\e,FS\e(B . ?\e,Fs\e(B)
                  (?\e,FT\e(B . ?\e,Ft\e(B)
                  (?\e,FU\e(B . ?\e,Fu\e(B)
                  (?\e,FV\e(B . ?\e,Fv\e(B)
                  (?\e,FW\e(B . ?\e,Fw\e(B)
                  (?\e,FX\e(B . ?\e,Fx\e(B)
                  (?\e,FY\e(B . ?\e,Fy\e(B)
                  (?\e,FZ\e(B . ?\e,Fz\e(B)
                  (?\e,F[\e(B . ?\e,F{\e(B)
                  (?\e,F?\e(B . ?\e,F~\e(B)
                  (?\e,F>\e(B . ?\e,F}\e(B)
                  (?\e,F<\e(B . ?\e,F|\e(B)
                  (?\e,F6\e(B . ?\e,F\\e(B)
                  (?\e,F8\e(B . ?\e,F]\e(B)
                  (?\e,F9\e(B . ?\e,F^\e(B)
                  (?\e,F:\e(B . ?\e,F_\e(B)
                  ;; Unicode equivalents
                  (?\e$,1&q\e(B . ?\e$,1'1\e(B)
                  (?\e$,1&r\e(B . ?\e$,1'2\e(B)
                  (?\e$,1&s\e(B . ?\e$,1'3\e(B)
                  (?\e$,1&t\e(B . ?\e$,1'4\e(B)
                  (?\e$,1&u\e(B . ?\e$,1'5\e(B)
                  (?\e$,1&v\e(B . ?\e$,1'6\e(B)
                  (?\e$,1&w\e(B . ?\e$,1'7\e(B)
                  (?\e$,1&x\e(B . ?\e$,1'8\e(B)
                  (?\e$,1&y\e(B . ?\e$,1'9\e(B)
                  (?\e$,1&z\e(B . ?\e$,1':\e(B)
                  (?\e$,1&{\e(B . ?\e$,1';\e(B)
                  (?\e$,1&|\e(B . ?\e$,1'<\e(B)
                  (?\e$,1&}\e(B . ?\e$,1'=\e(B)
                  (?\e$,1&~\e(B . ?\e$,1'>\e(B)
                  (?\e$,1&\7f\e(B . ?\e$,1'?\e(B)
                  (?\e$,1' \e(B . ?\e$,1'@\e(B)
                  (?\e$,1'!\e(B . ?\e$,1'A\e(B)
                  (?\e$,1'#\e(B . ?\e$,1'C\e(B)
                  (?\e$,1'$\e(B . ?\e$,1'D\e(B)
                  (?\e$,1'%\e(B . ?\e$,1'E\e(B)
                  (?\e$,1'&\e(B . ?\e$,1'F\e(B)
                  (?\e$,1''\e(B . ?\e$,1'G\e(B)
                  (?\e$,1'(\e(B . ?\e$,1'H\e(B)
                  (?\e$,1')\e(B . ?\e$,1'I\e(B)
                  (?\e$,1'*\e(B . ?\e$,1'J\e(B)
                  (?\e$,1'+\e(B . ?\e$,1'K\e(B)
                  (?\e$,1&o\e(B . ?\e$,1'N\e(B)
                  (?\e$,1&n\e(B . ?\e$,1'M\e(B)
                  (?\e$,1&l\e(B . ?\e$,1'L\e(B)
                  (?\e$,1&f\e(B . ?\e$,1',\e(B)
                  (?\e$,1&h\e(B . ?\e$,1'-\e(B)
                  (?\e$,1&i\e(B . ?\e$,1'.\e(B)
                  (?\e$,1&j\e(B . ?\e$,1'/\e(B)))
    (set-case-syntax-pair (car pair) (cdr pair) tbl)))
495
496 ;; Hebrew character set (ISO-8859-8)
497
;; Category `w' for hebrew-iso8859-8 and for UCS code points #x591
;; through #x5f4.
(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
(dotimes (offset (1+ (- #x5f4 #x591)))
  (modify-category-entry (decode-char 'ucs (+ #x591 offset)) ?w))

;; Punctuation syntax: first two hebrew-iso8859-8 codes ...
(dolist (code '(208                     ; PASEQ
                211))                   ; SOF PASUQ
  (modify-syntax-entry (make-char 'hebrew-iso8859-8 code) "."))
;; ... then five UCS code points.
(dolist (code '(#x5be                   ; MAQAF
                #x5c0                   ; PASEQ
                #x5c3                   ; SOF PASUQ
                #x5f3                   ; GERESH
                #x5f4))                 ; GERSHAYIM
  (modify-syntax-entry (decode-char 'ucs code) "."))
511
512 ;; (let ((c 224))
513 ;; (while (< c 251)
514 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
515 ;; (setq c (1+ c))))
516 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
517
518 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
519
;; indian-is13194 is category `i'; the two glyph charsets are `I'.
(modify-category-entry (make-char 'indian-is13194) ?i)
(dolist (charset '(indian-2-column indian-glyph))
  (modify-category-entry (make-char charset) ?I))
;; Unicode Devanagari block
(dotimes (offset (1+ (- #x970 #x901)))
  (modify-category-entry (decode-char 'ucs (+ #x901 offset)) ?i))
528
;; Apply one table of (FIRST LAST CATEGORY) offsets to each of nine
;; base code points.  FIRST and LAST are inclusive and are added to
;; the base to obtain the UCS code.
(let ((ranges '(;; RANGE   CATEGORY   MEANINGS
                (#x01 #x03 ?7)          ; vowel modifier
                (#x05 #x14 ?1)          ; base vowel
                (#x15 #x39 ?0)          ; consonants
                (#x3e #x4d ?8)          ; vowel modifier
                (#x51 #x54 ?4)          ; stress/tone mark
                (#x58 #x5f ?0)          ; consonants
                (#x60 #x61 ?1)          ; base vowel
                (#x62 #x63 ?8)          ; vowel modifier
                (#x66 #x6f ?6)          ; digits
                )))
  (dolist (base '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00))
    (dolist (range ranges)
      (let ((code (+ base (car range)))
            (end (+ base (nth 1 range)))
            (category (nth 2 range)))
        (while (<= code end)
          (modify-category-entry (decode-char 'ucs code) category)
          (setq code (1+ code)))))))
548
549 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
550
;; (CHARSET . CATEGORY) assignments for the generic character of each
;; Japanese charset, applied in table order.
(dolist (entry '((katakana-jisx0201 . ?k)
                 (katakana-jisx0201 . ?j)
                 (latin-jisx0201 . ?r)
                 (japanese-jisx0208 . ?j)
                 (japanese-jisx0212 . ?j)
                 (katakana-jisx0201 . ?\|)
                 (japanese-jisx0208 . ?\|)
                 (japanese-jisx0212 . ?\|)))
  (modify-category-entry (make-char (car entry)) (cdr entry)))
559
560 ;; Unicode equivalents of JISX0201-kana
;; Each entry is (FIRST LAST CATEGORY...): every UCS code point from
;; FIRST through LAST receives the listed categories, in order.
(dolist (spec '(;; Unicode equivalents of JISX0201-kana
                (#xff61 #xff9f ?k ?j ?\|)
                ;; Katakana block.
                ;; ?K is double width, ?k isn't specified
                (#x30a0 #x30ff ?k ?j ?\|)
                ;; Hiragana block.
                ;; ?H is actually defined to be double width.
                ;; Category ?j is deliberately not applied here (it
                ;; was commented out in the original).
                (#x3040 #x309f ?H ?\|)))
  (let ((code (car spec))
        (end (nth 1 spec))
        (categories (nthcdr 2 spec)))
    (while (<= code end)
      (dolist (category categories)
        (modify-category-entry (decode-char 'ucs code) category))
      (setq code (1+ code)))))
585
586 ;; JISX0208
587 ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
;; Rows 33, 34 and 40 of JISX0208 get symbol syntax as a whole.
(dolist (row '(33 34 40))
  (modify-syntax-entry (make-char 'japanese-jisx0208 row) "_"))

;; These individual characters are switched back to word syntax.
(dolist (ch '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))
  (modify-syntax-entry ch "w"))

;; Consecutive characters of the string are (opener, closer) pairs;
;; cross-link their paren syntax.  A trailing unpaired character is
;; ignored.
(let* ((pairs "\e$B!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![\e(B" )
       (count (length pairs))
       (idx 0)
       opener closer)
  (while (< (1+ idx) count)
    (setq opener (aref pairs idx)
          closer (aref pairs (1+ idx)))
    (modify-syntax-entry opener (format "(%c" closer))
    (modify-syntax-entry closer (format ")%c" opener))
    (setq idx (+ idx 2))))
602
;; Per-row categories, as (ROW . CATEGORY).
(dolist (row-cat '((35 . ?A)
                   (36 . ?H)
                   (37 . ?K)
                   (38 . ?G)
                   (39 . ?Y)))
  (modify-category-entry (make-char 'japanese-jisx0208 (car row-cat))
                         (cdr row-cat)))

;; Rows 48 through 126 are category `C'.
(dotimes (offset (- 127 48))
  (modify-category-entry (make-char 'japanese-jisx0208 (+ 48 offset)) ?C))

;; Individual characters with extra categories: one is `K' only,
;; two are both `K' and `H', and nine are `C'.
(modify-category-entry ?\e$B!<\e(B ?K)
(dolist (ch '(?\e$B!+\e(B ?\e$B!,\e(B))
  (modify-category-entry ch ?K)
  (modify-category-entry ch ?H))
(dolist (ch '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))
  (modify-category-entry ch ?C))
622
623 ;; JISX0212
624 ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
;; Rows 33..35 of JISX0212 get symbol syntax; the whole charset is
;; category `C'.
(dolist (row '(33 34 35))
  (modify-syntax-entry (make-char 'japanese-jisx0212 row) "_"))

(modify-category-entry (make-char 'japanese-jisx0212) ?C)
630
631 ;; JISX0201-Kana
632 ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
;; Punctuation syntax for three katakana-jisx0201 characters and the
;; three characters listed as their Unicode forms.
(dolist (ch '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
              ;; Unicode:
              ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B))
  (modify-syntax-entry ch "."))
639
;; Bracket pair of JISX0201-Kana.  The opener gets open-paren syntax
;; "(" matched to the closer, and the closer gets close-paren syntax
;; ")" matched to the opener, as in the other CJK bracket tables in
;; this file.  (The closer was previously also given "(", which made
;; both characters behave as openers.)
(modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
(modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B")
642
643 ;; Korean character set (KSC5601)
644
645 ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
;; Rows of KSC5601 that get symbol syntax as a whole.
(dolist (row '(33 34 38 39 40 41))
  (modify-syntax-entry (make-char 'korean-ksc5601 row) "_"))

;; The whole charset is category `h'; some rows get an additional
;; category, given as (ROW . CATEGORY).
(modify-category-entry (make-char 'korean-ksc5601) ?h)
(dolist (row-cat '((35 . ?A)
                   (37 . ?G)
                   (42 . ?H)
                   (43 . ?K)
                   (44 . ?Y)))
  (modify-category-entry (make-char 'korean-ksc5601 (car row-cat))
                         (cdr row-cat)))
659
;; Consecutive characters of the string are (opener, closer) pairs;
;; give each the paren syntax matching its partner.  An unpaired
;; trailing character is left alone.
(let ((rest (append "\e$(C!2!3!4!5!6!7!8!9!:!;!<!=\e(B" nil))
      opener closer)
  (while (cdr rest)
    (setq opener (car rest)
          closer (car (cdr rest)))
    (modify-syntax-entry opener (format "(%c" closer))
    (modify-syntax-entry closer (format ")%c" opener))
    (setq rest (cdr (cdr rest)))))
667
668 ;; Latin character set (latin-1,2,3,4,5,8,9)
669
;; Every listed Latin charset is category `l'; afterwards code 160 of
;; each one is additionally put in the space category.
(let ((latin-charsets '(latin-iso8859-1
                        latin-iso8859-2
                        latin-iso8859-3
                        latin-iso8859-4
                        latin-iso8859-9
                        latin-iso8859-14
                        latin-iso8859-15)))
  (dolist (charset latin-charsets)
    (modify-category-entry (make-char charset) ?l))
  (dolist (charset latin-charsets)
    (modify-category-entry (make-char charset 160) ?\ )))
685
686 ;; Lao character set
687
;; Category `o' for the `lao' charset and for UCS code points #xe80
;; through #xeff.
(modify-category-entry (make-char 'lao) ?o)
(let ((code #xe80))
  (while (<= code #xeff)
    (modify-category-entry (decode-char 'ucs code) ?o)
    (setq code (1+ code))))
691
;; Phonetic categories (and, for symbols, syntax) of Lao characters.
;;
;; Each DEFLIST entry is (CHARS SYNTAX CATEGORY).  CHARS is scanned
;; left to right: a plain character is processed on its own, while
;; "X-Y" processes every character code from X through Y.  Word
;; syntax ("w") is not set explicitly; any other SYNTAX is applied
;; with `modify-syntax-entry'.  CATEGORY is always applied.
;;
;; The second half repeats the first in the Unicode-based charset.
;; Its tone-mark range now ends on `Dl', the counterpart of `l' in
;; the legacy entry; it used to stop at `Dk', leaving that last
;; character without category `4'.
(let ((deflist '(;; chars	syntax	category
                 ("\e(1!\e(B-\e(1N\e(B"	"w"	?0) ; consonant
                 ("\e(1PRS]`\e(B-\e(1d\e(B"	"w"	?1) ; vowel base
                 ("\e(1QT\e(B-\e(1W[m\e(B"	"w"	?2) ; vowel upper
                 ("\e(1XY\e(B"		"w"	?3) ; vowel lower
                 ("\e(1h\e(B-\e(1l\e(B"	"w"	?4) ; tone mark
                 ("\e(1\\e(B"		"w"	?9) ; semivowel lower
                 ("\e(1p\e(B-\e(1y\e(B"	"w"	?6) ; digit
                 ("\e(1Of\e(B"		"_"	?5) ; symbol
                 ;; Unicode equivalents
                 ("\e$,1D!\e(B-\e$,1DN\e(B"	"w"	?0) ; consonant
                 ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B"	"w"	?1) ; vowel base
                 ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B"	"w"	?2) ; vowel upper
                 ("\e$,1DXDY\e(B"	"w"	?3) ; vowel lower
                 ("\e$,1Dh\e(B-\e$,1Dl\e(B"	"w"	?4) ; tone mark
                 ("\e$,1D\D]\e(B"	"w"	?9) ; semivowel lower
                 ("\e$,1Dp\e(B-\e$,1Dy\e(B"	"w"	?6) ; digit
                 ("\e$,1DODf\e(B"	"_"	?5) ; symbol
                 ))
      ch to)
  (dolist (elm deflist)
    (let* ((chars (car elm))
           (len (length chars))
           (syntax (nth 1 elm))
           (category (nth 2 elm))
           (i 0))
      (while (< i len)
        (if (= (aref chars i) ?-)
            ;; Range: CH already sits one past the range start (the
            ;; inner loop below advanced it), so only the upper bound
            ;; needs to be read.
            (setq i (1+ i)
                  to (aref chars i))
          (setq ch (aref chars i)
                to ch))
        (while (<= ch to)
          (unless (string-equal syntax "w")
            (modify-syntax-entry ch syntax))
          (modify-category-entry ch category)
          (setq ch (1+ ch)))
        (setq i (1+ i))))))
732
733 ;; Thai character set (TIS620)
734
;; Category `t' for thai-tis620 and for UCS code points #xe00 through
;; #xe7f.
(modify-category-entry (make-char 'thai-tis620) ?t)
(let ((code #xe00))
  (while (<= code #xe7f)
    (modify-category-entry (decode-char 'ucs code) ?t)
    (setq code (1+ code))))
738
;; Phonetic categories (and, for symbols, syntax) of Thai characters.
;;
;; Each table entry is (CHARS SYNTAX CATEGORY).  CHARS is scanned
;; left to right: a plain character is processed on its own, while
;; "X-Y" processes every character code from X through Y.  Word
;; syntax ("w") is not set explicitly; any other SYNTAX is applied
;; with `modify-syntax-entry'.  CATEGORY is always applied.
(let ((table '(;; chars	syntax	category
               ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B"	"w"	?0) ; consonant
               ("\e,TDFPRS`\e(B-\e,Te\e(B"	"w"	?1) ; vowel base
               ("\e,TQT\e(B-\e,TWgn\e(B"	"w"	?2) ; vowel upper
               ("\e,TX\e(B-\e,TZ\e(B"	"w"	?3) ; vowel lower
               ("\e,Th\e(B-\e,Tm\e(B"	"w"	?4) ; tone mark
               ("\e,Tp\e(B-\e,Ty\e(B"	"w"	?6) ; digit
               ("\e,TOf_oz{\e(B"	"_"	?5) ; symbol
               ;; Unicode equivalents
               ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B"	"w"	?0) ; consonant
               ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B"	"w"	?1) ; vowel base
               ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B"	"w"	?2) ; vowel upper
               ("\e$,1C8\e(B-\e$,1C:\e(B"	"w"	?3) ; vowel lower
               ("\e$,1CH\e(B-\e$,1CM\e(B"	"w"	?4) ; tone mark
               ("\e$,1CP\e(B-\e$,1CY\e(B"	"w"	?6) ; digit
               ("\e$,1C/CFC?COCZC[\e(B"	"_"	?5) ; symbol
               ))
      cur upto)
  (dolist (entry table)
    (let* ((text (car entry))
           (text-len (length text))
           (descriptor (nth 1 entry))
           (mnemonic (nth 2 entry))
           (pos 0))
      (while (< pos text-len)
        (if (= (aref text pos) ?-)
            ;; Range: CUR already sits one past the range start, so
            ;; only the upper bound is read here.
            (setq pos (1+ pos)
                  upto (aref text pos))
          (setq cur (aref text pos)
                upto cur))
        (while (<= cur upto)
          (unless (string-equal descriptor "w")
            (modify-syntax-entry cur descriptor))
          (modify-category-entry cur mnemonic)
          (setq cur (1+ cur)))
        (setq pos (1+ pos))))))
777
778 ;; Tibetan character set
779
;; Put Tibetan characters into category ?q: the characters returned by
;; `make-char' for the `tibetan' and `tibetan-1-column' charsets, then
;; each of the 256 code points U+0F00..U+0FFF as decoded by
;; `decode-char'.
780 (modify-category-entry (make-char 'tibetan) ?q)
781 (modify-category-entry (make-char 'tibetan-1-column) ?q)
782 (dotimes (i (1+ (- #xfff #xf00)))
783 (modify-category-entry (decode-char 'ucs (+ i #xf00)) ?q))
784
;; Table-driven category/syntax setup for Tibetan.  Each DEFLIST entry
;; is (CHARS SYNTAX CATEGORY): CHARS is a string of characters in which
;; "A-B" stands for the inclusive range A..B.  Every listed character
;; is added to CATEGORY; its syntax entry is modified only when SYNTAX
;; is something other than "w".  The same character may appear in
;; several rows and then receives every category listed for it.
785 (let ((deflist '(;; chars syntax category
786 ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
787 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
788 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
789 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
790 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
791 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
792 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
793 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
794 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
795 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
796 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
797 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
798 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
799 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
800
801 ;; Unicode version (not complete)
802 ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
803 ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ;
804 ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
805 ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
806 ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lower vowel/modifier
807 ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
808 ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
809 ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
810 ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
811 ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
812 ))
813 elm chars len syntax category to ch i)
814 (while deflist
815 (setq elm (car deflist))
816 (setq chars (car elm)
817 len (length chars)
818 syntax (nth 1 elm)
819 category (nth 2 elm)
820 i 0)
821 (while (< i len)
822 (if (= (aref chars i) ?-)
;; Range marker: skip the "-" and take the next character as the
;; range end.  CH is not reset; the inner loop below left it one
;; past the range's first character on the previous pass.
823 (setq i (1+ i)
824 to (aref chars i))
;; Single character: a one-element range.
825 (setq ch (aref chars i)
826 to ch))
;; Apply category (and non-"w" syntax) to every character CH..TO.
827 (while (<= ch to)
828 (unless (string-equal syntax "w")
829 (modify-syntax-entry ch syntax))
830 (modify-category-entry ch category)
831 (setq ch (1+ ch)))
832 (setq i (1+ i)))
833 (setq deflist (cdr deflist))))
834
835 ;; Vietnamese character set
836
;; Give the characters returned by `make-char' for both VISCII charsets
;; two categories: ?v, and additionally ?l (defined as "Latin" at the
;; top of this file).  The syntax entries are deliberately left alone
;; (the `modify-syntax-entry' calls are commented out).
837 (let ((lower (make-char 'vietnamese-viscii-lower))
838 (upper (make-char 'vietnamese-viscii-upper)))
839 ;; (modify-syntax-entry lower "w")
840 ;; (modify-syntax-entry upper "w")
841 (modify-category-entry lower ?v)
842 (modify-category-entry upper ?v)
843 (modify-category-entry lower ?l) ; To make a word with
844 (modify-category-entry upper ?l) ; latin characters.
845 )
846
;; For every position code I in 32..127, register in the standard case
;; table the vietnamese-viscii-upper character at code I (first
;; argument) and the vietnamese-viscii-lower character at the same
;; code (second argument) as a case pair.
847 (let ((tbl (standard-case-table))
848 (i 32))
849 (while (< i 128)
850 (set-case-syntax-pair (make-char 'vietnamese-viscii-upper i)
851 (make-char 'vietnamese-viscii-lower i)
852 tbl)
853 (setq i (1+ i))))
854
855 ;; Unicode (mule-unicode-0100-24ff)
856
;; Case pairs, categories and syntax for characters addressed by their
;; Unicode code point through `decode-char'.  TBL is the standard case
;; table; C is the running code point shared by every loop in this
;; form.
857 (let ((tbl (standard-case-table)) c)
858
859 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
860 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
861 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
862 ;; Thus we have to check language-environment to handle casing
863 ;; correctly. Currently only I<->i is available.
864
865 ;; Latin Extended-A, Latin Extended-B
;; Every code point U+0100..U+0233 gets category ?l.  Case pairs set
;; by the loop: for even C in U+0100..U+012E and U+014A..U+0177 the
;; pair is (C, C+1); for even C in U+013A..U+0148 the pair is
;; (C-1, C), i.e. the first member is the odd code point.  Everything
;; else in the range is paired explicitly after the loop.
866 (setq c #x0100)
867 (while (<= c #x0233)
868 (modify-category-entry (decode-char 'ucs c) ?l)
869 (and (or (<= c #x012e)
870 (and (>= c #x014a) (<= c #x0177)))
871 (zerop (% c 2))
872 (set-case-syntax-pair
873 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
874 (and (>= c #x013a)
875 (<= c #x0148)
876 (zerop (% c 2))
877 (set-case-syntax-pair
878 (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))
879 (setq c (1+ c)))
;; Explicit pairs for code points the loop above skips.
880 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
881 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
882 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
883 ;;; (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl) ; these two have different length!
884 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
885 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
886 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
887
888 ;; Latin Extended-B
;; Explicit case pairs; the two character arguments of each call are
;; the members of one pair.  Several second-argument characters appear
;; twice (e.g. the lines marked below), so two different characters are
;; registered against the same partner.
889 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
890 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
891 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
892 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
893 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
894 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
895 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
896 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
897 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
898 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
899 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
900 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
901 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
902 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
903 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
904 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
905 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
906 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
907 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
908 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
909 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
910 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
911 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
912 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
913 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
914 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
915 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
916 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
917 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
918 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
919 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
920 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
921 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
922 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
923 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
924 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
;; The next six calls come in twos that share their second argument.
925 (set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
926 (set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
927 (set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
928 (set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
929 (set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
930 (set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
931 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
932 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
933 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
934 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
935 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
936 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
937 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
938 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
939 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
940 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
941 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
942 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
943 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
944 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
945 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
946 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
947 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
948 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
;; Another two calls sharing their second argument.
949 (set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
950 (set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
951 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
952 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
953 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
954 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
955 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
956 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
957 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
958 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
959 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
960 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
961 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
962 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
963 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
964 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
965 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
966 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
967 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
968 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
969 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
970 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
971 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
972 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
973 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
974 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
975 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
976 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
977 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
978 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
979 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
980 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
981 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
982 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
983
984 ;; Latin Extended Additional
;; Every code point U+1E00..U+1EF9 gets category ?l.  Even code points
;; are paired with their successor (C, C+1), except in the gap between
;; U+1E94 and U+1EA0 where no pairs are set.
985 (setq c #x1e00)
986 (while (<= c #x1ef9)
987 (modify-category-entry (decode-char 'ucs c) ?l)
988 (and (zerop (% c 2))
989 (or (<= c #x1e94) (>= c #x1ea0))
990 (set-case-syntax-pair
991 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
992 (setq c (1+ c)))
993
994 ;; Greek
;; Every code point U+0370..U+03FF gets category ?g.  Case pairs set by
;; the loop: (C, C+32) for C in U+0391..U+03A1 and U+03A3..U+03AB
;; (U+03A2 is skipped), and (C, C+1) for even C in U+03DA..U+03EE.
995 (setq c #x0370)
996 (while (<= c #x03ff)
997 (modify-category-entry (decode-char 'ucs c) ?g)
998 (if (or (and (>= c #x0391) (<= c #x03a1))
999 (and (>= c #x03a3) (<= c #x03ab)))
1000 (set-case-syntax-pair
1001 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
1002 (and (>= c #x03da)
1003 (<= c #x03ee)
1004 (zerop (% c 2))
1005 (set-case-syntax-pair
1006 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1007 (setq c (1+ c)))
;; Explicit pairs that do not follow either regular offset above.
1008 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
1009 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
1010 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
1011 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
1012 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
1013 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
1014 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
1015
1016 ;; Armenian
;; Pair each code point U+0531..U+0556 with the one #x30 above it.
;; No category is assigned in this loop.
1017 (setq c #x531)
1018 (while (<= c #x556)
1019 (set-case-syntax-pair (decode-char 'ucs c)
1020 (decode-char 'ucs (+ c #x30)) tbl)
1021 (setq c (1+ c)))
1022
1023 ;; Greek Extended
;; Every code point U+1F00..U+1FFF gets category ?g.  In addition the
;; loop registers the regular pairs (C+8, C) -- first argument is the
;; code point 8 above -- for C that satisfies all of:
;;   - low nibble 0..7,
;;   - C <= U+1FA7,
;;   - C is not one of U+1F50, U+1F52, U+1F54, U+1F56,
;;   - C is outside the U+1F70..U+1F77 row (second hex digit is not 7).
;; The last test used to compare the masked value against 7; since
;; (logand c #xf0) is always a multiple of 16 that test was always
;; true and the U+1F7x row was wrongly paired with U+1F78..U+1F7F.
;; The mask result must be compared against #x70.
1024 (setq c #x1f00)
1025 (while (<= c #x1fff)
1026 (modify-category-entry (decode-char 'ucs c) ?g)
1027 (and (<= (logand c #x000f) 7)
1028 (<= c #x1fa7)
1029 (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
1030 (/= (logand c #x00f0) #x70)
1031 (set-case-syntax-pair
1032 (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
1033 (setq c (1+ c)))
;; Explicit Greek Extended case pairs that the regular "8 code points
;; apart, up to U+1FA7" rule of the preceding loop does not produce.
1034 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
1035 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1036 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1037 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1038 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1039 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1040 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1041 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1042 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1043 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1044 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1045 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1046 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1047 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1048 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1049 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1050 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1051 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1052 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1053 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1054 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1055 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1056 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1057 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1058
1059 ;; cyrillic
;; Every code point U+0400..U+04FF gets category ?y.  Case pairs set by
;; the loop:
;;   U+0400..U+040F  -> (C, C+80)
;;   U+0410..U+042F  -> (C, C+32)
;;   even C in U+0460..U+0480, U+048C..U+04BE, U+04D0..U+04F4 -> (C, C+1)
1060 (setq c #x0400)
1061 (while (<= c #x04ff)
1062 (modify-category-entry (decode-char 'ucs c) ?y)
1063 (and (>= c #x0400)
1064 (<= c #x040f)
1065 (set-case-syntax-pair
1066 (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
1067 (and (>= c #x0410)
1068 (<= c #x042f)
1069 (set-case-syntax-pair
1070 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
1071 (and (zerop (% c 2))
1072 (or (and (>= c #x0460) (<= c #x0480))
1073 (and (>= c #x048c) (<= c #x04be))
1074 (and (>= c #x04d0) (<= c #x04f4)))
1075 (set-case-syntax-pair
1076 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1077 (setq c (1+ c)))
;; Explicit pairs outside the regular ranges handled by the loop.
1078 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
1079 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
1080 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
1081 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
1082 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1083
1084 ;; general punctuation
;; Assign syntax via `set-case-syntax' to four consecutive ranges:
;;   U+2000..U+200B  " "
;;   U+2010..U+2016  "_"
;;   U+2017..U+201F  "."
;;   U+2020..U+2027  "_"
;; C is only reset before the first two loops; the last two loops
;; continue from wherever the previous loop stopped.
1085 (setq c #x2000)
1086 (while (<= c #x200b)
1087 (set-case-syntax (decode-char 'ucs c) " " tbl)
1088 (setq c (1+ c)))
1089 (setq c #x2010)
1090 (while (<= c #x2016)
1091 (set-case-syntax (decode-char 'ucs c) "_" tbl)
1092 (setq c (1+ c)))
1093 ;; Punctuation syntax for quotation marks (like `)
;; NOTE(review): C is U+2017 on entry here, so that code point also
;; receives "." -- confirm it is meant to be grouped with the
;; quotation marks.
1094 (while (<= c #x201f)
1095 (set-case-syntax (decode-char 'ucs c) "." tbl)
1096 (setq c (1+ c)))
1097 (while (<= c #x2027)
1098 (set-case-syntax (decode-char 'ucs c) "_" tbl)
1099 (setq c (1+ c)))
1100
1101 ;; Roman numerals
;; Pair each code point U+2160..U+216F with the one #x10 above it.
1102 (setq c #x2160)
1103 (while (<= c #x216f)
1104 (set-case-syntax-pair (decode-char 'ucs c)
1105 (decode-char 'ucs (+ c #x10)) tbl)
1106 (setq c (1+ c)))
1107
1108 ;; Circled Latin
;; Pair each code point U+24B6..U+24CF with the one 26 above it, and
;; put both members of the pair into category ?l.
1109 (setq c #x24b6)
1110 (while (<= c #x24cf)
1111 (set-case-syntax-pair (decode-char 'ucs c)
1112 (decode-char 'ucs (+ c 26)) tbl)
1113 (modify-category-entry (decode-char 'ucs c) ?l)
1114 (modify-category-entry (decode-char 'ucs (+ c 26)) ?l)
1115 (setq c (1+ c)))
1116
1117 ;; Fullwidth Latin
;; Pair each code point U+FF21..U+FF3A with the one #x20 above it, and
;; put both members of the pair into category ?l.
1118 (setq c #xff21)
1119 (while (<= c #xff3a)
1120 (set-case-syntax-pair (decode-char 'ucs c)
1121 (decode-char 'ucs (+ c #x20)) tbl)
1122 (modify-category-entry (decode-char 'ucs c) ?l)
1123 (modify-category-entry (decode-char 'ucs (+ c #x20)) ?l)
1124 (setq c (1+ c)))
1125
1126 ;; Combining diacritics
;; Put every code point U+0300..U+0362 into category ?^.
1127 (setq c #x300)
1128 (while (<= c #x362)
1129 (modify-category-entry (decode-char 'ucs c) ?^)
1130 (setq c (1+ c)))
1131
1132 ;; Combining marks
;; Put every code point U+20D0..U+20E3 into category ?^.
1133 (setq c #x20d0)
1134 (while (<= c #x20e3)
1135 (modify-category-entry (decode-char 'ucs c) ?^)
1136 (setq c (1+ c)))
1137
1138 ;; Fixme: syntax for symbols &c
;; The paren below closes the `let' that binds TBL and C.
1139 )
1140
;; Open/close syntax for paired delimiters.  Each element of PAIRS is a
;; two-character string: element 0 is the opening character, element 1
;; the closing one (code points are given in the trailing comments).
;; The loop gives the opener the syntax descriptor "(" followed by its
;; closer, and the closer the descriptor ")" followed by its opener, so
;; each names the other as its matching character.
1141 (let ((pairs
1142 '("\e$,1sEsF\e(B" ; U+2045 U+2046
1143 "\e$,1s}s~\e(B" ; U+207D U+207E
1144 "\e$,1t-t.\e(B" ; U+208D U+208E
1145 "\e$,1{){*\e(B" ; U+2329 U+232A
1146 "\e$,1|T|U\e(B" ; U+23B4 U+23B5
1147 "\e$,2&H&I\e(B" ; U+2768 U+2769
1148 "\e$,2&J&K\e(B" ; U+276A U+276B
1149 "\e$,2&L&M\e(B" ; U+276C U+276D
1150 "\e$,2&P&Q\e(B" ; U+2770 U+2771
1151 "\e$,2&R&S\e(B" ; U+2772 U+2773
1152 "\e$,2&T&U\e(B" ; U+2774 U+2775
1153 "\e$,2'f'g\e(B" ; U+27E6 U+27E7
1154 "\e$,2'h'i\e(B" ; U+27E8 U+27E9
1155 "\e$,2'j'k\e(B" ; U+27EA U+27EB
1156 "\e$,2,#,$\e(B" ; U+2983 U+2984
1157 "\e$,2,%,&\e(B" ; U+2985 U+2986
1158 "\e$,2,',(\e(B" ; U+2987 U+2988
1159 "\e$,2,),*\e(B" ; U+2989 U+298A
1160 "\e$,2,+,,\e(B" ; U+298B U+298C
1161 "\e$,2,-,.\e(B" ; U+298D U+298E
1162 "\e$,2,/,0\e(B" ; U+298F U+2990
1163 "\e$,2,1,2\e(B" ; U+2991 U+2992
1164 "\e$,2,3,4\e(B" ; U+2993 U+2994
1165 "\e$,2,5,6\e(B" ; U+2995 U+2996
1166 "\e$,2,7,8\e(B" ; U+2997 U+2998
1167 "\e$,2-<-=\e(B" ; U+29FC U+29FD
1168 "\e$,2=H=I\e(B" ; U+3008 U+3009
1169 "\e$,2=J=K\e(B" ; U+300A U+300B
1170 "\e$,2=L=M\e(B" ; U+300C U+300D
1171 "\e$,2=N=O\e(B" ; U+300E U+300F
1172 "\e$,2=P=Q\e(B" ; U+3010 U+3011
1173 "\e$,2=T=U\e(B" ; U+3014 U+3015
1174 "\e$,2=V=W\e(B" ; U+3016 U+3017
1175 "\e$,2=X=Y\e(B" ; U+3018 U+3019
1176 "\e$,2=Z=[\e(B" ; U+301A U+301B
1177 "\e$,3m~m\7f\e(B" ; U+FD3E U+FD3F
1178 "\e$,3pUpV\e(B" ; U+FE35 U+FE36
1179 "\e$,3pWpX\e(B" ; U+FE37 U+FE38
1180 "\e$,3pYpZ\e(B" ; U+FE39 U+FE3A
1181 "\e$,3p[p\\e(B" ; U+FE3B U+FE3C
1182 "\e$,3p]p^\e(B" ; U+FE3D U+FE3E
1183 "\e$,3p_p`\e(B" ; U+FE3F U+FE40
1184 "\e$,3papb\e(B" ; U+FE41 U+FE42
1185 "\e$,3pcpd\e(B" ; U+FE43 U+FE44
1186 "\e$,3pypz\e(B" ; U+FE59 U+FE5A
1187 "\e$,3p{p|\e(B" ; U+FE5B U+FE5C
1188 "\e$,3p}p~\e(B" ; U+FE5D U+FE5E
1189 "\e$,3rhri\e(B" ; U+FF08 U+FF09
1190 "\e$,3s;s=\e(B" ; U+FF3B U+FF3D
1191 "\e$,3s[s]\e(B" ; U+FF5B U+FF5D
1192 "\e$,3s_s`\e(B" ; U+FF5F U+FF60
1193 "\e$,3sbsc\e(B" ; U+FF62 U+FF63
1194 )))
1195 (dolist (elt pairs)
1196 (modify-syntax-entry (aref elt 0) (string ?\( (aref elt 1)))
1197 (modify-syntax-entry (aref elt 1) (string ?\) (aref elt 0)))))
1198
1199 \f
1200 ;;; Setting word boundary.
1201
;; One category pair: ?l with ?l (?l is defined as "Latin" at the top
;; of this file).  NOTE(review): presumably this keeps adjacent
;; category-?l characters in the same word -- confirm against the
;; variable's docstring.
1202 (setq word-combining-categories
1203 '((?l . ?l)))
1204
;; List of category pairs; the trailing comment on each entry names
;; the two categories involved.  NOTE(review): presumably a word
;; boundary is placed between adjacent characters whose categories
;; match a pair, in the order given -- confirm against the variable's
;; docstring.  Not every ordering is listed (e.g. there is no
;; (?A . ?H) entry).
1205 (setq word-separating-categories ; (2-byte character sets)
1206 '((?A . ?K) ; Alpha numeric - Katakana
1207 (?A . ?C) ; Alpha numeric - Chinese
1208 (?H . ?A) ; Hiragana - Alpha numeric
1209 (?H . ?K) ; Hiragana - Katakana
1210 (?H . ?C) ; Hiragana - Chinese
1211 (?K . ?A) ; Katakana - Alpha numeric
1212 (?K . ?C) ; Katakana - Chinese
1213 (?C . ?A) ; Chinese - Alpha numeric
1214 (?C . ?K) ; Chinese - Katakana
1215 ))
1216
1217 \f
1218 ;; For each character set, put the information of the most proper
1219 ;; coding system to encode it by `preferred-coding-system' property.
1220
;; L is an alist of (CHARSET . CODING-SYSTEM).  The loop walks it and
;; stores each CODING-SYSTEM as the `preferred-coding-system' property
;; of the corresponding CHARSET.
1221 (let ((l '((latin-iso8859-1 . iso-latin-1)
1222 (latin-iso8859-2 . iso-latin-2)
1223 (latin-iso8859-3 . iso-latin-3)
1224 (latin-iso8859-4 . iso-latin-4)
1225 (thai-tis620 . thai-tis620)
1226 (greek-iso8859-7 . greek-iso-8bit)
1227 (arabic-iso8859-6 . iso-2022-7bit)
1228 (hebrew-iso8859-8 . hebrew-iso-8bit)
1229 (katakana-jisx0201 . japanese-shift-jis)
1230 (latin-jisx0201 . japanese-shift-jis)
1231 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
1232 (latin-iso8859-9 . iso-latin-5)
1233 (japanese-jisx0208-1978 . iso-2022-jp)
1234 (chinese-gb2312 . cn-gb-2312)
1235 (japanese-jisx0208 . iso-2022-jp)
1236 (korean-ksc5601 . iso-2022-kr)
1237 (japanese-jisx0212 . iso-2022-jp)
1238 (chinese-cns11643-1 . iso-2022-cn)
1239 (chinese-cns11643-2 . iso-2022-cn)
1240 (chinese-big5-1 . chinese-big5)
1241 (chinese-big5-2 . chinese-big5)
1242 (chinese-sisheng . iso-2022-7bit)
1243 (ipa . iso-2022-7bit)
1244 (vietnamese-viscii-lower . vietnamese-viscii)
1245 (vietnamese-viscii-upper . vietnamese-viscii)
1246 (arabic-digit . iso-2022-7bit)
1247 (arabic-1-column . iso-2022-7bit)
1248 (ascii-right-to-left . iso-2022-7bit)
1249 (lao . lao)
1250 (arabic-2-column . iso-2022-7bit)
1251 (indian-is13194 . devanagari)
1252 (indian-glyph . devanagari)
1253 (tibetan-1-column . tibetan)
1254 (ethiopic . iso-2022-7bit)
1255 (chinese-cns11643-3 . iso-2022-cn)
1256 (chinese-cns11643-4 . iso-2022-cn)
1257 (chinese-cns11643-5 . iso-2022-cn)
1258 (chinese-cns11643-6 . iso-2022-cn)
1259 (chinese-cns11643-7 . iso-2022-cn)
1260 (indian-2-column . devanagari)
1261 (tibetan . tibetan)
1262 (latin-iso8859-14 . iso-latin-8)
1263 (latin-iso8859-15 . iso-latin-9))))
;; (car (car l)) is the charset, (cdr (car l)) its coding system.
1264 (while l
1265 (put-charset-property (car (car l)) 'preferred-coding-system (cdr (car l)))
1266 (setq l (cdr l))))
1267
1268 \f
1269 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
1270 ;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
1271 ;; property on the charsets.
;; For each charset in L: set the `auto-fill-chars' slot indexed by the
;; character (make-char CHARSET) to t, and set the charset's
;; `nospace-between-words' property to t.
1272 (let ((l '(katakana-jisx0201
1273 japanese-jisx0208 japanese-jisx0212
1274 chinese-gb2312 chinese-big5-1 chinese-big5-2)))
1275 (while l
1276 (aset auto-fill-chars (make-char (car l)) t)
1277 (put-charset-property (car l) 'nospace-between-words t)
1278 (setq l (cdr l))))
1279
1280 ;;; Local Variables:
1281 ;;; coding: iso-2022-7bit
1282 ;;; End:
1283
1284 ;;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
1285 ;;; characters.el ends here