]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
Setup syntaxes for more parentheses Unicode characters.
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6
7 ;; Keywords: multibyte character, character set, syntax, category
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;; This file contains multibyte characters. Save this file always in
29 ;; the coding system `iso-2022-7bit'.
30
31 ;; This file does not define the syntax for Latin-N character sets;
32 ;; those are defined by the files latin-N.el.
33
34 ;;; Code:
35
36 ;;; Predefined categories.
37
38 ;; For each character set.
39
;; Register every predefined category.  Each entry below is a
;; (MNEMONIC . DOCSTRING) pair; the pairs are handed to
;; `define-category' one by one, in the order listed.
(dolist (spec
         '(;; One category per character set.
           (?a . "ASCII")
           (?l . "Latin")
           (?t . "Thai")
           (?g . "Greek")
           (?b . "Arabic")
           (?w . "Hebrew")
           (?y . "Cyrillic")
           (?k . "Japanese katakana")
           (?r . "Japanese roman")
           (?c . "Chinese")
           (?j . "Japanese")
           (?h . "Korean")
           (?e . "Ethiopic (Ge'ez)")
           (?v . "Vietnamese")
           (?i . "Indian")
           (?o . "Lao")
           (?q . "Tibetan")

           ;; For each group (row) of 2-byte character sets.
           (?A . "Alpha-numeric characters of 2-byte character sets")
           (?C . "Chinese (Han) characters of 2-byte character sets")
           (?G . "Greek characters of 2-byte character sets")
           (?H . "Japanese Hiragana characters of 2-byte character sets")
           (?K . "Japanese Katakana characters of 2-byte character sets")
           (?N . "Korean Hangul characters of 2-byte character sets")
           (?Y . "Cyrillic characters of 2-byte character sets")
           (?I . "Indian Glyphs")

           ;; For phonetic classifications.
           (?0 . "consonant")
           (?1 . "base (independent) vowel")
           (?2 . "upper diacritical mark (including upper vowel)")
           (?3 . "lower diacritical mark (including lower vowel)")
           (?4 . "tone mark")
           (?5 . "symbol")
           (?6 . "digit")
           (?7 . "vowel-modifying diacritical mark")
           (?8 . "vowel-signs")
           (?9 . "semivowel lower")

           ;; For filling.
           (?| . "While filling, we can break a line at this character.")

           ;; For indentation calculation.
           (?\s . "This character counts as a space for indentation purposes.")

           ;; Keep the following for `kinsoku' processing.  See comments in
           ;; kinsoku.el.
           (?> . "A character which can't be placed at beginning of line.")
           (?< . "A character which can't be placed at end of line.")

           ;; Combining
           (?^ . "Combining diacritic or mark")))
  (define-category (car spec) (cdr spec)))
96 \f
97 ;;; Setting syntax and category.
98
99 ;; ASCII
100
;; Every character code from 32 through 126 is put in both
;; category `a' (ASCII) and category `l' (Latin).
(dotimes (offset (- 127 32))
  (let ((ch (+ 32 offset)))
    (modify-category-entry ch ?a)
    (modify-category-entry ch ?l)))
106
107 ;; Arabic character set
108
;; Put the generic character of each Arabic charset in category `b'.
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  ;; (modify-syntax-entry (make-char charset) "w")
  (modify-category-entry (make-char charset) ?b))

;; Do the same for three inclusive ranges of UCS code points.
(dolist (range '((#x600 . #x6ff)
                 (#xfb50 . #xfdff)
                 (#xfe70 . #xfefe)))
  (let ((code (car range))
        (last (cdr range)))
    (while (<= code last)
      (modify-category-entry (decode-char 'ucs code) ?b)
      (setq code (1+ code)))))
129
130 ;; Chinese character set (GB2312)
131
132 ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
;; Syntax setup for chinese-gb2312.
;; Rows 33, 34 and 41 of the charset get symbol syntax ("_").
133 (modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
134 (modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
135 (modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
;; Ten characters get open-parenthesis syntax ("(" followed by the
;; character that matches them).
136 (modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
137 (modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
138 (modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
139 (modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
140 (modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
141 (modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
142 (modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
143 (modify-syntax-entry ?\\e$A#(\e(B "(\e$A#)\e(B")
144 (modify-syntax-entry ?\\e$A#{\e(B "(\e$A#}\e(B")
145 (modify-syntax-entry ?\\e$A#[\e(B "(\e$A#]\e(B")
;; Their partners get close-parenthesis syntax (")" followed by the
;; corresponding opener), mirroring the ten entries above.
146 (modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
147 (modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
148 (modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
149 (modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
150 (modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
151 (modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
152 (modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
153 (modify-syntax-entry ?\\e$A#)\e(B ")\e$A#(\e(B")
154 (modify-syntax-entry ?\\e$A#}\e(B ")\e$A#{\e(B")
155 (modify-syntax-entry ?\\e$A#]\e(B ")\e$A#[\e(B")
156
;; Every character of the string CHARS gets punctuation syntax (".").
157 (let ((chars "\e$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d\e(B"))
158 (dotimes (i (length chars))
159 (modify-syntax-entry (aref chars i) ".")))
160
;; The whole charset is in category `c' and in the line-break
;; category `|'.
(let ((gb2312 (make-char 'chinese-gb2312)))
  (modify-category-entry gb2312 ?c)
  (modify-category-entry gb2312 ?\|))

;; Individual rows get an additional per-row category.
(dolist (row-cat '((35 . ?A)
                   (36 . ?H)
                   (37 . ?K)
                   (38 . ?G)
                   (39 . ?Y)))
  (modify-category-entry (make-char 'chinese-gb2312 (car row-cat))
                         (cdr row-cat)))

;; Rows 48 through 126 are in category `C'.
(dotimes (k (- 127 48))
  (modify-category-entry (make-char 'chinese-gb2312 (+ 48 k)) ?C))
172
173 ;; Chinese character set (BIG5)
174
;; Each entry is (FIRST LIMIT SYNTAX) in Big5 code points.  Characters
;; from FIRST up to, but not including, LIMIT receive SYNTAX.
(dolist (span '((#xA141 #xA15D ".")
                (#xA1A5 #xA1AD ".")
                (#xA1AD #xA2AF "_")))
  (let ((ch (decode-big5-char (nth 0 span)))
        (limit (decode-big5-char (nth 1 span)))
        (syntax (nth 2 span)))
    (while (< ch limit)
      (modify-syntax-entry ch syntax)
      (setq ch (1+ ch)))))
190
;; PARENS is read as consecutive (open, close) character pairs: the
;; character at each even index gets open-parenthesis syntax matching
;; the character after it, and that following character gets
;; close-parenthesis syntax matching the opener.
191 (let ((parens "\e$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
192 open close)
;; One iteration per pair; a trailing unpaired character would be ignored
;; because the count is the integer quotient of the length by 2.
193 (dotimes (i (/ (length parens) 2))
194 (setq open (aref parens (* i 2))
195 close (aref parens (1+ (* i 2))))
;; The syntax descriptor is built with `format' so that the matching
;; character is embedded after the class character.
196 (modify-syntax-entry open (format "(%c" close))
197 (modify-syntax-entry close (format ")%c" open))))
198
;; Both Big5 charsets are placed, as a whole, in categories `c', `C'
;; and the line-break category `|'.
(dolist (charset '(chinese-big5-1 chinese-big5-2))
  (let ((generic (make-char charset)))
    ;; (modify-syntax-entry generic "w")
    (dolist (category '(?c ?C ?\|))
      (modify-category-entry generic category))))
212
213
214 ;; Chinese character set (CNS11643)
215
;; Each of the seven CNS11643 charsets is placed, as a whole, in
;; categories `c', `C' and the line-break category `|'.
(dolist (plane '(chinese-cns11643-1
                 chinese-cns11643-2
                 chinese-cns11643-3
                 chinese-cns11643-4
                 chinese-cns11643-5
                 chinese-cns11643-6
                 chinese-cns11643-7))
  (let ((generic (make-char plane)))
    ;; (modify-syntax-entry generic "w")
    (modify-category-entry generic ?c)
    (modify-category-entry generic ?C)
    (modify-category-entry generic ?|)))
231
232 ;; Cyrillic character set (ISO-8859-5)
233
;; The whole cyrillic-iso8859-5 charset is in category `y'.
(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)

;; Code 160 is whitespace; codes #xad, #xf0 and #xfd are punctuation.
(modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
(dolist (code '(#xad #xf0 #xfd))
  (modify-syntax-entry (make-char 'cyrillic-iso8859-5 code) "."))

;; Case pairs, registered in the standard case table.  The pairs are
;; generated from code ranges rather than spelled out as character
;; literals, so they do not depend on how this file is encoded.
(let ((tbl (standard-case-table))
      code)
  ;; cyrillic-iso8859-5: codes #xa1..#xaf (except #xad, which is
  ;; punctuation above) pair with the code #x50 higher.
  (setq code #xa1)
  (while (<= code #xaf)
    (unless (= code #xad)
      (set-case-syntax-pair (make-char 'cyrillic-iso8859-5 code)
                            (make-char 'cyrillic-iso8859-5 (+ code #x50))
                            tbl))
    (setq code (1+ code)))
  ;; cyrillic-iso8859-5: codes #xb0..#xcf pair with the code #x20 higher.
  (setq code #xb0)
  (while (<= code #xcf)
    (set-case-syntax-pair (make-char 'cyrillic-iso8859-5 code)
                          (make-char 'cyrillic-iso8859-5 (+ code #x20))
                          tbl)
    (setq code (1+ code)))
  ;; Unicode equivalents: U+0401..U+040F (except U+040D) pair with the
  ;; code point #x50 higher.
  (setq code #x401)
  (while (<= code #x40f)
    (unless (= code #x40d)
      (set-case-syntax-pair (decode-char 'ucs code)
                            (decode-char 'ucs (+ code #x50))
                            tbl))
    (setq code (1+ code)))
  ;; Unicode equivalents: U+0410..U+042F pair with the code point
  ;; #x20 higher.
  (setq code #x410)
  (while (<= code #x42f)
    (set-case-syntax-pair (decode-char 'ucs code)
                          (decode-char 'ucs (+ code #x20))
                          tbl)
    (setq code (1+ code))))
333
334 ;; Devanagari character set
335
336 ;;; Commented out since the categories appear not to be used anywhere
337 ;;; and word syntax is the default.
338 ;; (let ((deflist '(;; chars syntax category
339 ;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
340 ;; ; chandrabindu, anuswar, visarga
341 ;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
342 ;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
343 ;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
344 ;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
345 ;; ;; Unicode equivalents
346 ;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
347 ;; ; chandrabindu, anuswar, visarga
348 ;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
349 ;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
350 ;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
351 ;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
352 ;; ))
353 ;; elm chars len syntax category to ch i)
354 ;; (while deflist
355 ;; (setq elm (car deflist))
356 ;; (setq chars (car elm)
357 ;; len (length chars)
358 ;; syntax (nth 1 elm)
359 ;; category (nth 2 elm)
360 ;; i 0)
361 ;; (while (< i len)
362 ;; (if (= (aref chars i) ?-)
363 ;; (setq i (1+ i)
364 ;; to (aref chars i))
365 ;; (setq ch (aref chars i)
366 ;; to ch))
367 ;; (while (<= ch to)
368 ;; (modify-syntax-entry ch syntax)
369 ;; (modify-category-entry ch category)
370 ;; (setq ch (1+ ch)))
371 ;; (setq i (1+ i)))
372 ;; (setq deflist (cdr deflist))))
373
374 ;; Ethiopic character set
375
;; The `ethiopic' charset as a whole, and every UCS code point from
;; #x1200 through #x137c, is in category `e'.
(modify-category-entry (make-char 'ethiopic) ?e)
;; (modify-syntax-entry (make-char 'ethiopic) "w")
(let ((code #x1200))
  (while (<= code #x137c)
    (modify-category-entry (decode-char 'ucs code) ?e)
    (setq code (1+ code))))
;; Give punctuation syntax (".") to each character in CHARS: first a
;; group of characters written in the Ethiopic charset, then the
;; characters the original comment identifies as their Unicode
;; equivalents.
380 (let ((chars '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
381 ;; Unicode equivalents of the above:
382 ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B)))
;; Walk the list, consuming one character per iteration.
383 (while chars
384 (modify-syntax-entry (car chars) ".")
385 (setq chars (cdr chars))))
386
387 ;; Greek character set (ISO-8859-7)
388
;; The greek-iso8859-7 charset as a whole, and every UCS code point
;; from #x370 through #x3ff, is in category `g'.
(modify-category-entry (make-char 'greek-iso8859-7) ?g)
(dotimes (offset (1+ (- #x3ff #x370)))
  (modify-category-entry (decode-char 'ucs (+ #x370 offset)) ?g))
394
395 ;; (let ((c 182))
396 ;; (while (< c 255)
397 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
398 ;; (setq c (1+ c))))
399 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
;; Syntax and case setup for Greek, driven by code tables instead of
;; character literals so that it does not depend on the encoding of
;; this file.  greek-iso8859-7 characters are named by their 8-bit
;; code; the Unicode equivalents by their UCS code point.

;; Codes #xb7, #xbb and #xbd are punctuation.
(dolist (code '(#xb7 #xbb #xbd))
  (modify-syntax-entry (make-char 'greek-iso8859-7 code) "."))

(let ((tbl (standard-case-table))
      code)
  ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
  ;; in several cases.
  ;; Each entry is (CODE . SYNTAX).  Code #xa6 used to be set to "."
  ;; and then immediately to "_"; only the effective "_" is kept.
  (dolist (entry '((#xa1 . ".")
                   (#xa2 . ".")
                   (#xa6 . "_")
                   (#xa7 . ".")
                   (#xa9 . "_")
                   (#xab . ".")
                   (#xac . "_")
                   (#xad . "_")
                   (#xaf . ".")
                   (#xb0 . "_")
                   (#xb1 . "_")))
    (set-case-syntax (make-char 'greek-iso8859-7 (car entry))
                     (cdr entry)
                     tbl))
  ;; Deliberately left disabled, as in the original:
  ;;   (#xb7 . "_") and (#xbd . "_")

  ;; greek-iso8859-7 case pairs: codes #xc1..#xdb (except #xd2) pair
  ;; with the code #x20 higher.
  (setq code #xc1)
  (while (<= code #xdb)
    (unless (= code #xd2)
      (set-case-syntax-pair (make-char 'greek-iso8859-7 code)
                            (make-char 'greek-iso8859-7 (+ code #x20))
                            tbl))
    (setq code (1+ code)))
  ;; greek-iso8859-7 case pairs that do not follow the fixed offset,
  ;; as (UPPER . LOWER) codes.
  (dolist (pair '((#xbf . #xfe)
                  (#xbe . #xfd)
                  (#xbc . #xfc)
                  (#xb6 . #xdc)
                  (#xb8 . #xdd)
                  (#xb9 . #xde)
                  (#xba . #xdf)))
    (set-case-syntax-pair (make-char 'greek-iso8859-7 (car pair))
                          (make-char 'greek-iso8859-7 (cdr pair))
                          tbl))

  ;; Unicode equivalents: U+0391..U+03AB (except U+03A2) pair with the
  ;; code point #x20 higher.
  (setq code #x391)
  (while (<= code #x3ab)
    (unless (= code #x3a2)
      (set-case-syntax-pair (decode-char 'ucs code)
                            (decode-char 'ucs (+ code #x20))
                            tbl))
    (setq code (1+ code)))
  ;; Unicode equivalents of the irregular pairs, as (UPPER . LOWER)
  ;; code points.
  (dolist (pair '((#x38f . #x3ce)
                  (#x38e . #x3cd)
                  (#x38c . #x3cc)
                  (#x386 . #x3ac)
                  (#x388 . #x3ad)
                  (#x389 . #x3ae)
                  (#x38a . #x3af)))
    (set-case-syntax-pair (decode-char 'ucs (car pair))
                          (decode-char 'ucs (cdr pair))
                          tbl)))
487
488 ;; Hebrew character set (ISO-8859-8)
489
;; The hebrew-iso8859-8 charset as a whole, and every UCS code point
;; from #x591 through #x5f4, is in category `w'.
(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
(dotimes (offset (1+ (- #x5f4 #x591)))
  (modify-category-entry (decode-char 'ucs (+ #x591 offset)) ?w))

;; Punctuation in the hebrew-iso8859-8 charset.
(dolist (code '(208                     ; PASEQ
                211))                   ; SOF PASUQ
  (modify-syntax-entry (make-char 'hebrew-iso8859-8 code) "."))
;; Punctuation given by UCS code point.
(dolist (code '(#x5be                   ; MAQAF
                #x5c0                   ; PASEQ
                #x5c3                   ; SOF PASUQ
                #x5f3                   ; GERESH
                #x5f4))                 ; GERSHAYIM
  (modify-syntax-entry (decode-char 'ucs code) "."))
503
504 ;; (let ((c 224))
505 ;; (while (< c 251)
506 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
507 ;; (setq c (1+ c))))
508 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
509
510 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
511
;; indian-is13194 is in category `i'; the two glyph charsets are in
;; category `I'.
(modify-category-entry (make-char 'indian-is13194) ?i)
(dolist (charset '(indian-2-column indian-glyph))
  (modify-category-entry (make-char charset) ?I))
;; Unicode Devanagari block
(dotimes (offset (1+ (- #x970 #x901)))
  (modify-category-entry (decode-char 'ucs (+ #x901 offset)) ?i))
520
;; Apply the same table of phonetic categories to nine blocks of UCS
;; code points.  Each table entry is (FIRST LAST CATEGORY), where
;; FIRST and LAST are inclusive offsets from the start of a block.
(let ((ranges '(;; RANGE CATEGORY MEANINGS
                (#x01 #x03 ?7)          ; vowel modifier
                (#x05 #x14 ?1)          ; base vowel
                (#x15 #x39 ?0)          ; consonants
                (#x3e #x4d ?8)          ; vowel modifier
                (#x51 #x54 ?4)          ; stress/tone mark
                (#x58 #x5f ?0)          ; consonants
                (#x60 #x61 ?1)          ; base vowel
                (#x62 #x63 ?8)          ; vowel modifier
                (#x66 #x6f ?6)          ; digits
                ))
      (block-starts '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00)))
  (dolist (base block-starts)
    (dolist (range ranges)
      (let ((code (+ base (nth 0 range)))
            (last (+ base (nth 1 range)))
            (category (nth 2 range)))
        (while (<= code last)
          (modify-category-entry (decode-char 'ucs code) category)
          (setq code (1+ code)))))))
540
541 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
542
;; Charset-wide categories: katakana-jisx0201 is in `k', `j' and `|';
;; latin-jisx0201 is in `r'; the two kanji charsets are in `j' and `|'.
(dolist (category '(?k ?j ?\|))
  (modify-category-entry (make-char 'katakana-jisx0201) category))
(modify-category-entry (make-char 'latin-jisx0201) ?r)
(dolist (charset '(japanese-jisx0208 japanese-jisx0212))
  (modify-category-entry (make-char charset) ?j)
  (modify-category-entry (make-char charset) ?\|))

;; Two inclusive UCS ranges get categories `k', `j' and `|':
;; the Unicode equivalents of JISX0201-kana, then the Katakana block.
;; ?K is double width, ?k isn't specified
(dolist (range '((#xff61 . #xff9f)
                 (#x30a0 . #x30ff)))
  (let ((code (car range))
        (last (cdr range)))
    (while (<= code last)
      (let ((ch (decode-char 'ucs code)))
        (modify-category-entry ch ?k)
        (modify-category-entry ch ?j)
        (modify-category-entry ch ?\|))
      (setq code (1+ code)))))

;; Hiragana block
(dotimes (offset (1+ (- #x309f #x3040)))
  (let ((ch (decode-char 'ucs (+ #x3040 offset))))
    ;; ?H is actually defined to be double width
    (modify-category-entry ch ?H)
    ;;(modify-category-entry ch ?j)
    (modify-category-entry ch ?\|)))
577
578 ;; JISX0208
579 ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
;; Syntax and category setup for japanese-jisx0208.
;; Rows 33, 34 and 40 get symbol syntax ("_").
580 (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
581 (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
582 (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
;; These twelve characters are then given word syntax ("w"); the calls
;; come after the row-wide settings above.
583 (let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
584 (while chars
585 (modify-syntax-entry (car chars) "w")
586 (setq chars (cdr chars))))
;; Five bracket pairs: each opener gets "(" plus its partner, and each
;; partner gets ")" plus its opener.
587 (modify-syntax-entry ?\\e$B!J\e(B "(\e$B!K\e(B")
588 (modify-syntax-entry ?\\e$B!N\e(B "(\e$B!O\e(B")
589 (modify-syntax-entry ?\\e$B!P\e(B "(\e$B!Q\e(B")
590 (modify-syntax-entry ?\\e$B!V\e(B "(\e$B!W\e(B")
591 (modify-syntax-entry ?\\e$B!X\e(B "(\e$B!Y\e(B")
592 (modify-syntax-entry ?\\e$B!K\e(B ")\e$B!J\e(B")
593 (modify-syntax-entry ?\\e$B!O\e(B ")\e$B!N\e(B")
594 (modify-syntax-entry ?\\e$B!Q\e(B ")\e$B!P\e(B")
595 (modify-syntax-entry ?\\e$B!W\e(B ")\e$B!V\e(B")
596 (modify-syntax-entry ?\\e$B!Y\e(B ")\e$B!X\e(B")
597
;; Per-row categories for rows 35 through 39.
598 (modify-category-entry (make-char 'japanese-jisx0208 35) ?A)
599 (modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
600 (modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
601 (modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
602 (modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
;; Rows 48 through 126 are in category `C'.
603 (let ((row 48))
604 (while (< row 127)
605 (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
606 (setq row (1+ row))))
;; The twelve characters that were given word syntax above are also
;; categorized individually: the first as `K', the next two as both
;; `K' and `H', and the remaining nine as `C'.
607 (modify-category-entry ?\e$B!<\e(B ?K)
608 (let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B)))
609 (while chars
610 (modify-category-entry (car chars) ?K)
611 (modify-category-entry (car chars) ?H)
612 (setq chars (cdr chars))))
613 (let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
614 (while chars
615 (modify-category-entry (car chars) ?C)
616 (setq chars (cdr chars))))
617
618 ;; JISX0212
619 ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
;; Rows 33, 34 and 35 of japanese-jisx0212 get symbol syntax ("_").
(dolist (row '(33 34 35))
  (modify-syntax-entry (make-char 'japanese-jisx0212 row) "_"))

;; The charset as a whole is in category `C'.
(modify-category-entry (make-char 'japanese-jisx0212) ?C)
625
626 ;; JISX0201-Kana
627 ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
;; Give punctuation syntax (".") to three katakana-jisx0201 characters
;; and to the three characters listed as their Unicode counterparts.
628 (let ((chars '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
629 ;; Unicode:
630 ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B)))
;; Walk the list, consuming one character per iteration.
631 (while chars
632 (modify-syntax-entry (car chars) ".")
633 (setq chars (cdr chars))))
634
;; The two katakana-jisx0201 bracket characters form a pair: the first
;; opens and the second closes, each naming the other as its match.
;; (The second used to be given open-parenthesis syntax as well, so
;; the pair could never balance.)
(modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
(modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B")
637
638 ;; Korean character set (KSC5601)
639
640 ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
;; Rows 33, 34 and 38 through 41 of korean-ksc5601 get symbol
;; syntax ("_").
(dolist (row '(33 34 38 39 40 41))
  (modify-syntax-entry (make-char 'korean-ksc5601 row) "_"))

;; The charset as a whole is in category `h'; some rows get an
;; additional per-row category.
(modify-category-entry (make-char 'korean-ksc5601) ?h)
(dolist (row-cat '((35 . ?A)
                   (37 . ?G)
                   (42 . ?H)
                   (43 . ?K)
                   (44 . ?Y)))
  (modify-category-entry (make-char 'korean-ksc5601 (car row-cat))
                         (cdr row-cat)))
654
655 ;; Latin character set (latin-1,2,3,4,5,8,9)
656
;; For each Latin charset: the charset as a whole is in category `l',
;; and its code 160 is additionally in the space category.
(dolist (charset '(latin-iso8859-1
                   latin-iso8859-2
                   latin-iso8859-3
                   latin-iso8859-4
                   latin-iso8859-9
                   latin-iso8859-14
                   latin-iso8859-15))
  (modify-category-entry (make-char charset) ?l)
  (modify-category-entry (make-char charset 160) ?\ ))
672
673 ;; Lao character set
674
;; The `lao' charset as a whole, and every UCS code point from #xe80
;; through #xeff, is in category `o'.
(modify-category-entry (make-char 'lao) ?o)
(let ((code #xe80))
  (while (<= code #xeff)
    (modify-category-entry (decode-char 'ucs code) ?o)
    (setq code (1+ code))))
678
;; Table-driven syntax and category setup for Lao.
;; Each DEFLIST entry is (CHARS SYNTAX CATEGORY).  CHARS is scanned one
;; character at a time; a `-' means "continue from the character after
;; the previous one up to the character that follows the `-'".
;; NOTE(review): the charset tone-mark entry ends at `l' while its
;; Unicode counterpart ends at `k', and the Unicode "semivowel lower"
;; entry lists one more character than the charset one -- confirm that
;; these differences are intended.
679 (let ((deflist '(;; chars syntax category
680 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
681 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
682 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
683 ("\e(1XY\e(B" "w" ?3) ; vowel lower
684 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
685 ("\e(1\\e(B" "w" ?9) ; semivowel lower
686 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
687 ("\e(1Of\e(B" "_" ?5) ; symbol
688 ;; Unicode equivalents
689 ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
690 ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
691 ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
692 ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
693 ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark
694 ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower
695 ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
696 ("\e$,1DODf\e(B" "_" ?5) ; symbol
697 ))
698 elm chars len syntax category to ch i)
;; Outer loop: one DEFLIST entry per iteration.
699 (while deflist
700 (setq elm (car deflist))
701 (setq chars (car elm)
702 len (length chars)
703 syntax (nth 1 elm)
704 category (nth 2 elm)
705 i 0)
;; Inner loop: scan CHARS.  For an ordinary character, CH and TO are
;; both set to it, so exactly that character is processed.  For `-',
;; only TO is updated (to the character after the `-'); CH is left one
;; past the previously processed character, so the rest of the range is
;; filled in.
706 (while (< i len)
707 (if (= (aref chars i) ?-)
708 (setq i (1+ i)
709 to (aref chars i))
710 (setq ch (aref chars i)
711 to ch))
712 (while (<= ch to)
;; Entries whose syntax is "w" do not touch the syntax table; only
;; their category is recorded.
713 (unless (string-equal syntax "w")
714 (modify-syntax-entry ch syntax))
715 (modify-category-entry ch category)
716 (setq ch (1+ ch)))
717 (setq i (1+ i)))
718 (setq deflist (cdr deflist))))
719
720 ;; Thai character set (TIS620)
721
;; The thai-tis620 charset as a whole, and every UCS code point from
;; #xe00 through #xe7f, is in category `t'.
(modify-category-entry (make-char 'thai-tis620) ?t)
(let ((code #xe00))
  (while (<= code #xe7f)
    (modify-category-entry (decode-char 'ucs code) ?t)
    (setq code (1+ code))))
725
;; Table-driven syntax and category setup for Thai.
;; Each DEFLIST entry is (CHARS SYNTAX CATEGORY).  CHARS is scanned one
;; character at a time; a `-' means "continue from the character after
;; the previous one up to the character that follows the `-'".
726 (let ((deflist '(;; chars syntax category
727 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
728 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
729 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
730 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
731 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
732 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
733 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
734 ;; Unicode equivalents
735 ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
736 ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
737 ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
738 ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
739 ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
740 ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
741 ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
742 ))
743 elm chars len syntax category to ch i)
;; Outer loop: one DEFLIST entry per iteration.
744 (while deflist
745 (setq elm (car deflist))
746 (setq chars (car elm)
747 len (length chars)
748 syntax (nth 1 elm)
749 category (nth 2 elm)
750 i 0)
;; Inner loop: scan CHARS.  For an ordinary character, CH and TO are
;; both set to it, so exactly that character is processed.  For `-',
;; only TO is updated (to the character after the `-'); CH is left one
;; past the previously processed character, so the rest of the range is
;; filled in.
751 (while (< i len)
752 (if (= (aref chars i) ?-)
753 (setq i (1+ i)
754 to (aref chars i))
755 (setq ch (aref chars i)
756 to ch))
757 (while (<= ch to)
;; Entries whose syntax is "w" do not touch the syntax table; only
;; their category is recorded.
758 (unless (string-equal syntax "w")
759 (modify-syntax-entry ch syntax))
760 (modify-category-entry ch category)
761 (setq ch (1+ ch)))
762 (setq i (1+ i)))
763 (setq deflist (cdr deflist))))
764
;; Tibetan character set

;; Tag the characters returned by `make-char' for both Tibetan
;; charsets, and every Unicode code point U+0F00..U+0FFF, with
;; category ?q.
(dolist (tibetan-charset '(tibetan tibetan-1-column))
  (modify-category-entry (make-char tibetan-charset) ?q))
(let ((code #xf00))
  (while (<= code #xfff)
    (modify-category-entry (decode-char 'ucs code) ?q)
    (setq code (1+ code))))
771
;; Assign syntax and category to individual Tibetan characters.
;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  Inside CHARS,
;; "X-Y" denotes the inclusive range from X to Y; any other character
;; stands for itself.  The first group of entries uses the Tibetan
;; charsets, the second group ("Unicode version") is marked by the
;; original author as not complete.
;; The loop below walks DEFLIST; for each CHARS string it scans by
;; index I, treating `-' as "the next character ends a range that
;; continues from the current CH", and applies the entry to every
;; character from CH through TO.  Entries whose SYNTAX is "w" only
;; receive the category; no syntax entry is installed for them.
772 (let ((deflist '(;; chars syntax category
773 ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
774 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
775 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
776 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
777 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
778 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
779 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lowel vowel/modifier
780 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
781 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
782 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
783 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
784 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
785 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
786 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
787
788 ;; Unicode version (not complete)
789 ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
790 ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ;
791 ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
792 ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
793 ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lowel vowel/modifier
794 ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
795 ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
796 ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
797 ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
798 ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
799 ))
800 elm chars len syntax category to ch i)
801 (while deflist
802 (setq elm (car deflist))
803 (setq chars (car elm)
804 len (length chars)
805 syntax (nth 1 elm)
806 category (nth 2 elm)
807 i 0)
808 (while (< i len)
809 (if (= (aref chars i) ?-)
810 (setq i (1+ i)
811 to (aref chars i))
812 (setq ch (aref chars i)
813 to ch))
814 (while (<= ch to)
815 (unless (string-equal syntax "w")
816 (modify-syntax-entry ch syntax))
817 (modify-category-entry ch category)
818 (setq ch (1+ ch)))
819 (setq i (1+ i)))
820 (setq deflist (cdr deflist))))
821
;; Vietnamese character set

;; Tag the characters returned by `make-char' for the two VISCII
;; charsets first with category ?v and then with category ?l (Latin),
;; the latter so that they form words together with Latin characters.
;; No syntax entry is changed here.
(dolist (cat '(?v ?l))
  (dolist (viscii-charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
    (modify-category-entry (make-char viscii-charset) cat)))
833
;; Register case pairs in the standard case table: for every code
;; 32..127, the `vietnamese-viscii-upper' character is the capital of
;; the `vietnamese-viscii-lower' character with the same code.
(let ((tbl (standard-case-table)))
  (dotimes (offset 96)
    (let ((code (+ offset 32)))
      (set-case-syntax-pair (make-char 'vietnamese-viscii-upper code)
                            (make-char 'vietnamese-viscii-lower code)
                            tbl))))
841
842 ;; Unicode (mule-unicode-0100-24ff)
843
844 (let ((tbl (standard-case-table)) c)
845
846 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
847 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
848 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
849 ;; Thus we have to check language-environment to handle casing
850 ;; correctly. Currently only I<->i is available.
851
852 ;; Latin Extended-A, Latin Extended-B
;; Walk U+0100..U+0233.  Every code point gets category ?l (Latin).
;; Case pairs are registered in TBL by position, then a few pairs that
;; do not follow either pattern are listed explicitly after the loop.
853 (setq c #x0100)
854 (while (<= c #x0233)
855 (modify-category-entry (decode-char 'ucs c) ?l)
;; Even C in U+0100..U+012E or U+014A..U+0177: capital C pairs with
;; the small letter at C+1.
856 (and (or (<= c #x012e)
857 (and (>= c #x014a) (<= c #x0177)))
858 (zerop (% c 2))
859 (set-case-syntax-pair
860 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
;; Even C in U+013A..U+0148: here the capital is the odd code point
;; C-1 and the small letter is C.
861 (and (>= c #x013a)
862 (<= c #x0148)
863 (zerop (% c 2))
864 (set-case-syntax-pair
865 (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))
866 (setq c (1+ c)))
;; Explicit (CAPITAL SMALL) pairs; one pair is left commented out by
;; the original author.
867 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
868 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
869 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
870 ;;; (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl) ; these two have different length!
871 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
872 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
873 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
874
875 ;; Latin Extended-B
;; Explicit case pairs for Latin Extended-B.  Each call registers its
;; first character as the capital and its second as the small letter
;; in TBL.  Some small letters appear as the second argument of two
;; consecutive calls, i.e. two different first characters map to the
;; same small letter.
876 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
877 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
878 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
879 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
880 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
881 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
882 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
883 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
884 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
885 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
886 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
887 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
888 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
889 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
890 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
891 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
892 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
893 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
894 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
895 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
896 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
897 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
898 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
899 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
900 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
901 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
902 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
903 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
904 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
905 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
906 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
907 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
908 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
909 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
910 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
911 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
912 (set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
913 (set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
914 (set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
915 (set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
916 (set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
917 (set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
918 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
919 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
920 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
921 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
922 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
923 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
924 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
925 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
926 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
927 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
928 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
929 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
930 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
931 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
932 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
933 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
934 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
935 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
936 (set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
937 (set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
938 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
939 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
940 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
941 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
942 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
943 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
944 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
945 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
946 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
947 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
948 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
949 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
950 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
951 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
952 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
953 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
954 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
955 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
956 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
957 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
958 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
959 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
960 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
961 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
962 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
963 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
964 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
965 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
966 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
967 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
968 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
969 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
970
;; Latin Extended Additional
;; Every code point U+1E00..U+1EF9 gets category ?l (Latin).  Outside
;; the gap U+1E95..U+1E9F, each even code point is registered in TBL
;; as the capital of the odd code point that follows it.
(setq c #x1e00)
(while (not (> c #x1ef9))
  (let ((this-char (decode-char 'ucs c)))
    (modify-category-entry this-char ?l)
    (if (and (= (% c 2) 0)
             (not (and (> c #x1e94) (< c #x1ea0))))
        (set-case-syntax-pair this-char (decode-char 'ucs (+ c 1)) tbl)))
  (setq c (+ c 1)))
980
981 ;; Greek
;; Walk U+0370..U+03FF.  Every code point gets category ?g (Greek).
;; Case pairs are registered in TBL by position, then pairs that do
;; not follow either pattern are listed explicitly after the loop.
982 (setq c #x0370)
983 (while (<= c #x03ff)
984 (modify-category-entry (decode-char 'ucs c) ?g)
;; U+0391..U+03A1 and U+03A3..U+03AB (U+03A2 is skipped): the small
;; letter is 32 code points above the capital.
985 (if (or (and (>= c #x0391) (<= c #x03a1))
986 (and (>= c #x03a3) (<= c #x03ab)))
987 (set-case-syntax-pair
988 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
;; Even C in U+03DA..U+03EE: capital C pairs with the small letter
;; at C+1.
989 (and (>= c #x03da)
990 (<= c #x03ee)
991 (zerop (% c 2))
992 (set-case-syntax-pair
993 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
994 (setq c (1+ c)))
;; Explicit (CAPITAL SMALL) pairs.
995 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
996 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
997 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
998 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
999 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
1000 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
1001 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
1002
;; Armenian
;; Each code point U+0531..U+0556 is registered in TBL as the capital
;; of the code point #x30 above it.
(setq c #x531)
(while (not (> c #x556))
  (let ((capital (decode-char 'ucs c))
        (small (decode-char 'ucs (+ c #x30))))
    (set-case-syntax-pair capital small tbl))
  (setq c (+ c 1)))
1009
;; Greek Extended
;; Every code point U+1F00..U+1FFF gets category ?g (Greek).
;; Up to U+1FA7, a code point in the low half of a 16-code row (low
;; nibble 0..7) is registered in TBL as the small letter whose capital
;; sits 8 code points higher.  Two groups are excluded:
;;   - U+1F50, U+1F52, U+1F54 and U+1F56;
;;   - the whole row U+1F70..U+1F77, whose capitals live elsewhere and
;;     are registered by the explicit pairs that follow this loop.
;; The row exclusion compares the masked high nibble with #x70.
;; Comparing it with 7 (as this code once did) is always true, because
;; a value masked with #x00f0 can never be 7; the row was then paired
;; with U+1F78..U+1F7F, which are small letters themselves.
(setq c #x1f00)
(while (<= c #x1fff)
  (modify-category-entry (decode-char 'ucs c) ?g)
  (and (<= (logand c #x000f) 7)
       (<= c #x1fa7)
       (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
       (/= (logand c #x00f0) #x70)
       (set-case-syntax-pair
        (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
  (setq c (1+ c)))
;; Explicit case pairs for Greek Extended characters whose capital is
;; not 8 code points above the small letter.  Each call registers its
;; first character as the capital and its second as the small letter
;; in TBL.
1021 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
1022 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1023 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1024 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1025 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1026 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1027 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1028 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1029 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1030 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1031 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1032 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1033 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1034 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1035 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1036 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1037 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1038 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1039 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1040 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1041 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1042 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1043 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1044 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1045
1046 ;; cyrillic
;; Walk U+0400..U+04FF.  Every code point gets category ?y (Cyrillic).
;; Case pairs are registered in TBL by position, then pairs that do
;; not follow any of the patterns are listed explicitly after the loop.
1047 (setq c #x0400)
1048 (while (<= c #x04ff)
1049 (modify-category-entry (decode-char 'ucs c) ?y)
;; U+0400..U+040F: the small letter is 80 code points above.
1050 (and (>= c #x0400)
1051 (<= c #x040f)
1052 (set-case-syntax-pair
1053 (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
;; U+0410..U+042F: the small letter is 32 code points above.
1054 (and (>= c #x0410)
1055 (<= c #x042f)
1056 (set-case-syntax-pair
1057 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
;; Even C in U+0460..U+0480, U+048C..U+04BE or U+04D0..U+04F4:
;; capital C pairs with the small letter at C+1.
1058 (and (zerop (% c 2))
1059 (or (and (>= c #x0460) (<= c #x0480))
1060 (and (>= c #x048c) (<= c #x04be))
1061 (and (>= c #x04d0) (<= c #x04f4)))
1062 (set-case-syntax-pair
1063 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1064 (setq c (1+ c)))
;; Explicit (CAPITAL SMALL) pairs.
1065 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
1066 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
1067 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
1068 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
1069 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1070
;; general punctuation
;; U+2000..U+200B: whitespace syntax.
(setq c #x2000)
(while (< c #x200c)
  (set-case-syntax (decode-char 'ucs c) " " tbl)
  (setq c (+ c 1)))
;; U+2010..U+2016: symbol syntax.
(setq c #x2010)
(while (< c #x2017)
  (set-case-syntax (decode-char 'ucs c) "_" tbl)
  (setq c (+ c 1)))
;; C is not reset from here on; each loop resumes where the previous
;; one stopped.
;; U+2017..U+201F: punctuation syntax for quotation marks (like `).
(while (< c #x2020)
  (set-case-syntax (decode-char 'ucs c) "." tbl)
  (setq c (+ c 1)))
;; U+2020..U+2027: symbol syntax.
(while (< c #x2028)
  (set-case-syntax (decode-char 'ucs c) "_" tbl)
  (setq c (+ c 1)))
1087
;; Roman numerals
;; Each code point U+2160..U+216F is registered in TBL as the capital
;; of the code point #x10 above it.
(setq c #x2160)
(while (not (> c #x216f))
  (let ((capital (decode-char 'ucs c))
        (small (decode-char 'ucs (+ c #x10))))
    (set-case-syntax-pair capital small tbl))
  (setq c (+ c 1)))
1094
;; Circled Latin
;; Each code point U+24B6..U+24CF is registered in TBL as the capital
;; of the code point 26 above it; both members of the pair also get
;; category ?l (Latin).
(setq c #x24b6)
(while (not (> c #x24cf))
  (let ((capital (decode-char 'ucs c))
        (small (decode-char 'ucs (+ c 26))))
    (set-case-syntax-pair capital small tbl)
    (modify-category-entry capital ?l)
    (modify-category-entry small ?l))
  (setq c (+ c 1)))
1103
;; Fullwidth Latin
;; Each code point U+FF21..U+FF3A is registered in TBL as the capital
;; of the code point #x20 above it; both members of the pair also get
;; category ?l (Latin).
(setq c #xff21)
(while (not (> c #xff3a))
  (let ((capital (decode-char 'ucs c))
        (small (decode-char 'ucs (+ c #x20))))
    (set-case-syntax-pair capital small tbl)
    (modify-category-entry capital ?l)
    (modify-category-entry small ?l))
  (setq c (+ c 1)))
1112
;; Combining diacritics
;; Every code point U+0300..U+0362 gets category ?^.
(setq c #x300)
(while (< c #x363)
  (modify-category-entry (decode-char 'ucs c) ?^)
  (setq c (+ c 1)))
1118
;; Combining marks
;; Every code point U+20D0..U+20E3 gets category ?^.
(setq c #x20d0)
(while (< c #x20e4)
  (modify-category-entry (decode-char 'ucs c) ?^)
  (setq c (+ c 1)))
1124
1125 ;; Fixme: syntax for symbols &c
1126 )
1127
;; Give paired Unicode delimiters open/close parenthesis syntax.
;; Each string in PAIRS holds two characters: element 0 is the opening
;; delimiter and element 1 the closing one (code points are noted in
;; the trailing comments).  For every pair, the loop at the end gives
;; the first character the syntax "(" followed by the second character,
;; and the second character the syntax ")" followed by the first, so
;; each names the other as its matching delimiter.
1128 (let ((pairs
1129 '("\e$,1sEsF\e(B" ; U+2045 U+2046
1130 "\e$,1s}s~\e(B" ; U+207D U+207E
1131 "\e$,1t-t.\e(B" ; U+208D U+208E
1132 "\e$,1zhzi\e(B" ; U+2308 U+2309
1133 "\e$,1zjzk\e(B" ; U+230A U+230B
1134 "\e$,1{){*\e(B" ; U+2329 U+232A
1135 "\e$,1|T|U\e(B" ; U+23B4 U+23B5
1136 "\e$,2&H&I\e(B" ; U+2768 U+2769
1137 "\e$,2&J&K\e(B" ; U+276A U+276B
1138 "\e$,2&L&M\e(B" ; U+276C U+276D
1139 "\e$,2&P&Q\e(B" ; U+2770 U+2771
1140 "\e$,2&R&S\e(B" ; U+2772 U+2773
1141 "\e$,2&T&U\e(B" ; U+2774 U+2775
1142 "\e$,2'f'g\e(B" ; U+27E6 U+27E7
1143 "\e$,2'h'i\e(B" ; U+27E8 U+27E9
1144 "\e$,2'j'k\e(B" ; U+27EA U+27EB
1145 "\e$,2,#,$\e(B" ; U+2983 U+2984
1146 "\e$,2,%,&\e(B" ; U+2985 U+2986
1147 "\e$,2,',(\e(B" ; U+2987 U+2988
1148 "\e$,2,),*\e(B" ; U+2989 U+298A
1149 "\e$,2,+,,\e(B" ; U+298B U+298C
1150 "\e$,2,-,.\e(B" ; U+298D U+298E
1151 "\e$,2,/,0\e(B" ; U+298F U+2990
1152 "\e$,2,1,2\e(B" ; U+2991 U+2992
1153 "\e$,2,3,4\e(B" ; U+2993 U+2994
1154 "\e$,2,5,6\e(B" ; U+2995 U+2996
1155 "\e$,2,7,8\e(B" ; U+2997 U+2998
1156 "\e$,2-<-=\e(B" ; U+29FC U+29FD
1157 "\e$,2=H=I\e(B" ; U+3008 U+3009
1158 "\e$,2=J=K\e(B" ; U+300A U+300B
1159 "\e$,2=L=M\e(B" ; U+300C U+300D
1160 "\e$,2=N=O\e(B" ; U+300E U+300F
1161 "\e$,2=P=Q\e(B" ; U+3010 U+3011
1162 "\e$,2=T=U\e(B" ; U+3014 U+3015
1163 "\e$,2=V=W\e(B" ; U+3016 U+3017
1164 "\e$,2=X=Y\e(B" ; U+3018 U+3019
1165 "\e$,2=Z=[\e(B" ; U+301A U+301B
1166 "\e$,3m~m\7f\e(B" ; U+FD3E U+FD3F
1167 "\e$,3pUpV\e(B" ; U+FE35 U+FE36
1168 "\e$,3pWpX\e(B" ; U+FE37 U+FE38
1169 "\e$,3pYpZ\e(B" ; U+FE39 U+FE3A
1170 "\e$,3p[p\\e(B" ; U+FE3B U+FE3C
1171 "\e$,3p]p^\e(B" ; U+FE3D U+FE3E
1172 "\e$,3p_p`\e(B" ; U+FE3F U+FE40
1173 "\e$,3papb\e(B" ; U+FE41 U+FE42
1174 "\e$,3pcpd\e(B" ; U+FE43 U+FE44
1175 "\e$,3pypz\e(B" ; U+FE59 U+FE5A
1176 "\e$,3p{p|\e(B" ; U+FE5B U+FE5C
1177 "\e$,3p}p~\e(B" ; U+FE5D U+FE5E
1178 "\e$,3rhri\e(B" ; U+FF08 U+FF09
1179 "\e$,3s;s=\e(B" ; U+FF3B U+FF3D
1180 "\e$,3s[s]\e(B" ; U+FF5B U+FF5D
1181 "\e$,3s_s`\e(B" ; U+FF5F U+FF60
1182 "\e$,3sbsc\e(B" ; U+FF62 U+FF63
1183 )))
1184 (dolist (elt pairs)
1185 (modify-syntax-entry (aref elt 0) (string ?\( (aref elt 1)))
1186 (modify-syntax-entry (aref elt 1) (string ?\) (aref elt 0)))))
1187
1188 \f
1189 ;;; Setting word boundary.
1190
;; Two adjacent characters that both have category ?l (Latin) are
;; listed as a combining pair.
(setq word-combining-categories
      (list (cons ?l ?l)))
1193
;; Category pairs for 2-byte character sets that are listed as
;; separating: (BEFORE . AFTER).
(setq word-separating-categories
      (list (cons ?A ?K)                ; Alpha numeric - Katakana
            (cons ?A ?C)                ; Alpha numeric - Chinese
            (cons ?H ?A)                ; Hiragana - Alpha numeric
            (cons ?H ?K)                ; Hiragana - Katakana
            (cons ?H ?C)                ; Hiragana - Chinese
            (cons ?K ?A)                ; Katakana - Alpha numeric
            (cons ?K ?C)                ; Katakana - Chinese
            (cons ?C ?A)                ; Chinese - Alpha numeric
            (cons ?C ?K)))              ; Chinese - Katakana
1205
1206 \f
;; Record, for each character set below, the coding system considered
;; most proper to encode it.  The coding system is stored in the
;; charset's `preferred-coding-system' property; entries are
;; (CHARSET . CODING-SYSTEM) and are processed in order.

(dolist (entry '((latin-iso8859-1 . iso-latin-1)
                 (latin-iso8859-2 . iso-latin-2)
                 (latin-iso8859-3 . iso-latin-3)
                 (latin-iso8859-4 . iso-latin-4)
                 (thai-tis620 . thai-tis620)
                 (greek-iso8859-7 . greek-iso-8bit)
                 (arabic-iso8859-6 . iso-2022-7bit)
                 (hebrew-iso8859-8 . hebrew-iso-8bit)
                 (katakana-jisx0201 . japanese-shift-jis)
                 (latin-jisx0201 . japanese-shift-jis)
                 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                 (latin-iso8859-9 . iso-latin-5)
                 (japanese-jisx0208-1978 . iso-2022-jp)
                 (chinese-gb2312 . cn-gb-2312)
                 (japanese-jisx0208 . iso-2022-jp)
                 (korean-ksc5601 . iso-2022-kr)
                 (japanese-jisx0212 . iso-2022-jp)
                 (chinese-cns11643-1 . iso-2022-cn)
                 (chinese-cns11643-2 . iso-2022-cn)
                 (chinese-big5-1 . chinese-big5)
                 (chinese-big5-2 . chinese-big5)
                 (chinese-sisheng . iso-2022-7bit)
                 (ipa . iso-2022-7bit)
                 (vietnamese-viscii-lower . vietnamese-viscii)
                 (vietnamese-viscii-upper . vietnamese-viscii)
                 (arabic-digit . iso-2022-7bit)
                 (arabic-1-column . iso-2022-7bit)
                 (ascii-right-to-left . iso-2022-7bit)
                 (lao . lao)
                 (arabic-2-column . iso-2022-7bit)
                 (indian-is13194 . devanagari)
                 (indian-glyph . devanagari)
                 (tibetan-1-column . tibetan)
                 (ethiopic . iso-2022-7bit)
                 (chinese-cns11643-3 . iso-2022-cn)
                 (chinese-cns11643-4 . iso-2022-cn)
                 (chinese-cns11643-5 . iso-2022-cn)
                 (chinese-cns11643-6 . iso-2022-cn)
                 (chinese-cns11643-7 . iso-2022-cn)
                 (indian-2-column . devanagari)
                 (tibetan . tibetan)
                 (latin-iso8859-14 . iso-latin-8)
                 (latin-iso8859-15 . iso-latin-9)))
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))
1256
1257 \f
;; Set up `auto-fill-chars' for charsets that should invoke
;; auto-filling (SPACE and NEWLINE are already set), and put the
;; `nospace-between-words' property on the same charsets.
(dolist (cs '(katakana-jisx0201
              japanese-jisx0208 japanese-jisx0212
              chinese-gb2312 chinese-big5-1 chinese-big5-2))
  (aset auto-fill-chars (make-char cs) t)
  (put-charset-property cs 'nospace-between-words t))
1268
1269 ;;; Local Variables:
1270 ;;; coding: iso-2022-7bit
1271 ;;; End:
1272
1273 ;;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
1274 ;;; characters.el ends here