]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
Don't set word syntax (the default)
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 2001 Free Software Foundation, Inc.
6
7 ;; Keywords: multibyte character, character set, syntax, category
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;; This file contains multibyte characters. Save this file always in
29 ;; the coding system `iso-2022-7bit'.
30
31 ;; This file does not define the syntax for Latin-N character sets;
32 ;; those are defined by the files latin-N.el.
33
34 ;;; Code:
35
36 ;;; Predefined categories.
37
38 ;; For each character set: (MNEMONIC . DESCRIPTION), defined in list order.
39 (dolist (charset-category
40 '((?a . "ASCII")
41 (?l . "Latin")
42 (?t . "Thai")
43 (?g . "Greek")
44 (?b . "Arabic")
45 (?w . "Hebrew")
46 (?y . "Cyrillic")
47 (?k . "Japanese katakana")
48 (?r . "Japanese roman")
49 (?c . "Chinese")
50 (?j . "Japanese")
51 (?h . "Korean")
52 (?e . "Ethiopic (Ge'ez)")
53 (?v . "Vietnamese")
54 (?i . "Indian") (?o . "Lao")
55 (?q . "Tibetan")))
56 (define-category (car charset-category) (cdr charset-category)))
57
58 ;; For each group (row) of 2-byte character sets.
59 (mapc (lambda (group) (define-category (car group) (cdr group)))
60 '((?A . "Alpha-numeric characters of 2-byte character sets")
61 (?C . "Chinese (Han) characters of 2-byte character sets")
62 (?G . "Greek characters of 2-byte character sets")
63 (?H . "Japanese Hiragana characters of 2-byte character sets")
64 (?K . "Japanese Katakana characters of 2-byte character sets")
65 (?N . "Korean Hangul characters of 2-byte character sets")
66 (?Y . "Cyrillic characters of 2-byte character sets")
67 (?I . "Indian Glyphs")))
68
69 ;; For phonetic classifications.  The mnemonics are the digits ?0
70 ;; through ?9, assigned to the descriptions below in order.
71 (let ((mnemonic ?0))
72 (dolist (description
73 '("consonant" "base (independent) vowel"
74 "upper diacritical mark (including upper vowel)"
75 "lower diacritical mark (including lower vowel)"
76 "tone mark" "symbol" "digit"
77 "vowel-modifying diacritical mark"
78 "vowel-signs" "semivowel lower"))
79 (define-category mnemonic description)
80 (setq mnemonic (1+ mnemonic))))
81
82 ;; For filling.
83 (define-category ?| "While filling, we can break a line at this character.")
84
85 ;; For indentation calculation.  The mnemonic is the space character.
86 (define-category ?\  ; `?\ ' = space, escaped so it cannot be lost as trailing whitespace
87 "This character counts as a space for indentation purposes.")
88
89 ;; Keep the following for `kinsoku' processing. See comments in
90 ;; kinsoku.el.
91 (define-category ?> "A character which can't be placed at beginning of line.")
92 (define-category ?< "A character which can't be placed at end of line.")
93
94 ;; Combining
95 (define-category ?^ "Combining diacritic or mark")
96 \f
97 ;;; Setting syntax and category.
98
99 ;; ASCII
100
101 ;; Codes 32 through 126 get both category `a' (ASCII) and
102 ;; category `l' (Latin), in that order.
103 (dotimes (offset (- 127 32))
104 (dolist (category '(?a ?l))
105 (modify-category-entry (+ 32 offset) category)))
106
107 ;; Arabic character set
108
109 ;; Legacy charsets: tag each charset's generic character with
110 ;; category `b' (Arabic).
111 (dolist (arabic-charset '(arabic-iso8859-6
112 arabic-digit
113 arabic-1-column
114 arabic-2-column))
115 ;; (modify-syntax-entry (make-char arabic-charset) "w")
116 (modify-category-entry (make-char arabic-charset) ?b))
117
118 ;; Unicode code points, given as inclusive (FIRST . LAST) ranges and
119 ;; visited in the order listed.
120 (dolist (ucs-range '((#x600 . #x6ff)
121 (#xfb50 . #xfdff)
122 (#xfe70 . #xfefe)))
123 (let ((code (car ucs-range))
124 (last-code (cdr ucs-range)))
125 (while (<= code last-code)
126 (modify-category-entry (decode-char 'ucs code) ?b)
127 (setq code (1+ code)))))
128
129
130 ;; Chinese character set (GB2312)
131 ;; Rows 33, 34 and 41 get symbol syntax; seven bracket pairs get paired open/close-paren syntax.
132 ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
133 (modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
134 (modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
135 (modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
136 (modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B") ; openers: "(" followed by the matching closer
137 (modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
138 (modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
139 (modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
140 (modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
141 (modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
142 (modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
143 (modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B") ; closers: ")" followed by the matching opener
144 (modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
145 (modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
146 (modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
147 (modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
148 (modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
149 (modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
150 ;; Unicode equivalents of above (same opener/closer arrangement)
151 (modify-syntax-entry ?\\e$,2=T\e(B "(\e$,2=U\e(B")
152 (modify-syntax-entry ?\\e$,2=H\e(B "(\e$,2=I\e(B")
153 (modify-syntax-entry ?\\e$,2=J\e(B "(\e$,2=K\e(B")
154 (modify-syntax-entry ?\\e$,2=L\e(B "(\e$,2=M\e(B")
155 (modify-syntax-entry ?\\e$,2=N\e(B "(\e$,2=O\e(B")
156 (modify-syntax-entry ?\\e$,2=V\e(B "(\e$,2=W\e(B")
157 (modify-syntax-entry ?\\e$,2=P\e(B "(\e$,2=Q\e(B")
158 (modify-syntax-entry ?\\e$,2=U\e(B ")\e$,2=T\e(B")
159 (modify-syntax-entry ?\\e$,2=I\e(B ")\e$,2=H\e(B")
160 (modify-syntax-entry ?\\e$,2=K\e(B ")\e$,2=J\e(B")
161 (modify-syntax-entry ?\\e$,2=M\e(B ")\e$,2=L\e(B")
162 (modify-syntax-entry ?\\e$,2=O\e(B ")\e$,2=N\e(B")
163 (modify-syntax-entry ?\\e$,2=W\e(B ")\e$,2=V\e(B")
164 (modify-syntax-entry ?\\e$,2=Q\e(B ")\e$,2=P\e(B")
165
166 ;; Whole charset: Chinese, and a line may be broken at any character.
167 (dolist (category '(?c ?|))
168 (modify-category-entry (make-char 'chinese-gb2312) category))
169 ;; Individual rows that carry a 2-byte group category.
170 (dolist (row-category '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
171 (modify-category-entry (make-char 'chinese-gb2312 (car row-category))
172 (cdr row-category)))
173 ;; Rows 48 through 126 are tagged `C' (Chinese (Han) characters).
174 (dotimes (offset (- 127 48))
175 (modify-category-entry (make-char 'chinese-gb2312 (+ 48 offset)) ?C))
176
177
178 ;; Chinese character set (BIG5)
179
180 (let ((big5-generic-chars (list (make-char 'chinese-big5-1)
181 (make-char 'chinese-big5-2))))
182 ;; (dolist (generic-char big5-generic-chars)
183 ;; (modify-syntax-entry generic-char "w"))
184
185 ;; Each category is applied to both generic characters before moving
186 ;; on to the next one:
187 ;; `c' Chinese
188 ;; `C' Chinese (Han) characters of 2-byte character sets
189 ;; `|' a line may be broken at this character while filling
190 (dolist (category '(?c ?C ?|))
191 (dolist (generic-char big5-generic-chars)
192 (modify-category-entry generic-char category))))
193
194
195 ;; Chinese character set (CNS11643)
196
197 ;; All seven charsets are treated alike: the generic character of
198 ;; each one is put in categories `c' (Chinese), `C' (Han) and `|'
199 ;; (line-breakable), in that order.
200 (dolist (cns-charset '(chinese-cns11643-1
201 chinese-cns11643-2
202 chinese-cns11643-3
203 chinese-cns11643-4
204 chinese-cns11643-5
205 chinese-cns11643-6
206 chinese-cns11643-7))
207 (let ((generic-char (make-char cns-charset)))
208 ;; (modify-syntax-entry generic-char "w")
209 (dolist (category '(?c ?C ?|))
210 (modify-category-entry generic-char category))))
211
212
213 ;; Cyrillic character set (ISO-8859-5)
214
215 (modify-category-entry (make-char 'cyrillic-iso8859-5) ?y) ; whole charset: category `y' (Cyrillic)
216 ;; Code 160 gets whitespace syntax; three further characters get punctuation syntax.
217 (modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
218 (modify-syntax-entry ?\e,L-\e(B ".")
219 (modify-syntax-entry ?\e,Lp\e(B ".")
220 (modify-syntax-entry ?\e,L}\e(B ".")
221 (let ((tbl (standard-case-table))) ; register (UPPERCASE LOWERCASE) pairs in the standard case table
222 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl) ; pairs written in the cyrillic-iso8859-5 charset
223 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
224 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
225 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
226 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
227 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
228 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
229 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
230 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
231 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
232 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
233 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
234 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
235 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
236 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
237 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
238 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
239 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
240 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
241 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
242 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
243 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
244 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
245 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
246 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
247 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
248 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
249 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
250 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
251 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
252 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
253 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
254 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
255 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
256 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
257 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
258 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
259 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
260 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
261 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
262 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
263 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
264 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
265 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
266 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
267 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl)
268 (set-case-syntax-pair ?\e$,1(!\e(B ?\e$,1(q\e(B tbl) ; NOTE(review): presumably the Unicode equivalents of the pairs above -- confirm
269 (set-case-syntax-pair ?\e$,1("\e(B ?\e$,1(r\e(B tbl)
270 (set-case-syntax-pair ?\e$,1(#\e(B ?\e$,1(s\e(B tbl)
271 (set-case-syntax-pair ?\e$,1($\e(B ?\e$,1(t\e(B tbl)
272 (set-case-syntax-pair ?\e$,1(%\e(B ?\e$,1(u\e(B tbl)
273 (set-case-syntax-pair ?\e$,1(&\e(B ?\e$,1(v\e(B tbl)
274 (set-case-syntax-pair ?\e$,1('\e(B ?\e$,1(w\e(B tbl)
275 (set-case-syntax-pair ?\e$,1((\e(B ?\e$,1(x\e(B tbl)
276 (set-case-syntax-pair ?\e$,1()\e(B ?\e$,1(y\e(B tbl)
277 (set-case-syntax-pair ?\e$,1(*\e(B ?\e$,1(z\e(B tbl)
278 (set-case-syntax-pair ?\e$,1(+\e(B ?\e$,1({\e(B tbl)
279 (set-case-syntax-pair ?\e$,1(,\e(B ?\e$,1(|\e(B tbl)
280 (set-case-syntax-pair ?\e$,1(.\e(B ?\e$,1(~\e(B tbl)
281 (set-case-syntax-pair ?\e$,1(/\e(B ?\e$,1(\7f\e(B tbl)
282 (set-case-syntax-pair ?\e$,1(0\e(B ?\e$,1(P\e(B tbl)
283 (set-case-syntax-pair ?\e$,1(1\e(B ?\e$,1(Q\e(B tbl)
284 (set-case-syntax-pair ?\e$,1(2\e(B ?\e$,1(R\e(B tbl)
285 (set-case-syntax-pair ?\e$,1(3\e(B ?\e$,1(S\e(B tbl)
286 (set-case-syntax-pair ?\e$,1(4\e(B ?\e$,1(T\e(B tbl)
287 (set-case-syntax-pair ?\e$,1(5\e(B ?\e$,1(U\e(B tbl)
288 (set-case-syntax-pair ?\e$,1(6\e(B ?\e$,1(V\e(B tbl)
289 (set-case-syntax-pair ?\e$,1(7\e(B ?\e$,1(W\e(B tbl)
290 (set-case-syntax-pair ?\e$,1(8\e(B ?\e$,1(X\e(B tbl)
291 (set-case-syntax-pair ?\e$,1(9\e(B ?\e$,1(Y\e(B tbl)
292 (set-case-syntax-pair ?\e$,1(:\e(B ?\e$,1(Z\e(B tbl)
293 (set-case-syntax-pair ?\e$,1(;\e(B ?\e$,1([\e(B tbl)
294 (set-case-syntax-pair ?\e$,1(<\e(B ?\e$,1(\\e(B tbl)
295 (set-case-syntax-pair ?\e$,1(=\e(B ?\e$,1(]\e(B tbl)
296 (set-case-syntax-pair ?\e$,1(>\e(B ?\e$,1(^\e(B tbl)
297 (set-case-syntax-pair ?\e$,1(?\e(B ?\e$,1(_\e(B tbl)
298 (set-case-syntax-pair ?\e$,1(@\e(B ?\e$,1(`\e(B tbl)
299 (set-case-syntax-pair ?\e$,1(A\e(B ?\e$,1(a\e(B tbl)
300 (set-case-syntax-pair ?\e$,1(B\e(B ?\e$,1(b\e(B tbl)
301 (set-case-syntax-pair ?\e$,1(C\e(B ?\e$,1(c\e(B tbl)
302 (set-case-syntax-pair ?\e$,1(D\e(B ?\e$,1(d\e(B tbl)
303 (set-case-syntax-pair ?\e$,1(E\e(B ?\e$,1(e\e(B tbl)
304 (set-case-syntax-pair ?\e$,1(F\e(B ?\e$,1(f\e(B tbl)
305 (set-case-syntax-pair ?\e$,1(G\e(B ?\e$,1(g\e(B tbl)
306 (set-case-syntax-pair ?\e$,1(H\e(B ?\e$,1(h\e(B tbl)
307 (set-case-syntax-pair ?\e$,1(I\e(B ?\e$,1(i\e(B tbl)
308 (set-case-syntax-pair ?\e$,1(J\e(B ?\e$,1(j\e(B tbl)
309 (set-case-syntax-pair ?\e$,1(K\e(B ?\e$,1(k\e(B tbl)
310 (set-case-syntax-pair ?\e$,1(L\e(B ?\e$,1(l\e(B tbl)
311 (set-case-syntax-pair ?\e$,1(M\e(B ?\e$,1(m\e(B tbl)
312 (set-case-syntax-pair ?\e$,1(N\e(B ?\e$,1(n\e(B tbl)
313 (set-case-syntax-pair ?\e$,1(O\e(B ?\e$,1(o\e(B tbl))
314
315 ;; Devanagari character set
316
317 ;;; Commented out since the categories appear not to be used anywhere
318 ;;; and word syntax is the default.
319 ;; (let ((deflist '(;; chars syntax category
320 ;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
321 ;; ; chandrabindu, anuswar, visarga
322 ;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
323 ;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
324 ;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
325 ;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
326 ;; ;; Unicode equivalents
327 ;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
328 ;; ; chandrabindu, anuswar, visarga
329 ;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
330 ;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
331 ;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
332 ;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
333 ;; ))
334 ;; elm chars len syntax category to ch i)
335 ;; (while deflist
336 ;; (setq elm (car deflist))
337 ;; (setq chars (car elm)
338 ;; len (length chars)
339 ;; syntax (nth 1 elm)
340 ;; category (nth 2 elm)
341 ;; i 0)
342 ;; (while (< i len)
343 ;; (if (= (aref chars i) ?-)
344 ;; (setq i (1+ i)
345 ;; to (aref chars i))
346 ;; (setq ch (aref chars i)
347 ;; to ch))
348 ;; (while (<= ch to)
349 ;; (modify-syntax-entry ch syntax)
350 ;; (modify-category-entry ch category)
351 ;; (setq ch (1+ ch)))
352 ;; (setq i (1+ i)))
353 ;; (setq deflist (cdr deflist))))
354
355 ;; Ethiopic character set
356
357 (modify-category-entry (make-char 'ethiopic) ?e)
358 ;; (modify-syntax-entry (make-char 'ethiopic) "w")
359 (let ((code #x1200))
360 (while (<= code #x137c)
361 (modify-category-entry (decode-char 'ucs code) ?e)
362 (setq code (1+ code))))
363 (dolist (punctuation-char '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
364 ;; Unicode equivalents of the above:
365 ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B))
366 (modify-syntax-entry punctuation-char "."))
367
368 ;; Greek character set (ISO-8859-7)
369
370 (modify-category-entry (make-char 'greek-iso8859-7) ?g)
371 ;; Likewise for the Unicode code points #x370 through #x3ff.
372 (dotimes (offset (1+ (- #x3ff #x370)))
373 (modify-category-entry (decode-char 'ucs (+ #x370 offset)) ?g))
374
375
376 ;; (let ((c 182))
377 ;; (while (< c 255)
378 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
379 ;; (setq c (1+ c))))
380 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
381 (modify-syntax-entry ?\e,F7\e(B ".") ; three characters given punctuation syntax
382 (modify-syntax-entry ?\e,F;\e(B ".")
383 (modify-syntax-entry ?\e,F=\e(B ".")
384 (let ((tbl (standard-case-table))) ; syntax and case pairs go through the standard case table
385 ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
386 ;; in several cases.
387 (set-case-syntax ?\e,F!\e(B "." tbl)
388 (set-case-syntax ?\e,F"\e(B "." tbl)
389 (set-case-syntax ?\e,F&\e(B "." tbl) ; NOTE(review): same character as the next line, which sets it again to "_" -- confirm the intended character
390 (set-case-syntax ?\e,F&\e(B "_" tbl)
391 (set-case-syntax ?\e,F'\e(B "." tbl)
392 (set-case-syntax ?\e,F)\e(B "_" tbl)
393 (set-case-syntax ?\e,F+\e(B "." tbl)
394 (set-case-syntax ?\e,F,\e(B "_" tbl)
395 (set-case-syntax ?\e,F-\e(B "_" tbl)
396 (set-case-syntax ?\e,F/\e(B "." tbl)
397 (set-case-syntax ?\e,F0\e(B "_" tbl)
398 (set-case-syntax ?\e,F1\e(B "_" tbl)
399 ;; (set-case-syntax ?\e,F7\e(B "_" tbl)
400 ;; (set-case-syntax ?\e,F=\e(B "_" tbl)
401 (set-case-syntax-pair ?\e,FA\e(B ?\e,Fa\e(B tbl) ; (UPPERCASE LOWERCASE) pairs in the greek-iso8859-7 charset
402 (set-case-syntax-pair ?\e,FB\e(B ?\e,Fb\e(B tbl)
403 (set-case-syntax-pair ?\e,FC\e(B ?\e,Fc\e(B tbl)
404 (set-case-syntax-pair ?\e,FD\e(B ?\e,Fd\e(B tbl)
405 (set-case-syntax-pair ?\e,FE\e(B ?\e,Fe\e(B tbl)
406 (set-case-syntax-pair ?\e,FF\e(B ?\e,Ff\e(B tbl)
407 (set-case-syntax-pair ?\e,FG\e(B ?\e,Fg\e(B tbl)
408 (set-case-syntax-pair ?\e,FH\e(B ?\e,Fh\e(B tbl)
409 (set-case-syntax-pair ?\e,FI\e(B ?\e,Fi\e(B tbl)
410 (set-case-syntax-pair ?\e,FJ\e(B ?\e,Fj\e(B tbl)
411 (set-case-syntax-pair ?\e,FK\e(B ?\e,Fk\e(B tbl)
412 (set-case-syntax-pair ?\e,FL\e(B ?\e,Fl\e(B tbl)
413 (set-case-syntax-pair ?\e,FM\e(B ?\e,Fm\e(B tbl)
414 (set-case-syntax-pair ?\e,FN\e(B ?\e,Fn\e(B tbl)
415 (set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
416 (set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
417 (set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
418 (set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
419 (set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
420 (set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
421 (set-case-syntax-pair ?\e,FV\e(B ?\e,Fv\e(B tbl)
422 (set-case-syntax-pair ?\e,FW\e(B ?\e,Fw\e(B tbl)
423 (set-case-syntax-pair ?\e,FX\e(B ?\e,Fx\e(B tbl)
424 (set-case-syntax-pair ?\e,FY\e(B ?\e,Fy\e(B tbl)
425 (set-case-syntax-pair ?\e,FZ\e(B ?\e,Fz\e(B tbl)
426 (set-case-syntax-pair ?\e,F[\e(B ?\e,F{\e(B tbl)
427 (set-case-syntax-pair ?\e,F?\e(B ?\e,F~\e(B tbl)
428 (set-case-syntax-pair ?\e,F>\e(B ?\e,F}\e(B tbl)
429 (set-case-syntax-pair ?\e,F<\e(B ?\e,F|\e(B tbl)
430 (set-case-syntax-pair ?\e,F6\e(B ?\e,F\\e(B tbl)
431 (set-case-syntax-pair ?\e,F8\e(B ?\e,F]\e(B tbl)
432 (set-case-syntax-pair ?\e,F9\e(B ?\e,F^\e(B tbl)
433 (set-case-syntax-pair ?\e,F:\e(B ?\e,F_\e(B tbl)
434 ;; Unicode equivalents
435 (set-case-syntax-pair ?\e$,1&q\e(B ?\e$,1'1\e(B tbl)
436 (set-case-syntax-pair ?\e$,1&r\e(B ?\e$,1'2\e(B tbl)
437 (set-case-syntax-pair ?\e$,1&s\e(B ?\e$,1'3\e(B tbl)
438 (set-case-syntax-pair ?\e$,1&t\e(B ?\e$,1'4\e(B tbl)
439 (set-case-syntax-pair ?\e$,1&u\e(B ?\e$,1'5\e(B tbl)
440 (set-case-syntax-pair ?\e$,1&v\e(B ?\e$,1'6\e(B tbl)
441 (set-case-syntax-pair ?\e$,1&w\e(B ?\e$,1'7\e(B tbl)
442 (set-case-syntax-pair ?\e$,1&x\e(B ?\e$,1'8\e(B tbl)
443 (set-case-syntax-pair ?\e$,1&y\e(B ?\e$,1'9\e(B tbl)
444 (set-case-syntax-pair ?\e$,1&z\e(B ?\e$,1':\e(B tbl)
445 (set-case-syntax-pair ?\e$,1&{\e(B ?\e$,1';\e(B tbl)
446 (set-case-syntax-pair ?\e$,1&|\e(B ?\e$,1'<\e(B tbl)
447 (set-case-syntax-pair ?\e$,1&}\e(B ?\e$,1'=\e(B tbl)
448 (set-case-syntax-pair ?\e$,1&~\e(B ?\e$,1'>\e(B tbl)
449 (set-case-syntax-pair ?\e$,1&\7f\e(B ?\e$,1'?\e(B tbl)
450 (set-case-syntax-pair ?\e$,1' \e(B ?\e$,1'@\e(B tbl)
451 (set-case-syntax-pair ?\e$,1'!\e(B ?\e$,1'A\e(B tbl)
452 (set-case-syntax-pair ?\e$,1'#\e(B ?\e$,1'C\e(B tbl)
453 (set-case-syntax-pair ?\e$,1'$\e(B ?\e$,1'D\e(B tbl)
454 (set-case-syntax-pair ?\e$,1'%\e(B ?\e$,1'E\e(B tbl)
455 (set-case-syntax-pair ?\e$,1'&\e(B ?\e$,1'F\e(B tbl)
456 (set-case-syntax-pair ?\e$,1''\e(B ?\e$,1'G\e(B tbl)
457 (set-case-syntax-pair ?\e$,1'(\e(B ?\e$,1'H\e(B tbl)
458 (set-case-syntax-pair ?\e$,1')\e(B ?\e$,1'I\e(B tbl)
459 (set-case-syntax-pair ?\e$,1'*\e(B ?\e$,1'J\e(B tbl)
460 (set-case-syntax-pair ?\e$,1'+\e(B ?\e$,1'K\e(B tbl)
461 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
462 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
463 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
464 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
465 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
466 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
467 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl))
468
469 ;; Hebrew character set (ISO-8859-8)
470
471 ;; (modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
472
473 ;; (let ((c 224))
474 ;; (while (< c 251)
475 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
476 ;; (setq c (1+ c))))
477 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
478
479 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
480
481 (dolist (charset-category '((indian-is13194 . ?i)
482 (indian-2-column . ?I)
483 (indian-glyph . ?I)))
484 (modify-category-entry (make-char (car charset-category))
485 (cdr charset-category)))
486 ;; Unicode Devanagari block
487 (dotimes (offset (1+ (- #x970 #x901)))
488 (modify-category-entry (decode-char 'ucs (+ #x901 offset)) ?i))
489
490 ;;; Commented out since the categories appear not to be used anywhere
491 ;;; and word syntax is the default.
492 ;; (let ((deflist ;
493 ;; '(;; chars syntax category
494 ;; ("\e(5!"#\e(B" "w" ?7) ; vowel-modifying diacritical mark
495 ;; ; chandrabindu, anuswar, visarga
496 ;; ("\e(5$\e(B-\e(52\e(B" "w" ?1) ; base (independent) vowel
497 ;; ("\e(53\e(B-\e(5X\e(B" "w" ?0) ; consonant
498 ;; ("\e(5Z\e(B-\e(5g\e(B" "w" ?8) ; matra
499 ;; ("\e(5q\e(B-\e(5z\e(B" "w" ?6) ; digit
500 ;; ))
501 ;; elm chars len syntax category to ch i)
502 ;; (while deflist
503 ;; (setq elm (car deflist))
504 ;; (setq chars (car elm)
505 ;; len (length chars)
506 ;; syntax (nth 1 elm)
507 ;; category (nth 2 elm)
508 ;; i 0)
509 ;; (while (< i len)
510 ;; (if (= (aref chars i) ?-)
511 ;; (setq i (1+ i)
512 ;; to (aref chars i))
513 ;; (setq ch (aref chars i)
514 ;; to ch))
515 ;; (while (<= ch to)
516 ;; (modify-syntax-entry ch syntax)
517 ;; (modify-category-entry ch category)
518 ;; (setq ch (1+ ch)))
519 ;; (setq i (1+ i)))
520 ;; (setq deflist (cdr deflist))))
521
522
523 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
524
525 ;; (CHARSET . CATEGORY) pairs, applied to each charset's generic
526 ;; character in the order listed.
527 (dolist (charset-category '((katakana-jisx0201 . ?k)
528 (katakana-jisx0201 . ?j)
529 (latin-jisx0201 . ?r)
530 (japanese-jisx0208 . ?j)
531 (japanese-jisx0212 . ?j)
532 (katakana-jisx0201 . ?|)
533 (japanese-jisx0208 . ?|)
534 (japanese-jisx0212 . ?|)))
535 (modify-category-entry (make-char (car charset-category))
536 (cdr charset-category)))
537
538 ;; Unicode ranges.  Each entry is (FIRST LAST CATEGORY...); every code
539 ;; point from FIRST through LAST receives the listed categories in
540 ;; order.
541 (dolist (range-spec
542 '(;; Unicode equivalents of JISX0201-kana
543 (#xff61 #xff9f ?k ?j ?|)
544 ;; Katakana block
545 ;; ?K is double width, ?k isn't specified
546 (#x30a0 #x30ff ?k ?j ?|)
547 ;; Hiragana block
548 ;; ?H is actually defined to be double width
549 ;; (the ?j entry for this block is commented out, so it is not listed)
550 (#x3040 #x309f ?H ?|)))
551 (let ((code (car range-spec))
552 (last-code (nth 1 range-spec))
553 (categories (nthcdr 2 range-spec)))
554 (while (<= code last-code)
555 (dolist (category categories)
556 (modify-category-entry (decode-char 'ucs code) category))
557 (setq code (1+ code)))))
558
559
560 ;; JISX0208
561 ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
562 (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_") ; rows 33, 34 and 40: symbol syntax
563 (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
564 (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
565 ;; (let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
566 ;; (while chars
567 ;; (modify-syntax-entry (car chars) "w")
568 ;; (setq chars (cdr chars))))
569 (modify-syntax-entry ?\\e$B!J\e(B "(\e$B!K\e(B") ; openers: "(" followed by the matching closer
570 (modify-syntax-entry ?\\e$B!N\e(B "(\e$B!O\e(B")
571 (modify-syntax-entry ?\\e$B!P\e(B "(\e$B!Q\e(B")
572 (modify-syntax-entry ?\\e$B!V\e(B "(\e$B!W\e(B")
573 (modify-syntax-entry ?\\e$B!X\e(B "(\e$B!Y\e(B")
574 (modify-syntax-entry ?\\e$B!K\e(B ")\e$B!J\e(B") ; closers: ")" followed by the matching opener
575 (modify-syntax-entry ?\\e$B!O\e(B ")\e$B!N\e(B")
576 (modify-syntax-entry ?\\e$B!Q\e(B ")\e$B!P\e(B")
577 (modify-syntax-entry ?\\e$B!W\e(B ")\e$B!V\e(B")
578 (modify-syntax-entry ?\\e$B!Y\e(B ")\e$B!X\e(B")
579
580 (modify-category-entry (make-char 'japanese-jisx0208 35) ?A) ; per-row 2-byte group categories
581 (modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
582 (modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
583 (modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
584 (modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
585 (let ((row 48)) ; rows 48 through 126 get category ?C
586 (while (< row 127)
587 (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
588 (setq row (1+ row))))
589 (modify-category-entry ?\e$B!<\e(B ?K) ; one individual character additionally gets ?K
590 (let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B))) ; these two characters get both ?K and ?H
591 (while chars
592 (modify-category-entry (car chars) ?K)
593 (modify-category-entry (car chars) ?H)
594 (setq chars (cdr chars))))
595 (let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))) ; these nine characters get ?C
596 (while chars
597 (modify-category-entry (car chars) ?C)
598 (setq chars (cdr chars))))
599
600 ;; JISX0212
601 ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
602 ;; Rows 33 through 35 get symbol syntax.
603 (dolist (row '(33 34 35))
604 (modify-syntax-entry (make-char 'japanese-jisx0212 row) "_"))
605
606 (modify-category-entry (make-char 'japanese-jisx0212 ) ?C)
607
608 ;; JISX0201-Kana
609 ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
610 (let ((chars '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
611 ;; Unicode:
612 ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B)))
613 (while chars ; each listed character gets punctuation syntax
614 (modify-syntax-entry (car chars) ".")
615 (setq chars (cdr chars))))
616 ;; Bracket pair: the opener is "(" + closer, the closer is ")" + opener.
617 (modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
618 (modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B")
619
620 ;; Korean character set (KSC5601)
621
622 ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
623 ;; Rows whose characters are given symbol syntax.
624 (dolist (row '(33 34 38 39 40 41))
625 (modify-syntax-entry (make-char 'korean-ksc5601 row) "_"))
626
627 ;; The charset as a whole is Korean ...
628 (modify-category-entry (make-char 'korean-ksc5601) ?h)
629 ;; ... and individual rows also carry a 2-byte group category.
630 (dolist (row-category '((35 . ?A) (37 . ?G)
631 (42 . ?H)
632 (43 . ?K)
633 (44 . ?Y)))
634 (modify-category-entry (make-char 'korean-ksc5601 (car row-category))
635 (cdr row-category)))
636
637 ;; Latin character set (latin-1,2,3,4,5,8,9)
638
639 (let ((latin-charsets '(latin-iso8859-1
640 latin-iso8859-2
641 latin-iso8859-3
642 latin-iso8859-4
643 latin-iso8859-9
644 latin-iso8859-14
645 latin-iso8859-15)))
646 ;; Every listed charset belongs to category `l' (Latin).
647 (dolist (charset latin-charsets)
648 (modify-category-entry (make-char charset) ?l))
649 ;; Code 160 of each charset counts as a space for indentation
650 ;; (category ` ').
651 (dolist (charset latin-charsets)
652 (modify-category-entry (make-char charset 160) ?\ )))
653
654
655 ;; Lao character set
656
657 (modify-category-entry (make-char 'lao) ?o)
658 (dotimes (i (1+ (- #xeff #xe80))) ; Unicode code points #xe80 through #xeff
659 (modify-category-entry (decode-char 'ucs (+ i #xe80)) ?o))
660 ;; Each DEFLIST entry is (CHARS SYNTAX CATEGORY); in CHARS, "X-Y" stands for the range X..Y.
661 (let ((deflist '(;; chars syntax category
662 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
663 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
664 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
665 ("\e(1XY\e(B" "w" ?3) ; vowel lower
666 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
667 ("\e(1\\e(B" "w" ?9) ; semivowel lower
668 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
669 ("\e(1Of\e(B" "_" ?5) ; symbol
670 ;; Unicode equivalents
671 ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
672 ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
673 ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
674 ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
675 ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark
676 ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower
677 ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
678 ("\e$,1DODf\e(B" "_" ?5) ; symbol
679 ))
680 elm chars len syntax category to ch i)
681 (while deflist ; one (CHARS SYNTAX CATEGORY) entry per iteration
682 (setq elm (car deflist))
683 (setq chars (car elm)
684 len (length chars)
685 syntax (nth 1 elm)
686 category (nth 2 elm)
687 i 0)
688 (while (< i len) ; walk CHARS one element at a time
689 (if (= (aref chars i) ?-) ; a dash introduces the upper bound of a range
690 (setq i (1+ i) ; skip the dash; CH is already one past the lower bound,
691 to (aref chars i)) ; so only the upper bound TO needs updating
692 (setq ch (aref chars i) ; otherwise a single character: a range of one
693 to ch))
694 (while (<= ch to) ; apply to every character from CH through TO
695 (unless (string-equal syntax "w") ; word syntax is the default, so it is not set explicitly
696 (modify-syntax-entry ch syntax))
697 (modify-category-entry ch category)
698 (setq ch (1+ ch)))
699 (setq i (1+ i)))
700 (setq deflist (cdr deflist))))
701
702 ;; Thai character set (TIS620)
703
704 (modify-category-entry (make-char 'thai-tis620) ?t)
705 (dotimes (i (1+ (- #xe7f #xe00))) ; Unicode code points #xe00 through #xe7f
706 (modify-category-entry (decode-char 'ucs (+ i #xe00)) ?t))
707 ;; Each DEFLIST entry is (CHARS SYNTAX CATEGORY); in CHARS, "X-Y" stands for the range X..Y.
708 (let ((deflist '(;; chars syntax category
709 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
710 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
711 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
712 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
713 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
714 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
715 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
716 ;; Unicode equivalents
717 ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
718 ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
719 ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
720 ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
721 ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
722 ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
723 ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
724 ))
725 elm chars len syntax category to ch i)
726 (while deflist ; one (CHARS SYNTAX CATEGORY) entry per iteration
727 (setq elm (car deflist))
728 (setq chars (car elm)
729 len (length chars)
730 syntax (nth 1 elm)
731 category (nth 2 elm)
732 i 0)
733 (while (< i len) ; walk CHARS one element at a time
734 (if (= (aref chars i) ?-) ; a dash introduces the upper bound of a range
735 (setq i (1+ i) ; skip the dash; CH is already one past the lower bound,
736 to (aref chars i)) ; so only the upper bound TO needs updating
737 (setq ch (aref chars i) ; otherwise a single character: a range of one
738 to ch))
739 (while (<= ch to) ; apply to every character from CH through TO
740 (unless (string-equal syntax "w") ; word syntax is the default, so it is not set explicitly
741 (modify-syntax-entry ch syntax))
742 (modify-category-entry ch category)
743 (setq ch (1+ ch)))
744 (setq i (1+ i)))
745 (setq deflist (cdr deflist))))
746
747 ;; Tibetan character set
748
749 (dolist (tibetan-charset '(tibetan tibetan-1-column))
750 (modify-category-entry (make-char tibetan-charset) ?q))
751 (dotimes (offset (- #x1000 #xf00)) ; Unicode code points #xf00 through #xfff
752 (modify-category-entry (decode-char 'ucs (+ #xf00 offset)) ?q))
753
;; Tibetan syntax and category table.  Each entry of DEFLIST is
;; (CHARS SYNTAX CATEGORY).  CHARS lists single characters and
;; inclusive ranges written FROM-TO.  The loop below gives every listed
;; character CATEGORY, and SYNTAX too unless SYNTAX is "w".
754 (let ((deflist '(;; chars syntax category
755 ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
756 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
757 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
758 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
759 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
760 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
761 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
762 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
763 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
764 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
;; NOTE(review): categories `>' and `<' are both labelled
;; "prohibition"; presumably one forbids line start and the other line
;; end -- confirm against their `define-category' entries.
765 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
766 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
767 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
768 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
769
770 ;; Unicode version (not complete)
771 ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
772 ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ;
773 ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
774 ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
775 ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lower vowel/modifier
776 ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
777 ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
778 ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
779 ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
780 ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
781 ))
782 elm chars len syntax category to ch i)
;; Consume DEFLIST one entry at a time.
783 (while deflist
784 (setq elm (car deflist))
785 (setq chars (car elm)
786 len (length chars)
787 syntax (nth 1 elm)
788 category (nth 2 elm)
789 i 0)
;; Scan CHARS.  A plain character opens a one-character range.  A "-"
;; makes the following character the inclusive end of a range whose
;; start was the previous character; CH is already one past that
;; start because the inner loop advanced it.
790 (while (< i len)
791 (if (= (aref chars i) ?-)
792 (setq i (1+ i)
793 to (aref chars i))
794 (setq ch (aref chars i)
795 to ch))
796 (while (<= ch to)
;; Entries whose SYNTAX is "w" leave the syntax table untouched.
797 (unless (string-equal syntax "w")
798 (modify-syntax-entry ch syntax))
799 (modify-category-entry ch category)
800 (setq ch (1+ ch)))
801 (setq i (1+ i)))
802 (setq deflist (cdr deflist))))
803
804 ;; Vietnamese character set
805
;; Vietnamese character set.  The characters returned by `make-char'
;; for both VISCII charsets get category `v', plus category `l' so
;; that they make a word with latin characters.  Syntax is left alone;
;; the explicit word-syntax call stays disabled as in the original.
(dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
  (let ((ch (make-char charset)))
    ;; (modify-syntax-entry ch "w")
    (modify-category-entry ch ?v)
    (modify-category-entry ch ?l)))
815
;; Register upper/lower case pairs in the standard case table for the
;; two VISCII charsets: position codes 32..127 of the upper charset
;; pair with the same position in the lower charset.
(let ((case-table (standard-case-table)))
  (dotimes (n (- 128 32))
    (let ((code (+ n 32)))
      (set-case-syntax-pair (make-char 'vietnamese-viscii-upper code)
                            (make-char 'vietnamese-viscii-lower code)
                            case-table))))
823
824 ;; Unicode (mule-unicode-0100-24ff)
825
826 (let ((tbl (standard-case-table)) c)
827
828 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
829 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
830 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
831 ;; Thus we have to check language-environment to handle casing
832 ;; correctly. Currently only I<->i is available.
833
834 ;; Latin Extended-A, Latin Extended-B
;; U+0100..U+0233: every code point gets category `l'.  Case pairs are
;; registered only for even code points:
;;  - up to U+012E, and in U+014A..U+0177, the even code point is the
;;    capital and its successor the small letter;
;;  - in U+013A..U+0148 the even code point is the small letter and its
;;    predecessor the capital.
;; The two range tests cannot both hold, so a `cond' is equivalent.
(setq c #x0100)
(while (<= c #x0233)
  (let ((this (decode-char 'ucs c)))
    (modify-category-entry this ?l)
    (when (zerop (% c 2))
      (cond ((or (<= c #x012e)
                 (and (>= c #x014a) (<= c #x0177)))
             (set-case-syntax-pair this (decode-char 'ucs (1+ c)) tbl))
            ((and (>= c #x013a) (<= c #x0148))
             (set-case-syntax-pair (decode-char 'ucs (1- c)) this tbl)))))
  (setq c (1+ c)))
;; Explicit case pairs for Latin Extended-A/B.  Each call is
;; (set-case-syntax-pair UPPER LOWER tbl); the characters are written
;; as escape-coded literals and must be kept byte-for-byte.
849 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
850 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
851 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
852 ; (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl) ; these two have different length!
853 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
854 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
855 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
856
857 ;; Latin Extended-B
858 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
859 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
860 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
861 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
862 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
863 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
864 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
865 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
866 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
867 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
868 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
869 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
870 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
871 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
872 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
873 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
874 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
875 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
876 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
877 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
878 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
879 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
880 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
881 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
882 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
883 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
884 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
885 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
886 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
887 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
888 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
889 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
890 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
891 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
892 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
893 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
;; In each of the next three groups two different first arguments map
;; to the same second argument (presumably the all-caps and titlecase
;; forms of a digraph sharing one small form -- verify).
894 (set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
895 (set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
896 (set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
897 (set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
898 (set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
899 (set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
900 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
901 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
902 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
903 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
904 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
905 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
906 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
907 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
908 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
909 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
910 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
911 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
912 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
913 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
914 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
915 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
916 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
917 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
;; Again two different first arguments share one second argument.
918 (set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
919 (set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
920 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
921 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
922 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
923 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
924 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
925 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
926 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
927 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
928 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
929 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
930 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
931 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
932 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
933 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
934 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
935 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
936 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
937 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
938 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
939 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
940 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
941 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
942 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
943 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
944 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
945 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
946 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
947 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
948 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
949 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
950 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
951 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
952
953 ;; Latin Extended Additional
;; U+1E00..U+1EF9: every code point gets category `l'.  Even code
;; points are capitals paired with their successor, except in the gap
;; U+1E96..U+1E9E, where no pair is registered.
(setq c #x1e00)
(while (<= c #x1ef9)
  (let ((this (decode-char 'ucs c)))
    (modify-category-entry this ?l)
    (when (and (zerop (% c 2))
               (or (<= c #x1e94) (>= c #x1ea0)))
      (set-case-syntax-pair this (decode-char 'ucs (1+ c)) tbl)))
  (setq c (1+ c)))
962
963 ;; Greek
;; U+0370..U+03FF: every code point gets category `g'.
;;  - U+0391..U+03A1 and U+03A3..U+03AB are capitals whose small
;;    letter is 32 code points higher;
;;  - even code points in U+03DA..U+03EE pair with their successor.
;; The two tests cover disjoint ranges, so a `cond' is equivalent.
(setq c #x0370)
(while (<= c #x03ff)
  (let ((this (decode-char 'ucs c)))
    (modify-category-entry this ?g)
    (cond ((or (and (>= c #x0391) (<= c #x03a1))
               (and (>= c #x03a3) (<= c #x03ab)))
           (set-case-syntax-pair this (decode-char 'ucs (+ c 32)) tbl))
          ((and (>= c #x03da)
                (<= c #x03ee)
                (zerop (% c 2)))
           (set-case-syntax-pair this (decode-char 'ucs (1+ c)) tbl))))
  (setq c (1+ c)))
;; Explicit Greek (UPPER LOWER) pairs that the loop above does not
;; generate (apparently the accented capitals, whose small letters are
;; not at a fixed offset -- verify against the Unicode chart).
977 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
978 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
979 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
980 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
981 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
982 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
983 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
984
985 ;; Armenian
;; U+0531..U+0556 are capitals; the small letter is #x30 code points
;; higher.  Only case pairs are registered here; no category is set.
(setq c #x531)
(while (<= c #x556)
  (let ((capital (decode-char 'ucs c))
        (small (decode-char 'ucs (+ c #x30))))
    (set-case-syntax-pair capital small tbl))
  (setq c (1+ c)))
991
992 ;; Greek Extended
;; U+1F00..U+1FFF: every code point gets category `g'.
;;
;; Up to U+1FA7 the block is laid out in rows of 16: the low half of a
;; row (low nibble 0..7) holds small letters and the capital sits 8
;; code points higher.  A pair (c+8, c) is registered when:
;;  - c is in the low half of its row,
;;  - c is not U+1F50/52/54/56 (no capital exists 8 above those), and
;;  - c is not in the U+1F70..U+1F7F row.  That row holds only small
;;    letters, so c+8 is not a capital there; their real capitals are
;;    set by the explicit pairs that follow this loop.
;;
;; The row test must compare the masked value with #x70.  The former
;; comparison with 7 was always true, because (logand c #x00f0) can
;; only yield multiples of #x10, so the U+1F70 row was paired as well.
(setq c #x1f00)
(while (<= c #x1fff)
  (modify-category-entry (decode-char 'ucs c) ?g)
  (and (<= (logand c #x000f) 7)
       (<= c #x1fa7)
       (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
       (/= (logand c #x00f0) #x70)
       (set-case-syntax-pair
        (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
  (setq c (1+ c)))
;; Explicit Greek Extended (UPPER LOWER) pairs that the loop above
;; does not generate.  Several second arguments here are small letters
;; from the U+1F70 row (decoded from the literals -- verify).
1003 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
1004 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1005 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1006 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1007 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1008 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1009 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1010 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1011 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1012 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1013 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1014 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1015 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1016 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1017 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1018 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1019 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1020 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1021 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1022 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1023 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1024 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1025 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1026 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1027
1028 ;; cyrillic
;; U+0400..U+04FF: every code point gets category `y'.
;;  - U+0400..U+040F: small letter is 80 code points higher;
;;  - U+0410..U+042F: small letter is 32 code points higher;
;;  - even code points in U+0460..U+0480, U+048C..U+04BE and
;;    U+04D0..U+04F4 pair with their successor.
;; The three cases cover disjoint ranges, so a `cond' is equivalent.
(setq c #x0400)
(while (<= c #x04ff)
  (let ((this (decode-char 'ucs c)))
    (modify-category-entry this ?y)
    (cond ((<= c #x040f)
           (set-case-syntax-pair this (decode-char 'ucs (+ c 80)) tbl))
          ((<= c #x042f)
           (set-case-syntax-pair this (decode-char 'ucs (+ c 32)) tbl))
          ((and (zerop (% c 2))
                (or (and (>= c #x0460) (<= c #x0480))
                    (and (>= c #x048c) (<= c #x04be))
                    (and (>= c #x04d0) (<= c #x04f4))))
           (set-case-syntax-pair this (decode-char 'ucs (1+ c)) tbl))))
  (setq c (1+ c)))
;; Explicit Cyrillic (UPPER LOWER) pairs that the loop above does not
;; generate.  Each pair is two consecutive characters (the last literal
;; byte advances by one); presumably the capital is the odd code point
;; here, unlike the even-capital ranges of the loop -- verify.
1047 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
1048 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
1049 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
1050 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
1051 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1052
1053 ;; general punctuation
;; U+2000..U+200B get whitespace syntax and U+2010..U+2027 get symbol
;; syntax.  C is a UCS code point, so it must be converted with
;; `decode-char' before use as a character, as every other section of
;; this form does.  Passing the raw number modified whatever character
;; happens to have that internal code instead.
(setq c #x2000)
(while (<= c #x200b)
  (set-case-syntax (decode-char 'ucs c) " " tbl)
  (setq c (1+ c)))
(setq c #x2010)
(while (<= c #x2027)
  (set-case-syntax (decode-char 'ucs c) "_" tbl)
  (setq c (1+ c)))
1062
1063 ;; Roman numerals
;; U+2160..U+216F pair with the code point #x10 higher as their
;; small form.
(setq c #x2160)
(while (<= c #x216f)
  (let ((capital (decode-char 'ucs c))
        (small (decode-char 'ucs (+ c #x10))))
    (set-case-syntax-pair capital small tbl))
  (setq c (1+ c)))
1069
1070 ;; Circled Latin
;; U+24B6..U+24CF pair with the code point 26 higher as their small
;; form; both members of each pair also get category `l'.
(setq c #x24b6)
(while (<= c #x24cf)
  (let ((capital (decode-char 'ucs c))
        (small (decode-char 'ucs (+ c 26))))
    (set-case-syntax-pair capital small tbl)
    (modify-category-entry capital ?l)
    (modify-category-entry small ?l))
  (setq c (1+ c)))
1078
1079 ;; Fullwidth Latin
;; U+FF21..U+FF3A pair with the code point #x20 higher as their small
;; form; both members of each pair also get category `l'.
(setq c #xff21)
(while (<= c #xff3a)
  (let ((capital (decode-char 'ucs c))
        (small (decode-char 'ucs (+ c #x20))))
    (set-case-syntax-pair capital small tbl)
    (modify-category-entry capital ?l)
    (modify-category-entry small ?l))
  (setq c (1+ c)))
1087
1088 ;; Ohm, Kelvin, Angstrom
;; Pair the three letterlike signs with an ordinary small letter as
;; their lower-case form (the Kelvin sign with ASCII `k').
;; NOTE(review): each of these small letters presumably already has a
;; regular capital, so it ends up with two upper-case candidates --
;; confirm which one `upcase' returns.
1089 (set-case-syntax-pair ?\e$,1uf\e(B ?\e$,1'I\e(B tbl)
1090 (set-case-syntax-pair ?\e$,1uj\e(B ?k tbl)
1091 (set-case-syntax-pair ?\e$,1uk\e(B ?\e,Ae\e(B tbl)
1092
1093 ;; Combining diacritics
;; Put two ranges in category `^': the combining diacritics
;; U+0300..U+0362 and the combining marks U+20D0..U+20E3.
(dolist (range '((#x300 . #x362)       ; Combining diacritics
                 (#x20d0 . #x20e3)))   ; Combining marks
  (setq c (car range))
  (while (<= c (cdr range))
    (modify-category-entry (decode-char 'ucs c) ?^)
    (setq c (1+ c))))
1104
1105 ;; Fixme: syntax for symbols &c
1106 )
1107 \f
1108 ;;; Setting word boundary.
1109
;; Category pairs consulted when deciding word boundaries; see the
;; docstrings of the two variables for the exact rules.
(setq word-combining-categories
      (list (cons ?l ?l)))

(setq word-separating-categories	;  (2-byte character sets)
      (list (cons ?A ?K)		; Alpha numeric - Katakana
            (cons ?A ?C)		; Alpha numeric - Chinese
            (cons ?H ?A)		; Hiragana - Alpha numeric
            (cons ?H ?K)		; Hiragana - Katakana
            (cons ?H ?C)		; Hiragana - Chinese
            (cons ?K ?A)		; Katakana - Alpha numeric
            (cons ?K ?C)		; Katakana - Chinese
            (cons ?C ?A)		; Chinese - Alpha numeric
            (cons ?C ?K)))		; Chinese - Katakana
1124
1125 \f
1126 ;; For each character set, record the coding system best suited to
1127 ;; encoding it in the charset's `preferred-coding-system' property.
1128
;; Each entry is (CHARSET . CODING-SYSTEM); CODING-SYSTEM is stored as
;; the `preferred-coding-system' property of CHARSET.
(dolist (entry '((latin-iso8859-1 . iso-latin-1)
                 (latin-iso8859-2 . iso-latin-2)
                 (latin-iso8859-3 . iso-latin-3)
                 (latin-iso8859-4 . iso-latin-4)
                 (thai-tis620 . thai-tis620)
                 (greek-iso8859-7 . greek-iso-8bit)
                 (arabic-iso8859-6 . iso-2022-7bit)
                 (hebrew-iso8859-8 . hebrew-iso-8bit)
                 (katakana-jisx0201 . japanese-shift-jis)
                 (latin-jisx0201 . japanese-shift-jis)
                 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                 (latin-iso8859-9 . iso-latin-5)
                 (japanese-jisx0208-1978 . iso-2022-jp)
                 (chinese-gb2312 . cn-gb-2312)
                 (japanese-jisx0208 . iso-2022-jp)
                 (korean-ksc5601 . iso-2022-kr)
                 (japanese-jisx0212 . iso-2022-jp)
                 (chinese-cns11643-1 . iso-2022-cn)
                 (chinese-cns11643-2 . iso-2022-cn)
                 (chinese-big5-1 . chinese-big5)
                 (chinese-big5-2 . chinese-big5)
                 (chinese-sisheng . iso-2022-7bit)
                 (ipa . iso-2022-7bit)
                 (vietnamese-viscii-lower . vietnamese-viscii)
                 (vietnamese-viscii-upper . vietnamese-viscii)
                 (arabic-digit . iso-2022-7bit)
                 (arabic-1-column . iso-2022-7bit)
                 (ascii-right-to-left . iso-2022-7bit)
                 (lao . lao)
                 (arabic-2-column . iso-2022-7bit)
                 (indian-is13194 . devanagari)
                 (indian-glyph . devanagari)
                 (tibetan-1-column . tibetan)
                 (ethiopic . iso-2022-7bit)
                 (chinese-cns11643-3 . iso-2022-cn)
                 (chinese-cns11643-4 . iso-2022-cn)
                 (chinese-cns11643-5 . iso-2022-cn)
                 (chinese-cns11643-6 . iso-2022-cn)
                 (chinese-cns11643-7 . iso-2022-cn)
                 (indian-2-column . devanagari)
                 (tibetan . tibetan)
                 (latin-iso8859-14 . iso-latin-8)
                 (latin-iso8859-15 . iso-latin-9)))
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))
1175
1176 \f
1177 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
1178 ;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
1179 ;; property on the charsets.
;; For each listed charset, set the `auto-fill-chars' slot of the
;; character returned by `make-char' and give the charset the
;; `nospace-between-words' property.
(dolist (charset '(katakana-jisx0201
                   japanese-jisx0208 japanese-jisx0212
                   chinese-gb2312 chinese-big5-1 chinese-big5-2))
  (aset auto-fill-chars (make-char charset) t)
  (put-charset-property charset 'nospace-between-words t))
1187
1188 ;;; Local Variables:
1189 ;;; coding: iso-2022-7bit
1190 ;;; End:
1191
1192 ;;; characters.el ends here