]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
Re-enable code giving word syntax to certain japanese-jisx0208 characters.
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6
7 ;; Keywords: multibyte character, character set, syntax, category
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;; This file contains multibyte characters. Save this file always in
29 ;; the coding system `iso-2022-7bit'.
30
31 ;; This file does not define the syntax for Latin-N character sets;
32 ;; those are defined by the files latin-N.el.
33
34 ;;; Code:
35
36 ;;; Predefined categories.
37
38 ;; For each character set.
39
;; One category per script / character set.  Each form pairs a
;; single-character mnemonic with a descriptive string.  These are
;; the mnemonics handed to `modify-category-entry' further down.
40 (define-category ?a "ASCII")
41 (define-category ?l "Latin")
42 (define-category ?t "Thai")
43 (define-category ?g "Greek")
44 (define-category ?b "Arabic")
;; Note: ?w here is a *category* mnemonic (Hebrew); it is unrelated to
;; the "w" syntax string passed to `modify-syntax-entry' elsewhere.
45 (define-category ?w "Hebrew")
46 (define-category ?y "Cyrillic")
47 (define-category ?k "Japanese katakana")
48 (define-category ?r "Japanese roman")
49 (define-category ?c "Chinese")
50 (define-category ?j "Japanese")
51 (define-category ?h "Korean")
52 (define-category ?e "Ethiopic (Ge'ez)")
53 (define-category ?v "Vietnamese")
54 (define-category ?i "Indian")
55 (define-category ?o "Lao")
56 (define-category ?q "Tibetan")
57
58 ;; For each group (row) of 2-byte character sets.
59
;; Upper-case mnemonics describe rows of 2-byte character sets.  Below
;; they are attached to whole rows via (make-char CHARSET ROW).
60 (define-category ?A "Alpha-numeric characters of 2-byte character sets")
61 (define-category ?C "Chinese (Han) characters of 2-byte character sets")
62 (define-category ?G "Greek characters of 2-byte character sets")
63 (define-category ?H "Japanese Hiragana characters of 2-byte character sets")
64 (define-category ?K "Japanese Katakana characters of 2-byte character sets")
65 (define-category ?N "Korean Hangul characters of 2-byte character sets")
66 (define-category ?Y "Cyrillic characters of 2-byte character sets")
67 (define-category ?I "Indian Glyphs")
68
69 ;; For phonetic classifications.
70
;; Digit mnemonics ?0..?9 classify characters phonetically.  They are
;; the CATEGORY column of the Lao and Thai `deflist' tables below.
71 (define-category ?0 "consonant")
72 (define-category ?1 "base (independent) vowel")
73 (define-category ?2 "upper diacritical mark (including upper vowel)")
74 (define-category ?3 "lower diacritical mark (including lower vowel)")
75 (define-category ?4 "tone mark")
76 (define-category ?5 "symbol")
77 (define-category ?6 "digit")
78 (define-category ?7 "vowel-modifying diacritical mark")
79 (define-category ?8 "vowel-signs")
80 (define-category ?9 "semivowel lower")
81
82 ;; For filling.
;; Categories that describe layout behaviour rather than a script.
;; ?| is written as ?\| where it is applied to the CJK sets below.
83 (define-category ?| "While filling, we can break a line at this character.")
84
85 ;; For indentation calculation.
;; NOTE(review): the mnemonic on the next line appears to be the space
;; character (the Latin section applies ?\  to code 160); the trailing
;; space after `?' is not visible in this rendering -- confirm against
;; the real file.
86 (define-category ?
87 "This character counts as a space for indentation purposes.")
88
89 ;; Keep the following for `kinsoku' processing. See comments in
90 ;; kinsoku.el.
91 (define-category ?> "A character which can't be placed at beginning of line.")
92 (define-category ?< "A character which can't be placed at end of line.")
93
94 ;; Combining
95 (define-category ?^ "Combining diacritic or mark")
96 \f
97 ;;; Setting syntax and category.
98
99 ;; ASCII
100
;; Give categories `a' and `l' to character codes 32 through 126
;; inclusive (the loop stops before 127).  Codes below 32 and code 127
;; are not touched by this loop.
101 (let ((ch 32))
102 (while (< ch 127) ; All ASCII characters have
103 (modify-category-entry ch ?a) ; the category `a' (ASCII)
104 (modify-category-entry ch ?l) ; and `l' (Latin).
105 (setq ch (1+ ch))))
106
107 ;; Arabic character set
108
;; Category `b' (Arabic) is applied in two passes:
;;  1. to the character returned by (make-char CHARSET) for each of the
;;     four Arabic charsets listed (no position codes are given);
;;  2. to three inclusive UCS ranges decoded with `decode-char':
;;     #x600-#x6ff, #xfb50-#xfdff and #xfe70-#xfefe.
109 (let ((charsets '(arabic-iso8859-6
110 arabic-digit
111 arabic-1-column
112 arabic-2-column)))
113 (while charsets
114 ;; (modify-syntax-entry (make-char (car charsets)) "w")
115 (modify-category-entry (make-char (car charsets)) ?b)
116 (setq charsets (cdr charsets))))
;; `ch' is reused for all three ranges; it is reset with `setq' before
;; the second and third loops.
117 (let ((ch #x600))
118 (while (<= ch #x6ff)
119 (modify-category-entry (decode-char 'ucs ch) ?b)
120 (setq ch (1+ ch)))
121 (setq ch #xfb50)
122 (while (<= ch #xfdff)
123 (modify-category-entry (decode-char 'ucs ch) ?b)
124 (setq ch (1+ ch)))
125 (setq ch #xfe70)
126 (while (<= ch #xfefe)
127 (modify-category-entry (decode-char 'ucs ch) ?b)
128 (setq ch (1+ ch))))
129
130 ;; Chinese character set (GB2312)
131
132 ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
;; GB2312 syntax.  Rows 33, 34 and 41 are given symbol syntax ("_") as
;; a whole.  The individual forms that follow then turn seven bracket
;; pairs into matching open/close parentheses: "(X" marks an opener
;; whose partner is X, ")X" marks a closer whose partner is X.
;; The bracket characters all have `!' (code 33) as their first byte,
;; so these forms presumably rely on running after the row-33 form.
133 (modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
134 (modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
135 (modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
136 (modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
137 (modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
138 (modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
139 (modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
140 (modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
141 (modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
142 (modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
143 (modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
144 (modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
145 (modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
146 (modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
147 (modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
148 (modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
149 (modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
150 ;; Unicode equivalents of above
;; Same opener/closer scheme: seven openers first, then their closers.
151 (modify-syntax-entry ?\\e$,2=T\e(B "(\e$,2=U\e(B")
152 (modify-syntax-entry ?\\e$,2=H\e(B "(\e$,2=I\e(B")
153 (modify-syntax-entry ?\\e$,2=J\e(B "(\e$,2=K\e(B")
154 (modify-syntax-entry ?\\e$,2=L\e(B "(\e$,2=M\e(B")
155 (modify-syntax-entry ?\\e$,2=N\e(B "(\e$,2=O\e(B")
156 (modify-syntax-entry ?\\e$,2=V\e(B "(\e$,2=W\e(B")
157 (modify-syntax-entry ?\\e$,2=P\e(B "(\e$,2=Q\e(B")
158 (modify-syntax-entry ?\\e$,2=U\e(B ")\e$,2=T\e(B")
159 (modify-syntax-entry ?\\e$,2=I\e(B ")\e$,2=H\e(B")
160 (modify-syntax-entry ?\\e$,2=K\e(B ")\e$,2=J\e(B")
161 (modify-syntax-entry ?\\e$,2=M\e(B ")\e$,2=L\e(B")
162 (modify-syntax-entry ?\\e$,2=O\e(B ")\e$,2=N\e(B")
163 (modify-syntax-entry ?\\e$,2=W\e(B ")\e$,2=V\e(B")
164 (modify-syntax-entry ?\\e$,2=Q\e(B ")\e$,2=P\e(B")
165
;; GB2312 categories.  The whole charset gets `c' (Chinese) and `|'
;; (line may be broken here).  Rows 35-39 additionally get the 2-byte
;; row categories A, H, K, G, Y, and rows 48 through 126 get `C'.
166 (modify-category-entry (make-char 'chinese-gb2312) ?c)
167 (modify-category-entry (make-char 'chinese-gb2312) ?\|)
168 (modify-category-entry (make-char 'chinese-gb2312 35) ?A)
169 (modify-category-entry (make-char 'chinese-gb2312 36) ?H)
170 (modify-category-entry (make-char 'chinese-gb2312 37) ?K)
171 (modify-category-entry (make-char 'chinese-gb2312 38) ?G)
172 (modify-category-entry (make-char 'chinese-gb2312 39) ?Y)
;; Loop bound is exclusive: the last row processed is 126.
173 (let ((row 48))
174 (while (< row 127)
175 (modify-category-entry (make-char 'chinese-gb2312 row) ?C)
176 (setq row (1+ row))))
177
178 ;; Chinese character set (BIG5)
179
;; BIG5 is split over two charsets.  Both charset-wide characters
;; (make-char with no position codes) receive the same three
;; categories: `c', `C' and `|'.  No syntax is changed; the
;; `modify-syntax-entry' calls are commented out.
180 (let ((generic-big5-1-char (make-char 'chinese-big5-1))
181 (generic-big5-2-char (make-char 'chinese-big5-2)))
182 ;; (modify-syntax-entry generic-big5-1-char "w")
183 ;; (modify-syntax-entry generic-big5-2-char "w")
184
185 (modify-category-entry generic-big5-1-char ?c)
186 (modify-category-entry generic-big5-2-char ?c)
187
188 (modify-category-entry generic-big5-1-char ?C)
189 (modify-category-entry generic-big5-2-char ?C)
190
191 (modify-category-entry generic-big5-1-char ?\|)
192 (modify-category-entry generic-big5-2-char ?\|))
193
194
195 ;; Chinese character set (CNS11643)
196
;; For each of the seven CNS11643 planes, take the charset-wide
;; character and give it categories `c', `C' and `|' -- the same
;; triple used for BIG5 above.  `generic-char' is a scratch variable
;; rebound on every iteration.
197 (let ((cns-list '(chinese-cns11643-1
198 chinese-cns11643-2
199 chinese-cns11643-3
200 chinese-cns11643-4
201 chinese-cns11643-5
202 chinese-cns11643-6
203 chinese-cns11643-7))
204 generic-char)
205 (while cns-list
206 (setq generic-char (make-char (car cns-list)))
207 ;; (modify-syntax-entry generic-char "w")
208 (modify-category-entry generic-char ?c)
209 (modify-category-entry generic-char ?C)
210 (modify-category-entry generic-char ?|)
211 (setq cns-list (cdr cns-list))))
212
213 ;; Cyrillic character set (ISO-8859-5)
214
;; The whole cyrillic-iso8859-5 charset gets category `y'.
215 (modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
216
;; Code 160 gets whitespace syntax; three individual characters get
;; punctuation syntax.  The first of the three (second byte `-') is
;; the position left out of the case-pair table that follows.
217 (modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
218 (modify-syntax-entry ?\e,L-\e(B ".")
219 (modify-syntax-entry ?\e,Lp\e(B ".")
220 (modify-syntax-entry ?\e,L}\e(B ".")
;; Register Cyrillic case pairs in the standard case table.
;; Each `set-case-syntax-pair' call takes two characters and the
;; table; presumably (first, second) = (upper, lower) -- confirm
;; against the definition of `set-case-syntax-pair'.
;; The table is written twice: first for cyrillic-iso8859-5 literals,
;; then for literals in a second encoding.  Both halves walk the same
;; sequence of second bytes, in three runs:
;;   `!'..`/' (skipping `-')  paired with  `q'..DEL
;;   `0'..`?'                 paired with  `P'..`_'
;;   `@'..`O'                 paired with  ``'..`o'
221 (let ((tbl (standard-case-table)))
222 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
223 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
224 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
225 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
226 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
227 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
228 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
229 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
230 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
231 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
232 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
233 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
234 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
235 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
236 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
237 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
238 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
239 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
240 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
241 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
242 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
243 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
244 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
245 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
246 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
247 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
248 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
249 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
250 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
251 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
252 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
253 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
254 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
255 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
256 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
257 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
258 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
259 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
260 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
261 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
262 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
263 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
264 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
265 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
266 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
267 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl)
;; Second copy of the same table, using the other encoding's literals.
268 (set-case-syntax-pair ?\e$,1(!\e(B ?\e$,1(q\e(B tbl)
269 (set-case-syntax-pair ?\e$,1("\e(B ?\e$,1(r\e(B tbl)
270 (set-case-syntax-pair ?\e$,1(#\e(B ?\e$,1(s\e(B tbl)
271 (set-case-syntax-pair ?\e$,1($\e(B ?\e$,1(t\e(B tbl)
272 (set-case-syntax-pair ?\e$,1(%\e(B ?\e$,1(u\e(B tbl)
273 (set-case-syntax-pair ?\e$,1(&\e(B ?\e$,1(v\e(B tbl)
274 (set-case-syntax-pair ?\e$,1('\e(B ?\e$,1(w\e(B tbl)
275 (set-case-syntax-pair ?\e$,1((\e(B ?\e$,1(x\e(B tbl)
276 (set-case-syntax-pair ?\e$,1()\e(B ?\e$,1(y\e(B tbl)
277 (set-case-syntax-pair ?\e$,1(*\e(B ?\e$,1(z\e(B tbl)
278 (set-case-syntax-pair ?\e$,1(+\e(B ?\e$,1({\e(B tbl)
279 (set-case-syntax-pair ?\e$,1(,\e(B ?\e$,1(|\e(B tbl)
280 (set-case-syntax-pair ?\e$,1(.\e(B ?\e$,1(~\e(B tbl)
281 (set-case-syntax-pair ?\e$,1(/\e(B ?\e$,1(\7f\e(B tbl)
282 (set-case-syntax-pair ?\e$,1(0\e(B ?\e$,1(P\e(B tbl)
283 (set-case-syntax-pair ?\e$,1(1\e(B ?\e$,1(Q\e(B tbl)
284 (set-case-syntax-pair ?\e$,1(2\e(B ?\e$,1(R\e(B tbl)
285 (set-case-syntax-pair ?\e$,1(3\e(B ?\e$,1(S\e(B tbl)
286 (set-case-syntax-pair ?\e$,1(4\e(B ?\e$,1(T\e(B tbl)
287 (set-case-syntax-pair ?\e$,1(5\e(B ?\e$,1(U\e(B tbl)
288 (set-case-syntax-pair ?\e$,1(6\e(B ?\e$,1(V\e(B tbl)
289 (set-case-syntax-pair ?\e$,1(7\e(B ?\e$,1(W\e(B tbl)
290 (set-case-syntax-pair ?\e$,1(8\e(B ?\e$,1(X\e(B tbl)
291 (set-case-syntax-pair ?\e$,1(9\e(B ?\e$,1(Y\e(B tbl)
292 (set-case-syntax-pair ?\e$,1(:\e(B ?\e$,1(Z\e(B tbl)
293 (set-case-syntax-pair ?\e$,1(;\e(B ?\e$,1([\e(B tbl)
294 (set-case-syntax-pair ?\e$,1(<\e(B ?\e$,1(\\e(B tbl)
295 (set-case-syntax-pair ?\e$,1(=\e(B ?\e$,1(]\e(B tbl)
296 (set-case-syntax-pair ?\e$,1(>\e(B ?\e$,1(^\e(B tbl)
297 (set-case-syntax-pair ?\e$,1(?\e(B ?\e$,1(_\e(B tbl)
298 (set-case-syntax-pair ?\e$,1(@\e(B ?\e$,1(`\e(B tbl)
299 (set-case-syntax-pair ?\e$,1(A\e(B ?\e$,1(a\e(B tbl)
300 (set-case-syntax-pair ?\e$,1(B\e(B ?\e$,1(b\e(B tbl)
301 (set-case-syntax-pair ?\e$,1(C\e(B ?\e$,1(c\e(B tbl)
302 (set-case-syntax-pair ?\e$,1(D\e(B ?\e$,1(d\e(B tbl)
303 (set-case-syntax-pair ?\e$,1(E\e(B ?\e$,1(e\e(B tbl)
304 (set-case-syntax-pair ?\e$,1(F\e(B ?\e$,1(f\e(B tbl)
305 (set-case-syntax-pair ?\e$,1(G\e(B ?\e$,1(g\e(B tbl)
306 (set-case-syntax-pair ?\e$,1(H\e(B ?\e$,1(h\e(B tbl)
307 (set-case-syntax-pair ?\e$,1(I\e(B ?\e$,1(i\e(B tbl)
308 (set-case-syntax-pair ?\e$,1(J\e(B ?\e$,1(j\e(B tbl)
309 (set-case-syntax-pair ?\e$,1(K\e(B ?\e$,1(k\e(B tbl)
310 (set-case-syntax-pair ?\e$,1(L\e(B ?\e$,1(l\e(B tbl)
311 (set-case-syntax-pair ?\e$,1(M\e(B ?\e$,1(m\e(B tbl)
312 (set-case-syntax-pair ?\e$,1(N\e(B ?\e$,1(n\e(B tbl)
313 (set-case-syntax-pair ?\e$,1(O\e(B ?\e$,1(o\e(B tbl))
314
315 ;; Devanagari character set
316
317 ;;; Commented out since the categories appear not to be used anywhere
318 ;;; and word syntax is the default.
319 ;; (let ((deflist '(;; chars syntax category
320 ;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
321 ;; ; chandrabindu, anuswar, visarga
322 ;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
323 ;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
324 ;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
325 ;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
326 ;; ;; Unicode equivalents
327 ;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
328 ;; ; chandrabindu, anuswar, visarga
329 ;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
330 ;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
331 ;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
332 ;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
333 ;; ))
334 ;; elm chars len syntax category to ch i)
335 ;; (while deflist
336 ;; (setq elm (car deflist))
337 ;; (setq chars (car elm)
338 ;; len (length chars)
339 ;; syntax (nth 1 elm)
340 ;; category (nth 2 elm)
341 ;; i 0)
342 ;; (while (< i len)
343 ;; (if (= (aref chars i) ?-)
344 ;; (setq i (1+ i)
345 ;; to (aref chars i))
346 ;; (setq ch (aref chars i)
347 ;; to ch))
348 ;; (while (<= ch to)
349 ;; (modify-syntax-entry ch syntax)
350 ;; (modify-category-entry ch category)
351 ;; (setq ch (1+ ch)))
352 ;; (setq i (1+ i)))
353 ;; (setq deflist (cdr deflist))))
354
355 ;; Ethiopic character set
356
;; Ethiopic: category `e' goes to the charset-wide `ethiopic'
;; character and to every UCS code from #x1200 through #x137c
;; inclusive (`dotimes' runs (1+ (- #x137c #x1200)) times).
357 (modify-category-entry (make-char 'ethiopic) ?e)
358 ;; (modify-syntax-entry (make-char 'ethiopic) "w")
359 (dotimes (i (1+ (- #x137c #x1200)))
360 (modify-category-entry (decode-char 'ucs (+ #x1200 i)) ?e))
;; The listed characters (14 in the `ethiopic' charset followed by 14
;; in a second encoding) are given punctuation syntax ".".
361 (let ((chars '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
362 ;; Unicode equivalents of the above:
363 ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B)))
364 (while chars
365 (modify-syntax-entry (car chars) ".")
366 (setq chars (cdr chars))))
367
368 ;; Greek character set (ISO-8859-7)
369
;; Greek: category `g' for the whole greek-iso8859-7 charset and for
;; every UCS code from #x370 through #x3ff inclusive.
370 (modify-category-entry (make-char 'greek-iso8859-7) ?g)
371 (let ((c #x370))
372 (while (<= c #x3ff)
373 (modify-category-entry (decode-char 'ucs c) ?g)
374 (setq c (1+ c))))
375
376 ;; (let ((c 182))
377 ;; (while (< c 255)
378 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
379 ;; (setq c (1+ c))))
380 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
;; Greek syntax and case data.
;; Three greek-iso8859-7 characters first get punctuation syntax via
;; `modify-syntax-entry'.  The `let' then works on the standard case
;; table: `set-case-syntax' calls assign "." or "_" to non-letters,
;; and `set-case-syntax-pair' calls register letter pairs, once for
;; greek-iso8859-7 literals and once for a second encoding.
381 (modify-syntax-entry ?\e,F7\e(B ".")
382 (modify-syntax-entry ?\e,F;\e(B ".")
383 (modify-syntax-entry ?\e,F=\e(B ".")
384 (let ((tbl (standard-case-table)))
385 ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
386 ;; in several cases.
387 (set-case-syntax ?\e,F!\e(B "." tbl)
388 (set-case-syntax ?\e,F"\e(B "." tbl)
;; NOTE(review): the next two forms name the same character with two
;; different syntax strings ("." then "_").  Presumably only the later
;; one takes effect -- confirm which syntax (or which character) was
;; intended.
389 (set-case-syntax ?\e,F&\e(B "." tbl)
390 (set-case-syntax ?\e,F&\e(B "_" tbl)
391 (set-case-syntax ?\e,F'\e(B "." tbl)
392 (set-case-syntax ?\e,F)\e(B "_" tbl)
393 (set-case-syntax ?\e,F+\e(B "." tbl)
394 (set-case-syntax ?\e,F,\e(B "_" tbl)
395 (set-case-syntax ?\e,F-\e(B "_" tbl)
396 (set-case-syntax ?\e,F/\e(B "." tbl)
397 (set-case-syntax ?\e,F0\e(B "_" tbl)
398 (set-case-syntax ?\e,F1\e(B "_" tbl)
399 ;; (set-case-syntax ?\e,F7\e(B "_" tbl)
400 ;; (set-case-syntax ?\e,F=\e(B "_" tbl)
;; Letter pairs.  Second byte `R' has no entry (the run goes `Q', `S').
401 (set-case-syntax-pair ?\e,FA\e(B ?\e,Fa\e(B tbl)
402 (set-case-syntax-pair ?\e,FB\e(B ?\e,Fb\e(B tbl)
403 (set-case-syntax-pair ?\e,FC\e(B ?\e,Fc\e(B tbl)
404 (set-case-syntax-pair ?\e,FD\e(B ?\e,Fd\e(B tbl)
405 (set-case-syntax-pair ?\e,FE\e(B ?\e,Fe\e(B tbl)
406 (set-case-syntax-pair ?\e,FF\e(B ?\e,Ff\e(B tbl)
407 (set-case-syntax-pair ?\e,FG\e(B ?\e,Fg\e(B tbl)
408 (set-case-syntax-pair ?\e,FH\e(B ?\e,Fh\e(B tbl)
409 (set-case-syntax-pair ?\e,FI\e(B ?\e,Fi\e(B tbl)
410 (set-case-syntax-pair ?\e,FJ\e(B ?\e,Fj\e(B tbl)
411 (set-case-syntax-pair ?\e,FK\e(B ?\e,Fk\e(B tbl)
412 (set-case-syntax-pair ?\e,FL\e(B ?\e,Fl\e(B tbl)
413 (set-case-syntax-pair ?\e,FM\e(B ?\e,Fm\e(B tbl)
414 (set-case-syntax-pair ?\e,FN\e(B ?\e,Fn\e(B tbl)
415 (set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
416 (set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
417 (set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
418 (set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
419 (set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
420 (set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
421 (set-case-syntax-pair ?\e,FV\e(B ?\e,Fv\e(B tbl)
422 (set-case-syntax-pair ?\e,FW\e(B ?\e,Fw\e(B tbl)
423 (set-case-syntax-pair ?\e,FX\e(B ?\e,Fx\e(B tbl)
424 (set-case-syntax-pair ?\e,FY\e(B ?\e,Fy\e(B tbl)
425 (set-case-syntax-pair ?\e,FZ\e(B ?\e,Fz\e(B tbl)
426 (set-case-syntax-pair ?\e,F[\e(B ?\e,F{\e(B tbl)
427 (set-case-syntax-pair ?\e,F?\e(B ?\e,F~\e(B tbl)
428 (set-case-syntax-pair ?\e,F>\e(B ?\e,F}\e(B tbl)
429 (set-case-syntax-pair ?\e,F<\e(B ?\e,F|\e(B tbl)
430 (set-case-syntax-pair ?\e,F6\e(B ?\e,F\\e(B tbl)
431 (set-case-syntax-pair ?\e,F8\e(B ?\e,F]\e(B tbl)
432 (set-case-syntax-pair ?\e,F9\e(B ?\e,F^\e(B tbl)
433 (set-case-syntax-pair ?\e,F:\e(B ?\e,F_\e(B tbl)
434 ;; Unicode equivalents
435 (set-case-syntax-pair ?\e$,1&q\e(B ?\e$,1'1\e(B tbl)
436 (set-case-syntax-pair ?\e$,1&r\e(B ?\e$,1'2\e(B tbl)
437 (set-case-syntax-pair ?\e$,1&s\e(B ?\e$,1'3\e(B tbl)
438 (set-case-syntax-pair ?\e$,1&t\e(B ?\e$,1'4\e(B tbl)
439 (set-case-syntax-pair ?\e$,1&u\e(B ?\e$,1'5\e(B tbl)
440 (set-case-syntax-pair ?\e$,1&v\e(B ?\e$,1'6\e(B tbl)
441 (set-case-syntax-pair ?\e$,1&w\e(B ?\e$,1'7\e(B tbl)
442 (set-case-syntax-pair ?\e$,1&x\e(B ?\e$,1'8\e(B tbl)
443 (set-case-syntax-pair ?\e$,1&y\e(B ?\e$,1'9\e(B tbl)
444 (set-case-syntax-pair ?\e$,1&z\e(B ?\e$,1':\e(B tbl)
445 (set-case-syntax-pair ?\e$,1&{\e(B ?\e$,1';\e(B tbl)
446 (set-case-syntax-pair ?\e$,1&|\e(B ?\e$,1'<\e(B tbl)
447 (set-case-syntax-pair ?\e$,1&}\e(B ?\e$,1'=\e(B tbl)
448 (set-case-syntax-pair ?\e$,1&~\e(B ?\e$,1'>\e(B tbl)
449 (set-case-syntax-pair ?\e$,1&\7f\e(B ?\e$,1'?\e(B tbl)
450 (set-case-syntax-pair ?\e$,1' \e(B ?\e$,1'@\e(B tbl)
451 (set-case-syntax-pair ?\e$,1'!\e(B ?\e$,1'A\e(B tbl)
452 (set-case-syntax-pair ?\e$,1'#\e(B ?\e$,1'C\e(B tbl)
453 (set-case-syntax-pair ?\e$,1'$\e(B ?\e$,1'D\e(B tbl)
454 (set-case-syntax-pair ?\e$,1'%\e(B ?\e$,1'E\e(B tbl)
455 (set-case-syntax-pair ?\e$,1'&\e(B ?\e$,1'F\e(B tbl)
456 (set-case-syntax-pair ?\e$,1''\e(B ?\e$,1'G\e(B tbl)
457 (set-case-syntax-pair ?\e$,1'(\e(B ?\e$,1'H\e(B tbl)
458 (set-case-syntax-pair ?\e$,1')\e(B ?\e$,1'I\e(B tbl)
459 (set-case-syntax-pair ?\e$,1'*\e(B ?\e$,1'J\e(B tbl)
460 (set-case-syntax-pair ?\e$,1'+\e(B ?\e$,1'K\e(B tbl)
461 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
462 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
463 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
464 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
465 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
466 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
467 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl))
468
469 ;; Hebrew character set (ISO-8859-8)
470
;; Hebrew: category `w' (the Hebrew category mnemonic, not word
;; syntax) for the hebrew-iso8859-8 charset and for UCS codes #x591
;; through #x5f4 inclusive.
471 (modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
472 (let ((c #x591))
473 (while (<= c #x5f4)
474 (modify-category-entry (decode-char 'ucs c) ?w)
475 (setq c (1+ c))))
476
;; Punctuation syntax for two charset positions and five UCS code
;; points; the trailing comments name each character.
477 (modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ
478 (modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ
479 (modify-syntax-entry (decode-char 'ucs #x5be) ".") ; MAQAF
480 (modify-syntax-entry (decode-char 'ucs #x5c0) ".") ; PASEQ
481 (modify-syntax-entry (decode-char 'ucs #x5c3) ".") ; SOF PASUQ
482 (modify-syntax-entry (decode-char 'ucs #x5f3) ".") ; GERESH
483 (modify-syntax-entry (decode-char 'ucs #x5f4) ".") ; GERSHAYIM
484
485 ;; (let ((c 224))
486 ;; (while (< c 251)
487 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
488 ;; (setq c (1+ c))))
489 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
490
491 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
492
;; Indian charsets: `indian-is13194' gets category `i'; the two glyph
;; charsets get `I' ("Indian Glyphs").
493 (modify-category-entry (make-char 'indian-is13194) ?i)
494 (modify-category-entry (make-char 'indian-2-column) ?I)
495 (modify-category-entry (make-char 'indian-glyph) ?I)
496 ;; Unicode Devanagari block
;; UCS codes #x901 through #x970 inclusive also get category `i'.
497 (let ((c #x901))
498 (while (<= c #x970)
499 (modify-category-entry (decode-char 'ucs c) ?i)
500 (setq c (1+ c))))
501
502 ;;; Commented out since the categories appear not to be used anywhere
503 ;;; and word syntax is the default.
504 ;; (let ((deflist ;
505 ;; '(;; chars syntax category
506 ;; ("\e(5!"#\e(B" "w" ?7) ; vowel-modifying diacritical mark
507 ;; ; chandrabindu, anuswar, visarga
508 ;; ("\e(5$\e(B-\e(52\e(B" "w" ?1) ; base (independent) vowel
509 ;; ("\e(53\e(B-\e(5X\e(B" "w" ?0) ; consonant
510 ;; ("\e(5Z\e(B-\e(5g\e(B" "w" ?8) ; matra
511 ;; ("\e(5q\e(B-\e(5z\e(B" "w" ?6) ; digit
512 ;; ))
513 ;; elm chars len syntax category to ch i)
514 ;; (while deflist
515 ;; (setq elm (car deflist))
516 ;; (setq chars (car elm)
517 ;; len (length chars)
518 ;; syntax (nth 1 elm)
519 ;; category (nth 2 elm)
520 ;; i 0)
521 ;; (while (< i len)
522 ;; (if (= (aref chars i) ?-)
523 ;; (setq i (1+ i)
524 ;; to (aref chars i))
525 ;; (setq ch (aref chars i)
526 ;; to ch))
527 ;; (while (<= ch to)
528 ;; (modify-syntax-entry ch syntax)
529 ;; (modify-category-entry ch category)
530 ;; (setq ch (1+ ch)))
531 ;; (setq i (1+ i)))
532 ;; (setq deflist (cdr deflist))))
533
534
535 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
536
;; Charset-wide Japanese categories:
;;   katakana-jisx0201  -> k, j, |
;;   latin-jisx0201     -> r
;;   japanese-jisx0208  -> j, |
;;   japanese-jisx0212  -> j, |
537 (modify-category-entry (make-char 'katakana-jisx0201) ?k)
538 (modify-category-entry (make-char 'katakana-jisx0201) ?j)
539 (modify-category-entry (make-char 'latin-jisx0201) ?r)
540 (modify-category-entry (make-char 'japanese-jisx0208) ?j)
541 (modify-category-entry (make-char 'japanese-jisx0212) ?j)
542 (modify-category-entry (make-char 'katakana-jisx0201) ?\|)
543 (modify-category-entry (make-char 'japanese-jisx0208) ?\|)
544 (modify-category-entry (make-char 'japanese-jisx0212) ?\|)
545
546 ;; Unicode equivalents of JISX0201-kana
;; UCS #xff61..#xff9f inclusive: the same k/j/| triple as the charset.
547 (let ((c #xff61))
548 (while (<= c #xff9f)
549 (modify-category-entry (decode-char 'ucs c) ?k)
550 (modify-category-entry (decode-char 'ucs c) ?j)
551 (modify-category-entry (decode-char 'ucs c) ?\|)
552 (setq c (1+ c))))
553
554 ;; Katakana block
;; UCS #x30a0..#x30ff inclusive; lower-case `k' is used, not `K'.
555 (let ((c #x30a0))
556 (while (<= c #x30ff)
557 ;; ?K is double width, ?k isn't specified
558 (modify-category-entry (decode-char 'ucs c) ?k)
559 (modify-category-entry (decode-char 'ucs c) ?j)
560 (modify-category-entry (decode-char 'ucs c) ?\|)
561 (setq c (1+ c))))
562
563 ;; Hiragana block
;; UCS #x3040..#x309f inclusive.  Unlike the Katakana loop, category
;; `j' is NOT applied here -- that call is commented out.
564 (let ((c #x3040))
565 (while (<= c #x309f)
566 ;; ?H is actually defined to be double width
567 (modify-category-entry (decode-char 'ucs c) ?H)
568 ;;(modify-category-entry (decode-char 'ucs c) ?j)
569 (modify-category-entry (decode-char 'ucs c) ?\|)
570 (setq c (1+ c))))
571
572 ;; JISX0208
573 ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
;; JISX0208 syntax and categories.
;; Rows 33, 34 and 40 get symbol syntax ("_") as a whole.  Twelve
;; characters are then given word syntax "w"; all twelve have `!'
;; (code 33) as their first byte, so this loop presumably relies on
;; running after the row-33 form.
574 (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
575 (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
576 (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
577 (let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
578 (while chars
579 (modify-syntax-entry (car chars) "w")
580 (setq chars (cdr chars))))
;; Five bracket pairs: "(X" makes an opener matched by X, ")X" makes
;; a closer matched by X.
581 (modify-syntax-entry ?\\e$B!J\e(B "(\e$B!K\e(B")
582 (modify-syntax-entry ?\\e$B!N\e(B "(\e$B!O\e(B")
583 (modify-syntax-entry ?\\e$B!P\e(B "(\e$B!Q\e(B")
584 (modify-syntax-entry ?\\e$B!V\e(B "(\e$B!W\e(B")
585 (modify-syntax-entry ?\\e$B!X\e(B "(\e$B!Y\e(B")
586 (modify-syntax-entry ?\\e$B!K\e(B ")\e$B!J\e(B")
587 (modify-syntax-entry ?\\e$B!O\e(B ")\e$B!N\e(B")
588 (modify-syntax-entry ?\\e$B!Q\e(B ")\e$B!P\e(B")
589 (modify-syntax-entry ?\\e$B!W\e(B ")\e$B!V\e(B")
590 (modify-syntax-entry ?\\e$B!Y\e(B ")\e$B!X\e(B")
591
;; Row categories: 35->A, 36->H, 37->K, 38->G, 39->Y, and rows 48
;; through 126 -> C (loop bound 127 is exclusive).
592 (modify-category-entry (make-char 'japanese-jisx0208 35) ?A)
593 (modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
594 (modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
595 (modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
596 (modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
597 (let ((row 48))
598 (while (< row 127)
599 (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
600 (setq row (1+ row))))
;; The same twelve characters that were given word syntax above also
;; get script categories: the first gets K, the next two get both K
;; and H, and the remaining nine get C.
601 (modify-category-entry ?\e$B!<\e(B ?K)
602 (let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B)))
603 (while chars
604 (modify-category-entry (car chars) ?K)
605 (modify-category-entry (car chars) ?H)
606 (setq chars (cdr chars))))
607 (let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
608 (while chars
609 (modify-category-entry (car chars) ?C)
610 (setq chars (cdr chars))))
611
612 ;; JISX0212
613 ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
;; JISX0212: rows 33, 34 and 35 get symbol syntax ("_"); the whole
;; charset gets category `C'.
614 (modify-syntax-entry (make-char 'japanese-jisx0212 33) "_")
615 (modify-syntax-entry (make-char 'japanese-jisx0212 34) "_")
616 (modify-syntax-entry (make-char 'japanese-jisx0212 35) "_")
617
618 (modify-category-entry (make-char 'japanese-jisx0212 ) ?C)
619
620 ;; JISX0201-Kana
621 ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
;; Three JISX0201-Kana characters and three characters in a second
;; encoding (introduced by the "Unicode:" comment) are given
;; punctuation syntax ".".
622 (let ((chars '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
623 ;; Unicode:
624 ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B)))
625 (while chars
626 (modify-syntax-entry (car chars) ".")
627 (setq chars (cdr chars))))
628
;; JISX0201-Kana bracket pair.  The first character is an opener whose
;; partner is the second; the second must therefore be a *closer*
;; (class `)') whose partner is the first.  It was previously given
;; the open-parenthesis class `(', unlike every other bracket pair in
;; this file (see the JISX0208 and GB2312 pairs), which made both
;; halves of the pair behave as openers.
629 (modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
630 (modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B")
631
632 ;; Korean character set (KSC5601)
633
634 ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
;; KSC5601: rows 33, 34, 38, 39, 40 and 41 get symbol syntax ("_").
635 (modify-syntax-entry (make-char 'korean-ksc5601 33) "_")
636 (modify-syntax-entry (make-char 'korean-ksc5601 34) "_")
637 (modify-syntax-entry (make-char 'korean-ksc5601 38) "_")
638 (modify-syntax-entry (make-char 'korean-ksc5601 39) "_")
639 (modify-syntax-entry (make-char 'korean-ksc5601 40) "_")
640 (modify-syntax-entry (make-char 'korean-ksc5601 41) "_")
641
;; Whole charset -> `h' (Korean); rows 35, 37, 42, 43, 44 -> the
;; 2-byte row categories A, G, H, K, Y respectively.  Note the row
;; numbers for H/K/Y differ from those used for GB2312 and JISX0208.
642 (modify-category-entry (make-char 'korean-ksc5601) ?h)
643 (modify-category-entry (make-char 'korean-ksc5601 35) ?A)
644 (modify-category-entry (make-char 'korean-ksc5601 37) ?G)
645 (modify-category-entry (make-char 'korean-ksc5601 42) ?H)
646 (modify-category-entry (make-char 'korean-ksc5601 43) ?K)
647 (modify-category-entry (make-char 'korean-ksc5601 44) ?Y)
648
649 ;; Latin character set (latin-1,2,3,4,5,8,9)
650
;; Seven Latin charsets (iso8859-1, -2, -3, -4, -9, -14, -15) each get
;; category `l' charset-wide.  Syntax for these sets is not set here;
;; the file commentary says it lives in the latin-N.el files.
651 (modify-category-entry (make-char 'latin-iso8859-1) ?l)
652 (modify-category-entry (make-char 'latin-iso8859-2) ?l)
653 (modify-category-entry (make-char 'latin-iso8859-3) ?l)
654 (modify-category-entry (make-char 'latin-iso8859-4) ?l)
655 (modify-category-entry (make-char 'latin-iso8859-9) ?l)
656 (modify-category-entry (make-char 'latin-iso8859-14) ?l)
657 (modify-category-entry (make-char 'latin-iso8859-15) ?l)
658
;; Code 160 of each of the same seven charsets gets the space-mnemonic
;; category (written ?\ followed by a space).
659 (modify-category-entry (make-char 'latin-iso8859-1 160) ?\ )
660 (modify-category-entry (make-char 'latin-iso8859-2 160) ?\ )
661 (modify-category-entry (make-char 'latin-iso8859-3 160) ?\ )
662 (modify-category-entry (make-char 'latin-iso8859-4 160) ?\ )
663 (modify-category-entry (make-char 'latin-iso8859-9 160) ?\ )
664 (modify-category-entry (make-char 'latin-iso8859-14 160) ?\ )
665 (modify-category-entry (make-char 'latin-iso8859-15 160) ?\ )
666
667 ;; Lao character set
668
;; Lao: category `o' for the `lao' charset and for UCS codes #xe80
;; through #xeff inclusive.
669 (modify-category-entry (make-char 'lao) ?o)
670 (dotimes (i (1+ (- #xeff #xe80)))
671 (modify-category-entry (decode-char 'ucs (+ i #xe80)) ?o))
672
;; Table-driven phonetic classification for Lao.
;; Each `deflist' entry is (CHARS SYNTAX CATEGORY).  CHARS is scanned
;; one character at a time:
;;   - an ordinary character is processed on its own;
;;   - a `-' means "continue from just past the previous character up
;;     to and including the character after the dash".  This works
;;     because the inner loop leaves `ch' one past the last character
;;     it handled, and the dash branch only updates `to'.
;; Every character visited gets CATEGORY.  SYNTAX is applied only when
;; it is not "w"; "w" entries leave the syntax table untouched.
673 (let ((deflist '(;; chars syntax category
674 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
675 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
676 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
677 ("\e(1XY\e(B" "w" ?3) ; vowel lower
678 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
679 ("\e(1\\e(B" "w" ?9) ; semivowel lower
680 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
681 ("\e(1Of\e(B" "_" ?5) ; symbol
682 ;; Unicode equivalents
683 ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
684 ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
685 ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
686 ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
;; NOTE(review): the charset tone-mark run above ends at `l' while
;; this one ends at `k'; confirm the two runs are meant to differ.
687 ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark
688 ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower
689 ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
690 ("\e$,1DODf\e(B" "_" ?5) ; symbol
691 ))
692 elm chars len syntax category to ch i)
693 (while deflist
694 (setq elm (car deflist))
695 (setq chars (car elm)
696 len (length chars)
697 syntax (nth 1 elm)
698 category (nth 2 elm)
699 i 0)
700 (while (< i len)
;; Dash: skip it and take the next character as the end of the run.
;; `ch' is deliberately left as-is (one past the previous character).
701 (if (= (aref chars i) ?-)
702 (setq i (1+ i)
703 to (aref chars i))
;; Ordinary character: a run of length one.
704 (setq ch (aref chars i)
705 to ch))
706 (while (<= ch to)
707 (unless (string-equal syntax "w")
708 (modify-syntax-entry ch syntax))
709 (modify-category-entry ch category)
710 (setq ch (1+ ch)))
711 (setq i (1+ i)))
712 (setq deflist (cdr deflist))))
713
714 ;; Thai character set (TIS620)
715
;; Thai: category `t' for the `thai-tis620' charset and for UCS codes
;; #xe00 through #xe7f inclusive.
716 (modify-category-entry (make-char 'thai-tis620) ?t)
717 (dotimes (i (1+ (- #xe7f #xe00)))
718 (modify-category-entry (decode-char 'ucs (+ i #xe00)) ?t))
719
;; Table-driven phonetic classification for Thai; same scheme as the
;; Lao table.  Each entry is (CHARS SYNTAX CATEGORY).  In CHARS a `-'
;; extends the previous character into a run ending at the character
;; after the dash; any other character stands for itself.  Every
;; visited character gets CATEGORY; SYNTAX is applied only when it is
;; not "w".
720 (let ((deflist '(;; chars syntax category
721 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
722 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
723 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
724 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
725 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
726 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
727 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
728 ;; Unicode equivalents
729 ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
730 ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
731 ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
732 ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
733 ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
734 ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
735 ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
736 ))
737 elm chars len syntax category to ch i)
738 (while deflist
739 (setq elm (car deflist))
740 (setq chars (car elm)
741 len (length chars)
742 syntax (nth 1 elm)
743 category (nth 2 elm)
744 i 0)
745 (while (< i len)
;; Dash: the run's end is the next character; `ch' already sits one
;; past the previously handled character, so it is not reset here.
746 (if (= (aref chars i) ?-)
747 (setq i (1+ i)
748 to (aref chars i))
749 (setq ch (aref chars i)
750 to ch))
751 (while (<= ch to)
752 (unless (string-equal syntax "w")
753 (modify-syntax-entry ch syntax))
754 (modify-category-entry ch category)
755 (setq ch (1+ ch)))
756 (setq i (1+ i)))
757 (setq deflist (cdr deflist))))
758
759 ;; Tibetan character set
760
;; Give category `q' to the characters returned by `make-char' for the
;; two Tibetan charsets and to each Unicode code point from U+0F00
;; through U+0FFF.
(dolist (charset '(tibetan tibetan-1-column))
  (modify-category-entry (make-char charset) ?q))
(let ((code #xf00))
  (while (<= code #xfff)
    (modify-category-entry (decode-char 'ucs code) ?q)
    (setq code (1+ code))))
765
;; Assign syntax and category to individual Tibetan characters: first
;; the charset-specific forms, then the Unicode version that the list
;; itself marks as incomplete.
;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  CHARS is a string in
;; which a `-' between two characters denotes the inclusive range from
;; the first to the second; any other character stands for itself.
;; The character literals are ISO-2022 escape sequences and must be kept
;; byte for byte.
766 (let ((deflist '(;; chars syntax category
767 ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
768 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
769 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
770 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
771 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
772 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
773 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
774 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
775 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
776 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
777 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
778 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
779 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
780 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
781
782 ;; Unicode version (not complete)
783 ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
784 ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ;
785 ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
786 ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
787 ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lower vowel/modifier
788 ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
789 ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
790 ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
791 ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
792 ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
793 ))
794 elm chars len syntax category to ch i)
;; Walk the definition list one element at a time.
795 (while deflist
796 (setq elm (car deflist))
797 (setq chars (car elm)
798 len (length chars)
799 syntax (nth 1 elm)
800 category (nth 2 elm)
801 i 0)
;; Scan CHARS left to right, expanding `-' ranges on the fly.
802 (while (< i len)
;; A `-' extends the current range: skip it and take the following
;; character as the upper bound TO.  CH is not reset here; it still
;; holds one past the character processed just before the hyphen.
803 (if (= (aref chars i) ?-)
804 (setq i (1+ i)
805 to (aref chars i))
;; Any other character starts a one-character range of its own.
806 (setq ch (aref chars i)
807 to ch))
;; Apply the settings to every character from CH through TO.
808 (while (<= ch to)
;; Elements whose syntax is "w" only receive the category; the syntax
;; table is not modified for them.
809 (unless (string-equal syntax "w")
810 (modify-syntax-entry ch syntax))
811 (modify-category-entry ch category)
812 (setq ch (1+ ch)))
813 (setq i (1+ i)))
814 (setq deflist (cdr deflist))))
815
816 ;; Vietnamese character set
817
;; Put the characters returned by `make-char' for the two VISCII
;; charsets into category `v', and additionally into the Latin category
;; `l' so that they make a word together with Latin characters.
;; The matching syntax settings remain disabled:
;; (modify-syntax-entry lower "w")
;; (modify-syntax-entry upper "w")
(dolist (category '(?v ?l))
  (dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
    (modify-category-entry (make-char charset) category)))
827
;; Pair the characters at the same position (32 through 127) of the
;; upper and lower VISCII charsets in the standard case table.
(let ((case-tbl (standard-case-table))
      (code 32))
  (while (<= code 127)
    (let ((capital (make-char 'vietnamese-viscii-upper code))
          (small (make-char 'vietnamese-viscii-lower code)))
      (set-case-syntax-pair capital small case-tbl))
    (setq code (1+ code))))
835
836 ;; Unicode (mule-unicode-0100-24ff)
837
838 (let ((tbl (standard-case-table)) c)
839
840 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
841 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
842 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
843 ;; Thus we have to check language-environment to handle casing
844 ;; correctly. Currently only I<->i is available.
845
;; Latin Extended-A, Latin Extended-B
;; Every code point of U+0100..U+0233 gets the Latin category `l'.
;; Case pairs are only derived for even code points.
(setq c #x0100)
(while (<= c #x0233)
  (modify-category-entry (decode-char 'ucs c) ?l)
  (when (= (% c 2) 0)
    (cond
     ;; U+0100..U+012F and U+014A..U+0177: the even code point is the
     ;; upper-case letter, the following odd one its lower case.
     ((or (<= c #x012e)
          (and (<= #x014a c) (<= c #x0177)))
      (set-case-syntax-pair
       (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
     ;; U+0139..U+0148: the pairing is shifted by one, so the even code
     ;; point is the lower case of the odd one before it.
     ((and (<= #x013a c) (<= c #x0148))
      (set-case-syntax-pair
       (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))))
  (setq c (1+ c)))
;; Explicit case pairs for Latin letters that the loop above does not
;; pair.  Each call passes the upper-case character first and the
;; lower-case character second, followed by the case table TBL.
;; The characters are written as ISO-2022 escape sequences and must be
;; kept byte for byte.
861 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
862 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
863 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
;; The next pair is deliberately left disabled (see its own remark).
864 ;;; (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl) ; these two have different length!
865 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
866 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
867 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
868
869 ;; Latin Extended-B
870 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
871 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
872 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
873 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
874 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
875 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
876 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
877 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
878 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
879 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
880 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
881 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
882 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
883 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
884 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
885 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
886 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
887 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
888 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
889 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
890 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
891 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
892 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
893 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
894 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
895 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
896 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
897 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
898 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
899 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
900 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
901 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
902 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
903 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
904 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
905 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
;; In the next three groups two consecutive calls share their second
;; argument, i.e. two different characters are mapped onto the same
;; lower-case character.
;; NOTE(review): presumably the all-capital and the titlecase form of a
;; digraph -- confirm against the Unicode data.
906 (set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
907 (set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
908 (set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
909 (set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
910 (set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
911 (set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
912 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
913 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
914 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
915 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
916 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
917 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
918 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
919 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
920 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
921 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
922 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
923 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
924 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
925 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
926 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
927 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
928 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
;; NOTE(review): the line below looks like a full case-folding record for
;; U+01F0 (folding to the two characters U+006A U+030C); apparently kept
;; as a reminder that no one-to-one pair is set for it -- confirm.
929 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
930 (set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
931 (set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
932 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
933 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
934 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
935 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
936 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
937 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
938 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
939 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
940 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
941 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
942 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
943 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
944 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
945 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
946 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
947 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
948 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
949 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
950 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
951 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
952 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
953 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
954 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
955 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
956 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
957 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
958 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
959 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
960 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
961 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
962 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
963 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
964
;; Latin Extended Additional
;; Every code point of U+1E00..U+1EF9 gets the Latin category `l'.  An
;; even code point is paired with its odd successor, except in the gap
;; between U+1E94 and U+1EA0.
(setq c #x1e00)
(while (<= c #x1ef9)
  (let ((char (decode-char 'ucs c)))
    (modify-category-entry char ?l)
    (if (and (= (% c 2) 0)
             (not (and (> c #x1e94) (< c #x1ea0))))
        (set-case-syntax-pair char (decode-char 'ucs (1+ c)) tbl)))
  (setq c (1+ c)))
974
;; Greek
;; Every code point of U+0370..U+03FF gets the Greek category `g'.
(setq c #x0370)
(while (<= c #x03ff)
  (modify-category-entry (decode-char 'ucs c) ?g)
  (cond
   ;; U+0391..U+03AB without U+03A2: the lower-case letter lies 32 code
   ;; points higher.
   ((and (>= c #x0391) (<= c #x03ab) (/= c #x03a2))
    (set-case-syntax-pair
     (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
   ;; U+03DA..U+03EF: an even code point is paired with its odd successor.
   ((and (>= c #x03da) (<= c #x03ee) (= (% c 2) 0))
    (set-case-syntax-pair
     (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)))
  (setq c (1+ c)))
;; Explicit Greek case pairs that the loop above does not produce.
;; Each call passes the upper-case character first and the lower-case
;; character second; the literals are ISO-2022 escape sequences and must
;; be kept byte for byte.
989 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
990 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
991 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
992 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
993 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
994 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
995 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
996
;; Armenian
;; The lower-case letter of each capital U+0531..U+0556 lies 48 (#x30)
;; code points higher.
(setq c #x531)
(while (< c #x557)
  (let ((capital (decode-char 'ucs c))
        (small (decode-char 'ucs (+ c 48))))
    (set-case-syntax-pair capital small tbl))
  (setq c (1+ c)))
1003
;; Greek Extended
;; Every code point of U+1F00..U+1FFF gets the Greek category `g'.
;; Up to U+1FA7, a code point whose low nibble is 0..7 is the lower case
;; of the code point eight positions above it, with the exceptions
;; filtered out below.
(setq c #x1f00)
(while (<= c #x1fff)
  (modify-category-entry (decode-char 'ucs c) ?g)
  (and (<= (logand c #x000f) 7)
       (<= c #x1fa7)
       (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
       ;; Skip the row U+1F70..U+1F7F, whose letters are paired one by
       ;; one by the explicit calls after this loop.  The mask keeps
       ;; bits 4-7, so the row shows up as #x70; comparing against 7
       ;; could never match and let the whole row through, registering
       ;; U+1F78..U+1F7F as upper case of U+1F70..U+1F77.
       (/= (logand c #x00f0) #x70)
       (set-case-syntax-pair
        (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
  (setq c (1+ c)))
;; Explicit Greek Extended case pairs that the loop above does not
;; produce.  Each call passes the upper-case character first and the
;; lower-case character second; the literals are ISO-2022 escape
;; sequences and must be kept byte for byte.
1015 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
1016 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1017 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1018 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1019 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1020 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1021 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1022 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1023 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1024 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1025 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1026 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1027 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1028 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1029 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1030 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1031 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1032 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1033 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1034 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1035 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1036 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1037 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1038 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1039
;; cyrillic
;; Every code point of U+0400..U+04FF gets the Cyrillic category `y'.
(setq c #x0400)
(while (<= c #x04ff)
  (modify-category-entry (decode-char 'ucs c) ?y)
  (cond
   ;; U+0400..U+040F: the lower-case letter lies 80 code points higher.
   ((<= c #x040f)
    (set-case-syntax-pair
     (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
   ;; U+0410..U+042F: the lower-case letter lies 32 code points higher.
   ((<= c #x042f)
    (set-case-syntax-pair
     (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
   ;; In three later ranges an even code point is paired with its odd
   ;; successor.
   ((and (= (% c 2) 0)
         (or (and (>= c #x0460) (<= c #x0480))
             (and (>= c #x048c) (<= c #x04be))
             (and (>= c #x04d0) (<= c #x04f4))))
    (set-case-syntax-pair
     (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)))
  (setq c (1+ c)))
;; Explicit Cyrillic case pairs that the loop above does not produce.
;; Each call passes the upper-case character first and the lower-case
;; character second; the literals are ISO-2022 escape sequences and must
;; be kept byte for byte.
1059 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
1060 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
1061 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
1062 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
1063 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1064
;; general punctuation
;; U+2000..U+200B get whitespace syntax, U+2010..U+2027 symbol syntax.
;; C counts Unicode code points, so it has to be converted with
;; `decode-char' before it is used as a character, exactly as the other
;; loops of this form do; passing the bare number would address
;; whatever character happens to have that internal code.
(setq c #x2000)
(while (<= c #x200b)
  (set-case-syntax (decode-char 'ucs c) " " tbl)
  (setq c (1+ c)))
(setq c #x2010)
(while (<= c #x2027)
  (set-case-syntax (decode-char 'ucs c) "_" tbl)
  (setq c (1+ c)))
1074
;; Roman numerals
;; The small form of each numeral U+2160..U+216F lies 16 (#x10) code
;; points higher.
(setq c #x2160)
(while (< c #x2170)
  (let ((capital (decode-char 'ucs c))
        (small (decode-char 'ucs (+ c 16))))
    (set-case-syntax-pair capital small tbl))
  (setq c (1+ c)))
1081
;; Circled Latin
;; The small form of each letter U+24B6..U+24CF lies 26 code points
;; higher; both forms also join the Latin category `l'.
(setq c #x24b6)
(while (<= c #x24cf)
  (let ((capital (decode-char 'ucs c))
        (small (decode-char 'ucs (+ c 26))))
    (set-case-syntax-pair capital small tbl)
    (modify-category-entry capital ?l)
    (modify-category-entry small ?l))
  (setq c (1+ c)))
1090
;; Fullwidth Latin
;; The small form of each letter U+FF21..U+FF3A lies 32 (#x20) code
;; points higher; both forms also join the Latin category `l'.
(setq c #xff21)
(while (<= c #xff3a)
  (let ((capital (decode-char 'ucs c))
        (small (decode-char 'ucs (+ c 32))))
    (set-case-syntax-pair capital small tbl)
    (modify-category-entry capital ?l)
    (modify-category-entry small ?l))
  (setq c (1+ c)))
1099
1100 ;; Ohm, Kelvin, Angstrom
;; Only the first of the three pairs is active.  The other two would
;; use `k' and a Latin-1 letter as lower case and are kept commented out
;; for the reason given below.
1101 (set-case-syntax-pair ?\e$,1uf\e(B ?\e$,1'I\e(B tbl)
1102 ;;; These mess up the case conversion of k and \e,Ae\e(B.
1103 ;;; (set-case-syntax-pair ?\e$,1uj\e(B ?k tbl)
1104 ;;; (set-case-syntax-pair ?\e$,1uk\e(B ?\e,Ae\e(B tbl)
1105
;; Combining diacritics (U+0300..U+0362) and combining marks
;; (U+20D0..U+20E3): every code point of both ranges gets category `^'.
(dolist (range '((#x300 . #x362) (#x20d0 . #x20e3)))
  (setq c (car range))
  (while (<= c (cdr range))
    (modify-category-entry (decode-char 'ucs c) ?^)
    (setq c (1+ c))))
1117
1118 ;; Fixme: syntax for symbols &c
1119 )
1120 \f
1121 ;;; Setting word boundary.
1122
;; Category pairs that govern word boundaries.  Each pair names the
;; category of the character before and of the character after a
;; position.
(setq word-combining-categories
      (list (cons ?l ?l))

      word-separating-categories        ; (2-byte character sets)
      '((?A . ?K)                       ; Alpha numeric - Katakana
        (?A . ?C)                       ; Alpha numeric - Chinese
        (?H . ?A)                       ; Hiragana - Alpha numeric
        (?H . ?K)                       ; Hiragana - Katakana
        (?H . ?C)                       ; Hiragana - Chinese
        (?K . ?A)                       ; Katakana - Alpha numeric
        (?K . ?C)                       ; Katakana - Chinese
        (?C . ?A)                       ; Chinese - Alpha numeric
        (?C . ?K)                       ; Chinese - Katakana
        ))
1137
1138 \f
1139 ;; For each character set, put the information of the most proper
1140 ;; coding system to encode it by `preferred-coding-system' property.
1141
;; Each entry is (CHARSET . CODING-SYSTEM); the coding system is stored
;; in the charset's `preferred-coding-system' property.
(dolist (entry '((latin-iso8859-1 . iso-latin-1)
                 (latin-iso8859-2 . iso-latin-2)
                 (latin-iso8859-3 . iso-latin-3)
                 (latin-iso8859-4 . iso-latin-4)
                 (thai-tis620 . thai-tis620)
                 (greek-iso8859-7 . greek-iso-8bit)
                 (arabic-iso8859-6 . iso-2022-7bit)
                 (hebrew-iso8859-8 . hebrew-iso-8bit)
                 (katakana-jisx0201 . japanese-shift-jis)
                 (latin-jisx0201 . japanese-shift-jis)
                 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                 (latin-iso8859-9 . iso-latin-5)
                 (japanese-jisx0208-1978 . iso-2022-jp)
                 (chinese-gb2312 . cn-gb-2312)
                 (japanese-jisx0208 . iso-2022-jp)
                 (korean-ksc5601 . iso-2022-kr)
                 (japanese-jisx0212 . iso-2022-jp)
                 (chinese-cns11643-1 . iso-2022-cn)
                 (chinese-cns11643-2 . iso-2022-cn)
                 (chinese-big5-1 . chinese-big5)
                 (chinese-big5-2 . chinese-big5)
                 (chinese-sisheng . iso-2022-7bit)
                 (ipa . iso-2022-7bit)
                 (vietnamese-viscii-lower . vietnamese-viscii)
                 (vietnamese-viscii-upper . vietnamese-viscii)
                 (arabic-digit . iso-2022-7bit)
                 (arabic-1-column . iso-2022-7bit)
                 (ascii-right-to-left . iso-2022-7bit)
                 (lao . lao)
                 (arabic-2-column . iso-2022-7bit)
                 (indian-is13194 . devanagari)
                 (indian-glyph . devanagari)
                 (tibetan-1-column . tibetan)
                 (ethiopic . iso-2022-7bit)
                 (chinese-cns11643-3 . iso-2022-cn)
                 (chinese-cns11643-4 . iso-2022-cn)
                 (chinese-cns11643-5 . iso-2022-cn)
                 (chinese-cns11643-6 . iso-2022-cn)
                 (chinese-cns11643-7 . iso-2022-cn)
                 (indian-2-column . devanagari)
                 (tibetan . tibetan)
                 (latin-iso8859-14 . iso-latin-8)
                 (latin-iso8859-15 . iso-latin-9)))
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))
1188
1189 \f
1190 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
1191 ;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
1192 ;; property on the charsets.
;; For each listed charset, mark the character returned by `make-char'
;; in `auto-fill-chars' and set the charset's `nospace-between-words'
;; property.
(dolist (charset '(katakana-jisx0201
                   japanese-jisx0208 japanese-jisx0212
                   chinese-gb2312 chinese-big5-1 chinese-big5-2))
  (aset auto-fill-chars (make-char charset) t)
  (put-charset-property charset 'nospace-between-words t))
1200
1201 ;;; Local Variables:
1202 ;;; coding: iso-2022-7bit
1203 ;;; End:
1204
1205 ;;; characters.el ends here