]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
Add category `j' for katakana-jisx0201. Fix syntaxes of
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5
6 ;; Keywords: multibyte character, character set, syntax, category
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Commentary:
26
27 ;; This file contains multibyte characters. Save this file always in
28 ;; the coding system `iso-2022-7bit'.
29
30 ;; This file does not define the syntax for Latin-N character sets;
31 ;; those are defined by the files latin-N.el.
32
33 ;;; Predefined categories.
34
;; Every predefined category is listed once as (MNEMONIC . DOCSTRING)
;; and registered in table order.
(let ((predefined-categories
       '(;; For each character set.
         (?a . "ASCII")
         (?l . "Latin")
         (?t . "Thai")
         (?g . "Greek")
         (?b . "Arabic")
         (?w . "Hebrew")
         (?y . "Cyrillic")
         (?k . "Japanese katakana")
         (?r . "Japanese roman")
         (?c . "Chinese")
         (?j . "Japanese")
         (?h . "Korean")
         (?e . "Ethiopic (Ge'ez)")
         (?v . "Vietnamese")
         (?i . "Indian")
         (?o . "Lao")
         (?q . "Tibetan")

         ;; For each group (row) of 2-byte character sets.
         (?A . "Alpha-numeric characters of 2-byte character sets")
         (?C . "Chinese (Han) characters of 2-byte character sets")
         (?G . "Greek characters of 2-byte character sets")
         (?H . "Japanese Hiragana characters of 2-byte character sets")
         (?K . "Japanese Katakana characters of 2-byte character sets")
         (?N . "Korean Hangul characters of 2-byte character sets")
         (?Y . "Cyrillic characters of 2-byte character sets")
         (?I . "Indian Glyphs")

         ;; For phonetic classifications.
         (?0 . "consonant")
         (?1 . "base (independent) vowel")
         (?2 . "upper diacritical mark (including upper vowel)")
         (?3 . "lower diacritical mark (including lower vowel)")
         (?4 . "tone mark")
         (?5 . "symbol")
         (?6 . "digit")
         (?7 . "vowel-modifying diacritical mark")
         (?8 . "vowel-signs")
         (?9 . "semivowel lower"))))
  (dolist (entry predefined-categories)
    (define-category (car entry) (cdr entry))))
78
;; For filling.
(define-category ?| "While filling, we can break a line at this character.")

;; For indentation calculation.
;; The mnemonic is the SPACE character.  It is written as `?\ ' with the
;; docstring on the same line so that the literal never ends a line: a
;; bare `? ' at end of line becomes `?' followed by a newline as soon as
;; trailing whitespace is stripped, and then no longer names SPACE.
;; The `modify-category-entry' calls for the Latin NBSP characters later
;; in this file use the same `?\ ' spelling.
(define-category ?\  "This character counts as a space for indentation purposes.")

;; Keep the following for `kinsoku' processing.  See comments in
;; kinsoku.el.
(define-category ?> "A character which can't be placed at beginning of line.")
(define-category ?< "A character which can't be placed at end of line.")
90
91 \f
92 ;;; Setting syntax and category.
93
;; ASCII

;; Codes 32 through 126 (95 characters) receive both the category
;; `a' (ASCII) and the category `l' (Latin).
(dotimes (offset 95)
  (let ((code (+ 32 offset)))
    (modify-category-entry code ?a)
    (modify-category-entry code ?l)))
101
;; Arabic character set

;; Each Arabic charset gets word syntax and the category `b' (Arabic).
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  (let ((generic-char (make-char charset)))
    (modify-syntax-entry generic-char "w")
    (modify-category-entry generic-char ?b)))
112
;; Chinese character set (GB2312)

;; Syntax: word constituent for the whole charset, symbol constituent
;; for rows 33, 34 and 41, then the individual bracket characters.
(modify-syntax-entry (make-char 'chinese-gb2312) "w")
(dolist (row '(33 34 41))
  (modify-syntax-entry (make-char 'chinese-gb2312 row) "_"))
;; Each entry is (CHAR DESCRIPTOR); the descriptor names the syntax
;; class followed by the matching bracket.
(dolist (entry '((?\\e$A!2\e(B "(\e$A!3\e(B")
                 (?\\e$A!4\e(B "(\e$A!5\e(B")
                 (?\\e$A!6\e(B "(\e$A!7\e(B")
                 (?\\e$A!8\e(B "(\e$A!9\e(B")
                 (?\\e$A!:\e(B "(\e$A!;\e(B")
                 (?\\e$A!<\e(B "(\e$A!=\e(B")
                 (?\\e$A!>\e(B "(\e$A!?\e(B")
                 (?\\e$A!3\e(B ")\e$A!2\e(B")
                 (?\\e$A!5\e(B ")\e$A!4\e(B")
                 (?\\e$A!7\e(B ")\e$A!6\e(B")
                 (?\\e$A!9\e(B ")\e$A!8\e(B")
                 (?\\e$A!;\e(B ")\e$A!:\e(B")
                 (?\\e$A!=\e(B ")\e$A!<\e(B")
                 (?\\e$A!?\e(B ")\e$A!>\e(B")))
  (modify-syntax-entry (car entry) (nth 1 entry)))

;; Categories: `c' and `|' for the whole charset, then one category
;; per row.
(dolist (category '(?c ?\|))
  (modify-category-entry (make-char 'chinese-gb2312) category))
(dolist (row-category '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
  (modify-category-entry (make-char 'chinese-gb2312 (car row-category))
                         (cdr row-category)))
;; Rows 48 through 126 (79 rows) get the category `C'.
(dotimes (offset 79)
  (modify-category-entry (make-char 'chinese-gb2312 (+ 48 offset)) ?C))
145
;; Chinese character set (BIG5)

;; Both Big5 charsets are treated alike: word syntax plus the
;; categories `c', `C' and `|'.
(dolist (charset '(chinese-big5-1 chinese-big5-2))
  (let ((generic-char (make-char charset)))
    (modify-syntax-entry generic-char "w")
    (dolist (category '(?c ?C ?\|))
      (modify-category-entry generic-char category))))
161
162
;; Chinese character set (CNS11643)

;; Each of the seven CNS11643 charsets gets word syntax and the
;; categories `c', `C' and `|'.
(dolist (charset '(chinese-cns11643-1
                   chinese-cns11643-2
                   chinese-cns11643-3
                   chinese-cns11643-4
                   chinese-cns11643-5
                   chinese-cns11643-6
                   chinese-cns11643-7))
  (let ((generic-char (make-char charset)))
    (modify-syntax-entry generic-char "w")
    (modify-category-entry generic-char ?c)
    (modify-category-entry generic-char ?C)
    (modify-category-entry generic-char ?|)))
180
;; Cyrillic character set (ISO-8859-5)

(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)

;; Code 160 is whitespace; three further characters are punctuation.
(modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
(dolist (punctuation '(?\e,L-\e(B ?\e,Lp\e(B ?\e,L}\e(B))
  (modify-syntax-entry punctuation "."))

;; Case pairs, each written as (UPPER . LOWER) and installed in the
;; standard case table in the order listed.
(let ((tbl (standard-case-table)))
  (dolist (pair '((?\e,L!\e(B . ?\e,Lq\e(B) (?\e,L"\e(B . ?\e,Lr\e(B)
                  (?\e,L#\e(B . ?\e,Ls\e(B) (?\e,L$\e(B . ?\e,Lt\e(B)
                  (?\e,L%\e(B . ?\e,Lu\e(B) (?\e,L&\e(B . ?\e,Lv\e(B)
                  (?\e,L'\e(B . ?\e,Lw\e(B) (?\e,L(\e(B . ?\e,Lx\e(B)
                  (?\e,L)\e(B . ?\e,Ly\e(B) (?\e,L*\e(B . ?\e,Lz\e(B)
                  (?\e,L+\e(B . ?\e,L{\e(B) (?\e,L,\e(B . ?\e,L|\e(B)
                  (?\e,L.\e(B . ?\e,L~\e(B) (?\e,L/\e(B . ?\e,L\7f\e(B)
                  (?\e,L0\e(B . ?\e,LP\e(B) (?\e,L1\e(B . ?\e,LQ\e(B)
                  (?\e,L2\e(B . ?\e,LR\e(B) (?\e,L3\e(B . ?\e,LS\e(B)
                  (?\e,L4\e(B . ?\e,LT\e(B) (?\e,L5\e(B . ?\e,LU\e(B)
                  (?\e,L6\e(B . ?\e,LV\e(B) (?\e,L7\e(B . ?\e,LW\e(B)
                  (?\e,L8\e(B . ?\e,LX\e(B) (?\e,L9\e(B . ?\e,LY\e(B)
                  (?\e,L:\e(B . ?\e,LZ\e(B) (?\e,L;\e(B . ?\e,L[\e(B)
                  (?\e,L<\e(B . ?\e,L\\e(B) (?\e,L=\e(B . ?\e,L]\e(B)
                  (?\e,L>\e(B . ?\e,L^\e(B) (?\e,L?\e(B . ?\e,L_\e(B)
                  (?\e,L@\e(B . ?\e,L`\e(B) (?\e,LA\e(B . ?\e,La\e(B)
                  (?\e,LB\e(B . ?\e,Lb\e(B) (?\e,LC\e(B . ?\e,Lc\e(B)
                  (?\e,LD\e(B . ?\e,Ld\e(B) (?\e,LE\e(B . ?\e,Le\e(B)
                  (?\e,LF\e(B . ?\e,Lf\e(B) (?\e,LG\e(B . ?\e,Lg\e(B)
                  (?\e,LH\e(B . ?\e,Lh\e(B) (?\e,LI\e(B . ?\e,Li\e(B)
                  (?\e,LJ\e(B . ?\e,Lj\e(B) (?\e,LK\e(B . ?\e,Lk\e(B)
                  (?\e,LL\e(B . ?\e,Ll\e(B) (?\e,LM\e(B . ?\e,Lm\e(B)
                  (?\e,LN\e(B . ?\e,Ln\e(B) (?\e,LO\e(B . ?\e,Lo\e(B)))
    (set-case-syntax-pair (car pair) (cdr pair) tbl)))
236
;; Devanagari character set

;; Each table entry is (CHARS SYNTAX CATEGORY).  CHARS lists single
;; characters; "A-B" stands for every character from A through B.
(let ((deflist '(;; chars	syntax	category
                 ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
                                            ; chandrabindu, anuswar, visarga
                 ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
                 ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
                 ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
                 ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
                 ))
      from upto)
  (dolist (spec deflist)
    (let* ((chars (car spec))
           (syntax (nth 1 spec))
           (category (nth 2 spec))
           (len (length chars))
           (idx 0))
      (while (< idx len)
        (if (/= (aref chars idx) ?-)
            ;; Plain character: a run of exactly one.
            (setq from (aref chars idx)
                  upto from)
          ;; Hyphen: FROM already sits one past the character handled on
          ;; the previous pass, so only the end of the run is read.
          (setq idx (1+ idx)
                upto (aref chars idx)))
        (while (<= from upto)
          (modify-syntax-entry from syntax)
          (modify-category-entry from category)
          (setq from (1+ from)))
        (setq idx (1+ idx))))))
267
;; Ethiopic character set

;; The whole charset is in category `e' with word syntax; the
;; characters listed below are then switched to punctuation.
(let ((generic-char (make-char 'ethiopic)))
  (modify-category-entry generic-char ?e)
  (modify-syntax-entry generic-char "w"))
(dolist (punctuation '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B))
  (modify-syntax-entry punctuation "."))
276
;; Greek character set (ISO-8859-7)

(modify-category-entry (make-char 'greek-iso8859-7) ?g)

;; Codes 182 through 254 (73 characters) are word constituents.
(dotimes (offset 73)
  (modify-syntax-entry (make-char 'greek-iso8859-7 (+ 182 offset)) "w"))
(modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
;; Applied after the range above, so these three end up as punctuation.
(dolist (punctuation '(?\e,F7\e(B ?\e,F;\e(B ?\e,F=\e(B))
  (modify-syntax-entry punctuation "."))

;; Case pairs, each written as (UPPER . LOWER) and installed in the
;; standard case table in the order listed.
(let ((tbl (standard-case-table)))
  (dolist (pair '((?\e,FA\e(B . ?\e,Fa\e(B) (?\e,FB\e(B . ?\e,Fb\e(B)
                  (?\e,FC\e(B . ?\e,Fc\e(B) (?\e,FD\e(B . ?\e,Fd\e(B)
                  (?\e,FE\e(B . ?\e,Fe\e(B) (?\e,FF\e(B . ?\e,Ff\e(B)
                  (?\e,FG\e(B . ?\e,Fg\e(B) (?\e,FH\e(B . ?\e,Fh\e(B)
                  (?\e,FI\e(B . ?\e,Fi\e(B) (?\e,FJ\e(B . ?\e,Fj\e(B)
                  (?\e,FK\e(B . ?\e,Fk\e(B) (?\e,FL\e(B . ?\e,Fl\e(B)
                  (?\e,FM\e(B . ?\e,Fm\e(B) (?\e,FN\e(B . ?\e,Fn\e(B)
                  (?\e,FO\e(B . ?\e,Fo\e(B) (?\e,FP\e(B . ?\e,Fp\e(B)
                  (?\e,FQ\e(B . ?\e,Fq\e(B) (?\e,FS\e(B . ?\e,Fs\e(B)
                  (?\e,FT\e(B . ?\e,Ft\e(B) (?\e,FU\e(B . ?\e,Fu\e(B)
                  (?\e,FV\e(B . ?\e,Fv\e(B) (?\e,FW\e(B . ?\e,Fw\e(B)
                  (?\e,FX\e(B . ?\e,Fx\e(B) (?\e,FY\e(B . ?\e,Fy\e(B)
                  (?\e,FZ\e(B . ?\e,Fz\e(B) (?\e,F[\e(B . ?\e,F{\e(B)
                  (?\e,F?\e(B . ?\e,F~\e(B) (?\e,F>\e(B . ?\e,F}\e(B)
                  (?\e,F<\e(B . ?\e,F|\e(B) (?\e,F6\e(B . ?\e,F\\e(B)
                  (?\e,F8\e(B . ?\e,F]\e(B) (?\e,F9\e(B . ?\e,F^\e(B)
                  (?\e,F:\e(B . ?\e,F_\e(B)))
    (set-case-syntax-pair (car pair) (cdr pair) tbl)))
323
;; Hebrew character set (ISO-8859-8)

(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)

;; Codes 224 through 250 (27 characters) are word constituents.
(dotimes (offset 27)
  (modify-syntax-entry (make-char 'hebrew-iso8859-8 (+ 224 offset)) "w"))
(modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
333
;; Indian character set (IS 13194 and other Emacs original Indian charsets)

(dolist (charset-category '((indian-is13194 . ?i)
                            (indian-2-column . ?I)
                            (indian-1-column . ?I)))
  (modify-category-entry (make-char (car charset-category))
                         (cdr charset-category)))

;; Each table entry is (CHARS SYNTAX CATEGORY).  CHARS lists single
;; characters; "A-B" stands for every character from A through B.
(let ((deflist
        '(;; chars	syntax	category
          ("\e(5!"#\e(B" "w" ?7) ; vowel-modifying diacritical mark
                                 ; chandrabindu, anuswar, visarga
          ("\e(5$\e(B-\e(52\e(B" "w" ?1) ; base (independent) vowel
          ("\e(53\e(B-\e(5X\e(B" "w" ?0) ; consonant
          ("\e(5Z\e(B-\e(5g\e(B" "w" ?8) ; matra
          ("\e(5q\e(B-\e(5z\e(B" "w" ?6) ; digit
          ))
      from upto)
  (dolist (spec deflist)
    (let* ((chars (car spec))
           (syntax (nth 1 spec))
           (category (nth 2 spec))
           (len (length chars))
           (idx 0))
      (while (< idx len)
        (if (/= (aref chars idx) ?-)
            ;; Plain character: a run of exactly one.
            (setq from (aref chars idx)
                  upto from)
          ;; Hyphen: FROM already sits one past the character handled on
          ;; the previous pass, so only the end of the run is read.
          (setq idx (1+ idx)
                upto (aref chars idx)))
        (while (<= from upto)
          (modify-syntax-entry from syntax)
          (modify-category-entry from category)
          (setq from (1+ from)))
        (setq idx (1+ idx))))))
369
370
;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)

;; Each entry is (CHARSET CATEGORY...): every listed category is added
;; to the whole charset.
(dolist (entry '((katakana-jisx0201 ?k ?j ?\|)
                 (latin-jisx0201 ?r)
                 (japanese-jisx0208 ?j ?\|)
                 (japanese-jisx0212 ?j ?\|)))
  (let ((generic-char (make-char (car entry))))
    (dolist (category (cdr entry))
      (modify-category-entry generic-char category))))
381
;; JISX0208

;; Syntax: word constituent for the whole charset, symbol constituent
;; for rows 33, 34 and 40, then individual characters put back to word
;; syntax, then the bracket pairs.
(modify-syntax-entry (make-char 'japanese-jisx0208) "w")
(dolist (row '(33 34 40))
  (modify-syntax-entry (make-char 'japanese-jisx0208 row) "_"))
(dolist (word-char '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))
  (modify-syntax-entry word-char "w"))
;; Each entry is (CHAR DESCRIPTOR); the descriptor names the syntax
;; class followed by the matching bracket.
(dolist (entry '((?\\e$B!J\e(B "(\e$B!K\e(B")
                 (?\\e$B!N\e(B "(\e$B!O\e(B")
                 (?\\e$B!P\e(B "(\e$B!Q\e(B")
                 (?\\e$B!V\e(B "(\e$B!W\e(B")
                 (?\\e$B!X\e(B "(\e$B!Y\e(B")
                 (?\\e$B!K\e(B ")\e$B!J\e(B")
                 (?\\e$B!O\e(B ")\e$B!N\e(B")
                 (?\\e$B!Q\e(B ")\e$B!P\e(B")
                 (?\\e$B!W\e(B ")\e$B!V\e(B")
                 (?\\e$B!Y\e(B ")\e$B!X\e(B")))
  (modify-syntax-entry (car entry) (nth 1 entry)))

;; Categories: one per row first, then individual characters.
(dolist (row-category '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
  (modify-category-entry (make-char 'japanese-jisx0208 (car row-category))
                         (cdr row-category)))
;; Rows 48 through 126 (79 rows) get the category `C'.
(dotimes (offset 79)
  (modify-category-entry (make-char 'japanese-jisx0208 (+ 48 offset)) ?C))
(modify-category-entry ?\e$B!<\e(B ?K)
(dolist (kana-mark '(?\e$B!+\e(B ?\e$B!,\e(B))
  (modify-category-entry kana-mark ?K)
  (modify-category-entry kana-mark ?H))
(dolist (han-mark '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))
  (modify-category-entry han-mark ?C))
421
;; JISX0212

;; Word syntax for the whole charset, symbol syntax for rows 33-35, and
;; the category `C' for every character.
(let ((generic-char (make-char 'japanese-jisx0212)))
  (modify-syntax-entry generic-char "w")
  (dolist (row '(33 34 35))
    (modify-syntax-entry (make-char 'japanese-jisx0212 row) "_"))
  (modify-category-entry generic-char ?C))
429
;; JISX0201-Kana

;; The whole charset is word constituent; three characters are then
;; switched to punctuation.
(modify-syntax-entry (make-char 'katakana-jisx0201) "w")
(let ((chars '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B)))
  (while chars
    (modify-syntax-entry (car chars) ".")
    (setq chars (cdr chars))))

;; Bracket pair.  The first character opens and names the second as its
;; match, so the second must be a *close* delimiter (class `)') naming
;; the first.  It used to be given the open class `(' as well, unlike
;; every other bracket pair set up in this file.
(modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
(modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B")
439
;; Korean character set (KSC5601)

(let ((generic-char (make-char 'korean-ksc5601)))
  ;; Word syntax for the whole charset, symbol syntax for selected rows.
  (modify-syntax-entry generic-char "w")
  (dolist (row '(33 34 38 39 40 41))
    (modify-syntax-entry (make-char 'korean-ksc5601 row) "_"))

  ;; Category `h' for the whole charset, then one category per row.
  (modify-category-entry generic-char ?h)
  (dolist (row-category '((35 . ?A) (37 . ?G) (42 . ?H) (43 . ?K) (44 . ?Y)))
    (modify-category-entry (make-char 'korean-ksc5601 (car row-category))
                           (cdr row-category))))
456
;; Latin character set (latin-1,2,3,4,5,8,9)

;; Every Latin charset is in category `l'; code 160 of each one is also
;; in the space category used for indentation.
(dolist (charset '(latin-iso8859-1
                   latin-iso8859-2
                   latin-iso8859-3
                   latin-iso8859-4
                   latin-iso8859-9
                   latin-iso8859-14
                   latin-iso8859-15))
  (modify-category-entry (make-char charset) ?l)
  (modify-category-entry (make-char charset 160) ?\ ))
474
;; Lao character set

(modify-category-entry (make-char 'lao) ?o)

;; Each table entry is (CHARS SYNTAX CATEGORY).  CHARS lists single
;; characters; "A-B" stands for every character from A through B.
(let ((deflist '(;; chars	syntax	category
                 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
                 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
                 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
                 ("\e(1XY\e(B" "w" ?3) ; vowel lower
                 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
                 ("\e(1\\e(B" "w" ?9) ; semivowel lower
                 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
                 ("\e(1Of\e(B" "_" ?5) ; symbol
                 ))
      from upto)
  (dolist (spec deflist)
    (let* ((chars (car spec))
           (syntax (nth 1 spec))
           (category (nth 2 spec))
           (len (length chars))
           (idx 0))
      (while (< idx len)
        (if (/= (aref chars idx) ?-)
            ;; Plain character: a run of exactly one.
            (setq from (aref chars idx)
                  upto from)
          ;; Hyphen: FROM already sits one past the character handled on
          ;; the previous pass, so only the end of the run is read.
          (setq idx (1+ idx)
                upto (aref chars idx)))
        (while (<= from upto)
          (modify-syntax-entry from syntax)
          (modify-category-entry from category)
          (setq from (1+ from)))
        (setq idx (1+ idx))))))
509
;; Thai character set (TIS620)

(modify-category-entry (make-char 'thai-tis620) ?t)

;; Each table entry is (CHARS SYNTAX CATEGORY).  CHARS lists single
;; characters; "A-B" stands for every character from A through B.
(let ((deflist '(;; chars	syntax	category
                 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
                 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
                 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
                 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
                 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
                 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
                 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
                 ))
      from upto)
  (dolist (spec deflist)
    (let* ((chars (car spec))
           (syntax (nth 1 spec))
           (category (nth 2 spec))
           (len (length chars))
           (idx 0))
      (while (< idx len)
        (if (/= (aref chars idx) ?-)
            ;; Plain character: a run of exactly one.
            (setq from (aref chars idx)
                  upto from)
          ;; Hyphen: FROM already sits one past the character handled on
          ;; the previous pass, so only the end of the run is read.
          (setq idx (1+ idx)
                upto (aref chars idx)))
        (while (<= from upto)
          (modify-syntax-entry from syntax)
          (modify-category-entry from category)
          (setq from (1+ from)))
        (setq idx (1+ idx))))))
543
;; Tibetan character set

(dolist (charset '(tibetan tibetan-1-column))
  (modify-category-entry (make-char charset) ?q))

;; Each table entry is (CHARS SYNTAX CATEGORY).  CHARS lists single
;; characters; "A-B" stands for every character from A through B.
;; Entries are applied in order, so a later entry overrides the syntax
;; an earlier one gave to the same character.
(let ((deflist '(;; chars	syntax	category
                 ("\e$(7"!\e(B-\e$(7"J"K\e(B" "w" ?0) ; consonant
                 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
                 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
                 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
                 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
                 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
                 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
                 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
                 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
                 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
                 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
                 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
                 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
                 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
                 ))
      from upto)
  (dolist (spec deflist)
    (let* ((chars (car spec))
           (syntax (nth 1 spec))
           (category (nth 2 spec))
           (len (length chars))
           (idx 0))
      (while (< idx len)
        (if (/= (aref chars idx) ?-)
            ;; Plain character: a run of exactly one.
            (setq from (aref chars idx)
                  upto from)
          ;; Hyphen: FROM already sits one past the character handled on
          ;; the previous pass, so only the end of the run is read.
          (setq idx (1+ idx)
                upto (aref chars idx)))
        (while (<= from upto)
          (modify-syntax-entry from syntax)
          (modify-category-entry from category)
          (setq from (1+ from)))
        (setq idx (1+ idx))))))
585
;; Vietnamese character set

;; Both halves are word constituents in category `v', and also in `l'
;; to make a word with latin characters.
(dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
  (let ((generic-char (make-char charset)))
    (modify-syntax-entry generic-char "w")
    (modify-category-entry generic-char ?v)
    (modify-category-entry generic-char ?l)))

;; Codes 32 through 127 (96 codes): the upper-charset character at each
;; code is paired with the lower-charset character at the same code.
(let ((tbl (standard-case-table)))
  (dotimes (offset 96)
    (let ((code (+ 32 offset)))
      (set-case-syntax-pair (make-char 'vietnamese-viscii-upper code)
                            (make-char 'vietnamese-viscii-lower code)
                            tbl))))
605
606 ;; Unicode (mule-unicode-0100-24ff)
607
608 (let ((tbl (standard-case-table)) c)
609
610 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
611 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
612 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
613 ;; Thus we have to check language-environment to handle casing
614 ;; correctly. Currently only I<->i is available.
615
616 ;; case-syntax-pair's are not yet given for Latin Extended-B
617
618 ;; Latin Extended-A, Latin Extended-B
619 (setq c #x0100)
620 (while (<= c #x0233)
621 (modify-category-entry (decode-char 'ucs c) ?l)
622 (and (or (<= c #x012e)
623 (and (>= c #x014a) (<= c #x0177)))
624 (zerop (% c 2))
625 (set-case-syntax-pair
626 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
627 (and (>= c #x013a)
628 (<= c #x0148)
629 (zerop (% c 2))
630 (set-case-syntax-pair
631 (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))
632 (setq c (1+ c)))
633 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
634 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
635 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
636 ; (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl) ; these two have different length!
637 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
638 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
639 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
640
641 ;; Latin Extended Additional
642 (setq c #x1e00)
643 (while (<= c #x1ef9)
644 (modify-category-entry (decode-char 'ucs c) ?l)
645 (and (zerop (% c 2))
646 (or (<= c #x1e94) (>= c #x1ea0))
647 (set-case-syntax-pair
648 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
649 (setq c (1+ c)))
650
651 ;; Greek
652 (setq c #x0370)
653 (while (<= c #x03ff)
654 (modify-category-entry (decode-char 'ucs c) ?g)
655 (if (or (and (>= c #x0391) (<= c #x03a1))
656 (and (>= c #x03a3) (<= c #x03ab)))
657 (set-case-syntax-pair
658 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
659 (and (>= c #x03da)
660 (<= c #x03ee)
661 (zerop (% c 2))
662 (set-case-syntax-pair
663 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
664 (setq c (1+ c)))
665 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
666 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
667 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
668 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
669 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
670 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
671 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
672
;; Greek Extended
;; Uses `c' and `tbl' bound by the enclosing `let'.  Every character in
;; U+1F00..U+1FFF gets the category `g'.  In addition, a character with
;; low nibble 0-7 and code <= U+1FA7 is paired with the character 8 code
;; points above it, which is passed as the first argument of
;; `set-case-syntax-pair'.
(setq c #x1f00)
(while (<= c #x1fff)
  (modify-category-entry (decode-char 'ucs c) ?g)
  (and (<= (logand c #x000f) 7)
       (<= c #x1fa7)
       (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
       ;; Skip the U+1F70..U+1F7F row.  Masking with #x00f0 yields a
       ;; multiple of 16, so the row must be tested against #x70; the
       ;; former comparison with 7 was always true and paired
       ;; U+1F70..U+1F77 with U+1F78..U+1F7F.
       (/= (logand c #x00f0) #x70)
       (set-case-syntax-pair
        (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
  (setq c (1+ c)))
684 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
685 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
686 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
687 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
688 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
689 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
690 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
691 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
692 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
693 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
694 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
695 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
696 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
697 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
698 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
699 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
700 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
701 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
702 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
703 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
704 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
705 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
706 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
707 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
708
709 ;; cyrillic
710 (setq c #x0400)
711 (while (<= c #x04ff)
712 (modify-category-entry (decode-char 'ucs c) ?y)
713 (and (>= c #x0400)
714 (<= c #x040f)
715 (set-case-syntax-pair
716 (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
717 (and (>= c #x0410)
718 (<= c #x042f)
719 (set-case-syntax-pair
720 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
721 (and (zerop (% c 2))
722 (or (and (>= c #x0460) (<= c #x0480))
723 (and (>= c #x048c) (<= c #x04be))
724 (and (>= c #x04d0) (<= c #x04f4)))
725 (set-case-syntax-pair
726 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
727 (setq c (1+ c)))
728 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
729 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
730 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
731 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
732 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
733
734 ;; general punctuation
735 (setq c #x2000)
736 (while (<= c #x200b)
737 (set-case-syntax c " " tbl)
738 (setq c (1+ c)))
739 (setq c #x2010)
740 (while (<= c #x2027)
741 (set-case-syntax c "_" tbl)
742 (setq c (1+ c)))
743 )
744
745 \f
;;; Setting word boundary.

;; Both variables hold lists of (CATEGORY . CATEGORY) pairs.
(setq word-combining-categories
      (list (cons ?l ?l))

      word-separating-categories        ; (2-byte character sets)
      (list (cons ?A ?K)                ; Alpha numeric - Katakana
            (cons ?A ?C)                ; Alpha numeric - Chinese
            (cons ?H ?A)                ; Hiragana - Alpha numeric
            (cons ?H ?K)                ; Hiragana - Katakana
            (cons ?H ?C)                ; Hiragana - Chinese
            (cons ?K ?A)                ; Katakana - Alpha numeric
            (cons ?K ?C)                ; Katakana - Chinese
            (cons ?C ?A)                ; Chinese - Alpha numeric
            (cons ?C ?K)                ; Chinese - Katakana
            ))
762
763 \f
;; For each character set, put the information of the most proper
;; coding system to encode it by `preferred-coding-system' property.

;; Each entry is (CHARSET . CODING-SYSTEM).
(dolist (entry '((latin-iso8859-1 . iso-latin-1)
                 (latin-iso8859-2 . iso-latin-2)
                 (latin-iso8859-3 . iso-latin-3)
                 (latin-iso8859-4 . iso-latin-4)
                 (thai-tis620 . thai-tis620)
                 (greek-iso8859-7 . greek-iso-8bit)
                 (arabic-iso8859-6 . iso-2022-7bit)
                 (hebrew-iso8859-8 . hebrew-iso-8bit)
                 (katakana-jisx0201 . japanese-shift-jis)
                 (latin-jisx0201 . japanese-shift-jis)
                 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                 (latin-iso8859-9 . iso-latin-5)
                 (japanese-jisx0208-1978 . iso-2022-jp)
                 (chinese-gb2312 . cn-gb-2312)
                 (japanese-jisx0208 . iso-2022-jp)
                 (korean-ksc5601 . iso-2022-kr)
                 (japanese-jisx0212 . iso-2022-jp)
                 (chinese-cns11643-1 . iso-2022-cn)
                 (chinese-cns11643-2 . iso-2022-cn)
                 (chinese-big5-1 . chinese-big5)
                 (chinese-big5-2 . chinese-big5)
                 (chinese-sisheng . iso-2022-7bit)
                 (ipa . iso-2022-7bit)
                 (vietnamese-viscii-lower . vietnamese-viscii)
                 (vietnamese-viscii-upper . vietnamese-viscii)
                 (arabic-digit . iso-2022-7bit)
                 (arabic-1-column . iso-2022-7bit)
                 (ascii-right-to-left . iso-2022-7bit)
                 (lao . lao)
                 (arabic-2-column . iso-2022-7bit)
                 (indian-is13194 . devanagari)
                 (indian-1-column . devanagari)
                 (tibetan-1-column . tibetan)
                 (ethiopic . iso-2022-7bit)
                 (chinese-cns11643-3 . iso-2022-cn)
                 (chinese-cns11643-4 . iso-2022-cn)
                 (chinese-cns11643-5 . iso-2022-cn)
                 (chinese-cns11643-6 . iso-2022-cn)
                 (chinese-cns11643-7 . iso-2022-cn)
                 (indian-2-column . devanagari)
                 (tibetan . tibetan)
                 (latin-iso8859-14 . iso-latin-8)
                 (latin-iso8859-15 . iso-latin-9)))
  (let ((charset (car entry))
        (coding-system (cdr entry)))
    (put-charset-property charset 'preferred-coding-system coding-system)))
813
814 \f
;; Setup auto-fill-chars for charsets that should invoke auto-filling.
;; SPACE and NEWLINE are already set.  Also put `nospace-between-words'
;; property to the charsets.
(dolist (charset '(katakana-jisx0201
                   japanese-jisx0208 japanese-jisx0212
                   chinese-gb2312 chinese-big5-1 chinese-big5-2))
  (put-charset-property charset 'nospace-between-words t)
  (aset auto-fill-chars (make-char charset) t))
825
826 ;;; Local Variables:
827 ;;; coding: iso-2022-7bit
828 ;;; End:
829
830 ;;; end of characters.el