]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
Put `prefered-coding-system'
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5
6 ;; Keywords: multibyte character, character set, syntax, category
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Commentary:
26
27 ;; This file contains multibyte characters. Save this file always in
28 ;; the coding system `iso-2022-7bit'.
29
30 ;; This file does not define the syntax for Latin-N character sets;
31 ;; those are defined by the files latin-N.el.
32
33 ;;; Predefined categories.
34
35 ;; For each character set.  The first argument is the one-character category mnemonic, the second its docstring; the mnemonics are attached to characters by the `modify-category-entry' calls later in this file.
36
37 (define-category ?a "ASCII")
38 (define-category ?l "Latin")
39 (define-category ?t "Thai")
40 (define-category ?g "Greek")
41 (define-category ?b "Arabic")
42 (define-category ?w "Hebrew")
43 (define-category ?y "Cyrillic")
44 (define-category ?k "Japanese katakana")
45 (define-category ?r "Japanese roman")
46 (define-category ?c "Chinese")
47 (define-category ?j "Japanese")
48 (define-category ?h "Korean")
49 (define-category ?e "Ethiopic (Ge'ez)")
50 (define-category ?v "Vietnamese")
51 (define-category ?i "Indian")
52 (define-category ?o "Lao")
53 (define-category ?q "Tibetan")
54
55 ;; For each group (row) of 2-byte character sets.  These are attached row by row further down, e.g. (make-char 'chinese-gb2312 36) receives `H'.
56
57 (define-category ?A "Alpha numeric characters of 2-byte character sets")
58 (define-category ?C "Chinese (Han) characters of 2-byte character sets")
59 (define-category ?G "Greek characters of 2-byte characters sets")
60 (define-category ?H "Japanese Hiragana characters of 2-byte character sets")
61 (define-category ?K "Japanese Katakana characters of 2-byte character sets")
62 (define-category ?N "Korean Hangul characters of 2-byte character sets")
63 (define-category ?Y "Cyrillic character of 2-byte character sets")
64 (define-category ?I "Indian Glyphs")
65
66 ;; For phonetic classifications.  The digit mnemonics are used by the Devanagari, Lao, Thai and Tibetan tables later in this file.
67
68 (define-category ?0 "consonant")
69 (define-category ?1 "base (independent) vowel")
70 (define-category ?2 "upper diacritical mark (including upper vowel)")
71 (define-category ?3 "lower diacritical mark (including lower vowel)")
72 (define-category ?4 "tone mark")
73 (define-category ?5 "symbol")
74 (define-category ?6 "digit")
75 (define-category ?7 "vowel-modifying diacritical mark")
76 (define-category ?8 "vowel-signs")
77 (define-category ?9 "semivowel lower")
78
79 ;; For filling.  The Chinese and Japanese sections below attach this category to whole character sets.
80 (define-category ?| "While filling, we can break a line at this character.")
81
82 ;; For indentation calculation.  The mnemonic is the space character; it is written as `?\ ' (as in the Latin section below) so that stripping trailing whitespace cannot turn it into a different character.
83 (define-category ?\  ; <- escaped space, followed by a separator
84 "This character counts as a space for indentation purposes.")
85
86 ;; Keep the following for `kinsoku' processing. See comments in
87 ;; kinsoku.el.
88 (define-category ?> "A character which can't be placed at beginning of line.")
89 (define-category ?< "A character which can't be placed at end of line.")
90
91 \f
92 ;;; Setting syntax and category.
93
94 ;; ASCII
95
96 (let ((code 32))
97 (while (<= code 126) ; Codes 32..126 get
98 (modify-category-entry code ?a) ; the category `a' (ASCII)
99 (modify-category-entry code ?l) ; and `l' (Latin).
100 (setq code (1+ code))))
101
102 ;; Arabic character set
103
104 (let ((arabic-sets '(arabic-iso8859-6
105 arabic-digit
106 arabic-1-column
107 arabic-2-column)))
108 (while arabic-sets ; each listed charset as a whole gets
109 (modify-syntax-entry (make-char (car arabic-sets)) "w") ; word syntax
110 (modify-category-entry (make-char (car arabic-sets)) ?b) ; and category `b' (Arabic)
111 (setq arabic-sets (cdr arabic-sets))))
112
113 ;; Chinese character set (GB2312): syntax.  The charset as a whole is word syntax; rows 33, 34 and 41 are then overridden to symbol syntax, and seven bracket pairs get paired open/close syntax.
114
115 (modify-syntax-entry (make-char 'chinese-gb2312) "w")
116 (modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
117 (modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
118 (modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
119 (modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
120 (modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
121 (modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
122 (modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
123 (modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
124 (modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
125 (modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
126 (modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
127 (modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
128 (modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
129 (modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
130 (modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
131 (modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
132 (modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
133
134 (modify-category-entry (make-char 'chinese-gb2312) ?c) ; whole charset: `c' (Chinese)
135 (modify-category-entry (make-char 'chinese-gb2312) ?\|) ; and `|' (line may be broken here)
136 (modify-category-entry (make-char 'chinese-gb2312 35) ?A) ; row 35: alpha numeric
137 (modify-category-entry (make-char 'chinese-gb2312 36) ?H) ; row 36: Hiragana
138 (modify-category-entry (make-char 'chinese-gb2312 37) ?K) ; row 37: Katakana
139 (modify-category-entry (make-char 'chinese-gb2312 38) ?G) ; row 38: Greek
140 (modify-category-entry (make-char 'chinese-gb2312 39) ?Y) ; row 39: Cyrillic
142 (let ((row 48)) ; rows 48..126: `C' (Han)
143 (while (< row 127)
144 (modify-category-entry (make-char 'chinese-gb2312 row) ?C)
145 (setq row (1+ row))))
146
147 ;; Chinese character set (BIG5)
148
149 (let ((big5-1-char (make-char 'chinese-big5-1))
150 (big5-2-char (make-char 'chinese-big5-2)))
151 ;; chinese-big5-1: word syntax; categories `c', `C' and `|'.
152 (modify-syntax-entry big5-1-char "w")
153 (modify-category-entry big5-1-char ?c)
154 (modify-category-entry big5-1-char ?C)
155 (modify-category-entry big5-1-char ?\|)
156
157 ;; chinese-big5-2: the same settings.
158 (modify-syntax-entry big5-2-char "w")
159 (modify-category-entry big5-2-char ?c)
160 (modify-category-entry big5-2-char ?C)
161 (modify-category-entry big5-2-char ?\|))
162
163
164 ;; Chinese character set (CNS11643)
165
166 (let ((planes '(chinese-cns11643-1
167 chinese-cns11643-2
168 chinese-cns11643-3
169 chinese-cns11643-4
170 chinese-cns11643-5
171 chinese-cns11643-6
172 chinese-cns11643-7))
173 plane-char)
174 (while planes ; same settings for each of the seven charsets
175 (setq plane-char (make-char (car planes)))
176 (modify-syntax-entry plane-char "w")
177 (modify-category-entry plane-char ?c)
178 (modify-category-entry plane-char ?C)
179 (modify-category-entry plane-char ?|)
180 (setq planes (cdr planes))))
181
182 ;; Cyrillic character set (ISO-8859-5): category `y' for the whole charset, whitespace syntax for code 160, punctuation syntax for three characters, then case pairs registered in the standard case table.
183
184 (modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
185
186 (modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
187 (modify-syntax-entry ?\e,L-\e(B ".")
188 (modify-syntax-entry ?\e,Lp\e(B ".")
189 (modify-syntax-entry ?\e,L}\e(B ".")
190 (let ((tbl (standard-case-table))) ; every pair below is recorded in this table
191 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
192 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
193 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
194 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
195 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
196 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
197 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
198 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
199 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
200 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
201 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
202 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
203 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
204 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
205 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
206 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
207 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
208 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
209 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
210 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
211 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
212 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
213 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
214 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
215 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
216 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
217 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
218 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
219 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
220 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
221 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
222 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
223 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
224 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
225 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
226 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
227 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
228 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
229 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
230 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
231 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
232 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
233 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
234 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
235 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
236 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl))
237
238 ;; Devanagari character set.  Each DEFLIST entry is (CHARS SYNTAX CATEGORY); in CHARS a `-' between two characters denotes an inclusive range, any other character stands for itself.
239
240 (let ((deflist '(;; chars syntax category
241 ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
242 ; chandrabindu, anuswar, visarga
243 ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
244 ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
245 ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
246 ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
247 ))
248 elm chars len syntax category to ch i)
249 (while deflist ; one table entry per pass
250 (setq elm (car deflist))
251 (setq chars (car elm)
252 len (length chars)
253 syntax (nth 1 elm)
254 category (nth 2 elm)
255 i 0)
256 (while (< i len) ; I is advanced by `char-bytes' of each character read
257 (if (= (aref chars i) ?-)
258 (setq i (1+ i) ; skip the `-'
259 to (sref chars i)) ; range end; CH is still one past the range start from the previous pass
260 (setq ch (sref chars i) ; single character: a range of one
261 to ch))
262 (while (<= ch to) ; apply SYNTAX and CATEGORY to every code CH..TO
263 (modify-syntax-entry ch syntax)
264 (modify-category-entry ch category)
265 (setq ch (1+ ch)))
266 (setq i (+ i (char-bytes to))))
267 (setq deflist (cdr deflist))))
268
269 ;; Ethiopic character set: category `e' and word syntax for the whole charset, then punctuation syntax for the listed characters.
270
271 (modify-category-entry (make-char 'ethiopic) ?e)
272 (modify-syntax-entry (make-char 'ethiopic) "w")
273 (let ((chars '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B)))
274 (while chars
275 (modify-syntax-entry (car chars) ".")
276 (setq chars (cdr chars))))
277
278 ;; European character set (Latin-1,2,3,4,5)
279
280 (let ((latin-sets '(latin-iso8859-1
281 latin-iso8859-2
282 latin-iso8859-3
283 latin-iso8859-4
284 latin-iso8859-9)))
285 (while latin-sets
286 ;; The whole charset is Latin (`l'); its code 160 also counts as a space.
287 (modify-category-entry (make-char (car latin-sets)) ?l)
288 (modify-category-entry (make-char (car latin-sets) 160) ?\ )
289 (setq latin-sets (cdr latin-sets))))
290
291
292 ;; Greek character set (ISO-8859-7): category `g' for the whole charset, word syntax for codes 182..254 and for code 160, then punctuation syntax for three characters.
293
294 (modify-category-entry (make-char 'greek-iso8859-7) ?g)
295
296 (let ((c 182))
297 (while (< c 255)
298 (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
299 (setq c (1+ c))))
300 (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP -- NOTE(review): word syntax here, but code 160 gets " " in the Cyrillic section; confirm this is intended
301 (modify-syntax-entry ?\e,F7\e(B ".")
302 (modify-syntax-entry ?\e,F;\e(B ".")
303 (modify-syntax-entry ?\e,F=\e(B ".")
304
305 ;; Hebrew character set (ISO-8859-8)
306
307 (modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
308
309 (let ((code 224)) ; codes 224..250 are word constituents
310 (while (<= code 250)
311 (modify-syntax-entry (make-char 'hebrew-iso8859-8 code) "w")
312 (setq code (1+ code))))
313 (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
314
315 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
316
317 (modify-category-entry (make-char 'indian-is13194) ?i) ; `i': Indian
318 (modify-category-entry (make-char 'indian-2-column) ?I) ; `I': Indian Glyphs
319 (modify-category-entry (make-char 'indian-1-column) ?I)
320
321 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
322
323 (modify-category-entry (make-char 'katakana-jisx0201) ?k) ; `k': Japanese katakana
324 (modify-category-entry (make-char 'latin-jisx0201) ?r) ; `r': Japanese roman
325 (modify-category-entry (make-char 'japanese-jisx0208) ?j) ; `j': Japanese
326 (modify-category-entry (make-char 'japanese-jisx0212) ?j)
327 (modify-category-entry (make-char 'katakana-jisx0201) ?\|) ; `|': a line may be broken at these characters while filling
328 (modify-category-entry (make-char 'japanese-jisx0208) ?\|)
329 (modify-category-entry (make-char 'japanese-jisx0212) ?\|)
330
331 ;; JISX0208: word syntax for the whole charset, symbol syntax for rows 33, 34 and 40, word syntax again for a list of individual characters, and five bracket pairs.  Categories follow.
332 (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
333 (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
334 (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
335 (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
336 (let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
337 (while chars ; set after the row-level "_" entries above, so "w" is what these characters end up with
338 (modify-syntax-entry (car chars) "w")
339 (setq chars (cdr chars))))
340 (modify-syntax-entry ?\\e$B!J\e(B "(\e$B!K\e(B")
341 (modify-syntax-entry ?\\e$B!N\e(B "(\e$B!O\e(B")
342 (modify-syntax-entry ?\\e$B!P\e(B "(\e$B!Q\e(B")
343 (modify-syntax-entry ?\\e$B!V\e(B "(\e$B!W\e(B")
344 (modify-syntax-entry ?\\e$B!X\e(B "(\e$B!Y\e(B")
345 (modify-syntax-entry ?\\e$B!K\e(B ")\e$B!J\e(B")
346 (modify-syntax-entry ?\\e$B!O\e(B ")\e$B!N\e(B")
347 (modify-syntax-entry ?\\e$B!Q\e(B ")\e$B!P\e(B")
348 (modify-syntax-entry ?\\e$B!W\e(B ")\e$B!V\e(B")
349 (modify-syntax-entry ?\\e$B!Y\e(B ")\e$B!X\e(B")
350
351 (modify-category-entry (make-char 'japanese-jisx0208 35) ?A) ; row 35: alpha numeric
352 (modify-category-entry (make-char 'japanese-jisx0208 36) ?H) ; row 36: Hiragana
353 (modify-category-entry (make-char 'japanese-jisx0208 37) ?K) ; row 37: Katakana
354 (modify-category-entry (make-char 'japanese-jisx0208 38) ?G) ; row 38: Greek
355 (modify-category-entry (make-char 'japanese-jisx0208 39) ?Y) ; row 39: Cyrillic
356 (let ((row 48)) ; rows 48..126: `C' (Han)
357 (while (< row 127)
358 (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
359 (setq row (1+ row))))
360 (modify-category-entry ?\e$B!<\e(B ?K) ; this character counts as Katakana only
361 (let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B)))
362 (while chars ; these two count as both Katakana and Hiragana
363 (modify-category-entry (car chars) ?K)
364 (modify-category-entry (car chars) ?H)
365 (setq chars (cdr chars))))
366 (let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
367 (while chars ; these count as Han (`C')
368 (modify-category-entry (car chars) ?C)
369 (setq chars (cdr chars))))
370
371 ;; JISX0212: word syntax for the whole charset, symbol syntax for rows 33..35, category `C' (Han) for the whole charset.
372 (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
373 (modify-syntax-entry (make-char 'japanese-jisx0212 33) "_")
374 (modify-syntax-entry (make-char 'japanese-jisx0212 34) "_")
375 (modify-syntax-entry (make-char 'japanese-jisx0212 35) "_")
376
377 (modify-category-entry (make-char 'japanese-jisx0212 ) ?C)
378
379 ;; JISX0201-Kana: word syntax for the whole charset, punctuation syntax for the five listed characters.
380 (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
381 (let ((chars '(?\e(I!\e(B ?\e(I"\e(B ?\e(I#\e(B ?\e(I$\e(B ?\e(I%\e(B)))
382 (while chars
383 (modify-syntax-entry (car chars) ".")
384 (setq chars (cdr chars))))
385
386 ;; Korean character set (KSC5601): word syntax for the whole charset, symbol syntax for rows 33, 34 and 38..41; category `h' for the whole charset plus per-row categories.
387
388 (modify-syntax-entry (make-char 'korean-ksc5601) "w")
389 (modify-syntax-entry (make-char 'korean-ksc5601 33) "_")
390 (modify-syntax-entry (make-char 'korean-ksc5601 34) "_")
391 (modify-syntax-entry (make-char 'korean-ksc5601 38) "_")
392 (modify-syntax-entry (make-char 'korean-ksc5601 39) "_")
393 (modify-syntax-entry (make-char 'korean-ksc5601 40) "_")
394 (modify-syntax-entry (make-char 'korean-ksc5601 41) "_")
395
396 (modify-category-entry (make-char 'korean-ksc5601) ?h)
397 (modify-category-entry (make-char 'korean-ksc5601 35) ?A) ; row 35: alpha numeric
398 (modify-category-entry (make-char 'korean-ksc5601 37) ?G) ; row 37: Greek
399 (modify-category-entry (make-char 'korean-ksc5601 42) ?H) ; row 42: Hiragana
400 (modify-category-entry (make-char 'korean-ksc5601 43) ?K) ; row 43: Katakana
401 (modify-category-entry (make-char 'korean-ksc5601 44) ?Y) ; row 44: Cyrillic
402
403 ;; Lao character set.  Each DEFLIST entry is (CHARS SYNTAX CATEGORY); in CHARS a `-' between two characters denotes an inclusive range, any other character stands for itself.
404
405 (modify-category-entry (make-char 'lao) ?o)
406
407 (let ((deflist '(;; chars syntax category
408 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
409 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
410 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
411 ("\e(1XY\e(B" "w" ?3) ; vowel lower
412 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
413 ("\e(1\\e(B" "w" ?9) ; semivowel lower
414 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
415 ("\e(1Of\e(B" "_" ?5) ; symbol (category `5', as in the Thai table; `0' would mark these as consonants)
416 ))
417 elm chars len syntax category to ch i)
418 (while deflist ; one table entry per pass
419 (setq elm (car deflist))
420 (setq chars (car elm)
421 len (length chars)
422 syntax (nth 1 elm)
423 category (nth 2 elm)
424 i 0)
425 (while (< i len) ; I is advanced by `char-bytes' of each character read
426 (if (= (aref chars i) ?-)
427 (setq i (1+ i) ; skip the `-'
428 to (sref chars i)) ; range end; CH is still one past the range start from the previous pass
429 (setq ch (sref chars i) ; single character: a range of one
430 to ch))
431 (while (<= ch to) ; apply SYNTAX and CATEGORY to every code CH..TO
432 (modify-syntax-entry ch syntax)
433 (modify-category-entry ch category)
434 (setq ch (1+ ch)))
435 (setq i (+ i (char-bytes to))))
436 (setq deflist (cdr deflist))))
437
438 ;; Thai character set (TIS620).  Each DEFLIST entry is (CHARS SYNTAX CATEGORY); in CHARS a `-' between two characters denotes an inclusive range, any other character stands for itself.
439
440 (modify-category-entry (make-char 'thai-tis620) ?t)
441
442 (let ((deflist '(;; chars syntax category
443 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
444 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
445 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
446 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
447 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
448 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
449 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
450 ))
451 elm chars len syntax category to ch i)
452 (while deflist ; one table entry per pass
453 (setq elm (car deflist))
454 (setq chars (car elm)
455 len (length chars)
456 syntax (nth 1 elm)
457 category (nth 2 elm)
458 i 0)
459 (while (< i len) ; I is advanced by `char-bytes' of each character read
460 (if (= (aref chars i) ?-)
461 (setq i (1+ i) ; skip the `-'
462 to (sref chars i)) ; range end; CH is still one past the range start from the previous pass
463 (setq ch (sref chars i) ; single character: a range of one
464 to ch))
465 (while (<= ch to) ; apply SYNTAX and CATEGORY to every code CH..TO
466 (modify-syntax-entry ch syntax)
467 (modify-category-entry ch category)
468 (setq ch (1+ ch)))
469 (setq i (+ i (char-bytes to))))
470 (setq deflist (cdr deflist))))
471
472 ;; Tibetan character set.  Rows 33..37 of `tibetan' and row 33 of `tibetan-1-column' get category `q'; the table then assigns syntax and one category per entry.  In CHARS a `-' between two characters denotes an inclusive range.
473
474 (let ((row 33))
475 (while (< row 38)
476 (modify-category-entry (make-char 'tibetan row) ?q)
477 (setq row (1+ row))))
478
479 (modify-category-entry (make-char 'tibetan-1-column 33) ?q)
480
481 (let ((deflist '(;; chars syntax category
482 ("\e$(7"!\e(B-\e$(7"J\e(B" "w" ?0) ; consonant
483 ("\e$(7#!\e(B-\e$(7#J#P#Q\e(B" "w" ?0) ;
484 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
485 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
486 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
487 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
488 ("\e$(7!I"Q"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
489 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
490 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
491 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
492 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
493 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
494 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
495 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o\e(B" "." ?q) ; others
496 ))
497 elm chars len syntax category to ch i)
498 (while deflist ; one table entry per pass
499 (setq elm (car deflist))
500 (setq chars (car elm)
501 len (length chars)
502 syntax (nth 1 elm)
503 category (nth 2 elm)
504 i 0)
505 (while (< i len) ; I is advanced by `char-bytes' of each character read
506 (if (= (aref chars i) ?-)
507 (setq i (1+ i) ; skip the `-'
508 to (sref chars i)) ; range end; CH is still one past the range start from the previous pass
509 (setq ch (sref chars i) ; single character: a range of one
510 to ch))
511 (while (<= ch to) ; apply SYNTAX and CATEGORY to every code CH..TO
512 (modify-syntax-entry ch syntax)
513 (modify-category-entry ch category)
514 (setq ch (1+ ch)))
515 (setq i (+ i (char-bytes to))))
516 (setq deflist (cdr deflist))))
517
518 ;; Vietnamese character set
519
520 (let ((viscii-lower (make-char 'vietnamese-viscii-lower))
521 (viscii-upper (make-char 'vietnamese-viscii-upper)))
522 (modify-syntax-entry viscii-lower "w")
523 (modify-category-entry viscii-lower ?v)
524 (modify-category-entry viscii-lower ?l) ; `l' lets these form a word with latin characters.
525 (modify-syntax-entry viscii-upper "w")
526 (modify-category-entry viscii-upper ?v)
527 (modify-category-entry viscii-upper ?l) ; Likewise for the upper-case charset.
528 )
529
530 \f
531 ;;; Setting word boundary.
532
533 (setq word-combining-categories ; category pairs that are NOT separated by a word boundary:
534 '((?l . ?l))) ; Latin - Latin (the Vietnamese charsets are given `l' above for this reason)
535
536 (setq word-separating-categories ; (2-byte character sets)
537 '((?A . ?K) ; Alpha numeric - Katakana
538 (?A . ?C) ; Alpha numeric - Chinese
539 (?H . ?A) ; Hiragana - Alpha numeric
540 (?H . ?K) ; Hiragana - Katakana
541 (?H . ?C) ; Hiragana - Chinese
542 (?K . ?A) ; Katakana - Alpha numeric
543 (?K . ?C) ; Katakana - Chinese
544 (?C . ?A) ; Chinese - Alpha numeric
545 (?C . ?K) ; Chinese - Katakana
546 ))
547
548 \f
549 ;; For each character set, record the coding system best suited to
550 ;; encode it in the charset's `prefered-coding-system' property (spelled that way; presumably read under this name elsewhere, so do not rename it here).
551
552 (let ((l '((latin-iso8859-1 . iso-latin-1)
553 (latin-iso8859-2 . iso-latin-2)
554 (latin-iso8859-3 . iso-latin-3)
555 (latin-iso8859-4 . iso-latin-4)
556 (thai-tis620 . thai-tis620)
557 (greek-iso8859-7 . greek-iso-8bit)
558 (arabic-iso8859-6 . iso-2022-7bit)
559 (hebrew-iso8859-8 . hebrew-iso-8bit)
560 (katakana-jisx0201 . japanese-shift-jis)
561 (latin-jisx0201 . japanese-shift-jis)
562 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
563 (latin-iso8859-9 . iso-latin-5)
564 (japanese-jisx0208-1978 . iso-2022-jp)
565 (chinese-gb2312 . cn-gb-2312)
566 (japanese-jisx0208 . iso-2022-jp)
567 (korean-ksc5601 . iso-2022-kr)
568 (japanese-jisx0212 . iso-2022-jp)
569 (chinese-cns11643-1 . iso-2022-cn)
570 (chinese-cns11643-2 . iso-2022-cn)
571 (chinese-big5-1 . chinese-big5)
572 (chinese-big5-2 . chinese-big5)
573 (chinese-sisheng . iso-2022-7bit)
574 (ipa . iso-2022-7bit)
575 (vietnamese-viscii-lower . vietnamese-viscii)
576 (vietnamese-viscii-upper . vietnamese-viscii)
577 (arabic-digit . iso-2022-7bit)
578 (arabic-1-column . iso-2022-7bit)
579 (ascii-right-to-left . iso-2022-7bit)
580 (lao . lao)
581 (arabic-2-column . iso-2022-7bit)
582 (indian-is13194 . devanagari)
583 (indian-1-column . devanagari)
584 (tibetan-1-column . tibetan)
585 (ethiopic . iso-2022-jp) ; NOTE(review): other charsets without a dedicated coding system map to iso-2022-7bit here -- confirm iso-2022-jp is intended
586 (chinese-cns11643-3 . iso-2022-cn)
587 (chinese-cns11643-4 . iso-2022-cn)
588 (chinese-cns11643-5 . iso-2022-cn)
589 (chinese-cns11643-6 . iso-2022-cn)
590 (chinese-cns11643-7 . iso-2022-cn)
591 (indian-2-column . devanagari)
592 (tibetan . tibetan))))
593 (while l ; each element is (CHARSET . CODING-SYSTEM)
594 (put-charset-property (car (car l)) 'prefered-coding-system (cdr (car l)))
595 (setq l (cdr l))))
596
597
598 ;;; Local Variables:
599 ;;; coding: iso-2022-7bit
600 ;;; End:
601
602 ;;; end of characters.el