]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
New category ` '.
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5
6 ;; Keywords: multibyte character, character set, syntax, category
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Commentary:
26
27 ;; This file contains multibyte characters. Save this file always in
28 ;; the coding system `iso-2022-7bit'.
29
30 ;; This file does not define the syntax for Latin-N character sets;
31 ;; those are defined by the files latin-N.el.
32
33 ;;; Predefined categories.
34
35 ;; For each character set.
36
;; One category per character set.  Each entry pairs the category
;; mnemonic with the docstring passed to `define-category'; entries
;; are registered in the order listed.
(let ((charset-categories
       '((?a . "ASCII")
         (?l . "Latin")
         (?t . "Thai")
         (?g . "Greek")
         (?b . "Arabic")
         (?w . "Hebrew")
         (?y . "Cyrillic")
         (?k . "Japanese katakana")
         (?r . "Japanese roman")
         (?c . "Chinese")
         (?j . "Japanese")
         (?h . "Korean")
         (?e . "Ethiopic (Ge'ez)")
         (?v . "Vietnamese")
         (?i . "Indian")
         (?o . "Lao")
         (?q . "Tibetan"))))
  (while charset-categories
    (define-category (car (car charset-categories))
      (cdr (car charset-categories)))
    (setq charset-categories (cdr charset-categories))))
54
55 ;; For each group (row) of 2-byte character sets.
56
;; Categories for the groups (rows) of 2-byte character sets, plus the
;; Indian glyph category.  Each (MNEMONIC . DOCSTRING) entry is handed
;; to `define-category' unchanged.
(mapcar (function
         (lambda (group)
           (define-category (car group) (cdr group))))
        '((?A . "Alpha numeric characters of 2-byte character sets")
          (?C . "Chinese (Han) characters of 2-byte character sets")
          (?G . "Greek characters of 2-byte characters sets")
          (?H . "Japanese Hiragana characters of 2-byte character sets")
          (?K . "Japanese Katakana characters of 2-byte character sets")
          (?N . "Korean Hangul characters of 2-byte character sets")
          (?Y . "Cyrillic character of 2-byte character sets")
          (?I . "Indian Glyphs")))
65
66 ;; For phonetic classifications.
67
;; Phonetic classification categories use the digit mnemonics ?0..?9.
;; The docstring at vector index N belongs to category (+ ?0 N).
(let ((phonetic-docs ["consonant"
                      "base (independent) vowel"
                      "upper diacritical mark (including upper vowel)"
                      "lower diacritical mark (including lower vowel)"
                      "tone mark"
                      "symbol"
                      "digit"
                      "vowel-modifying diacritical mark"
                      "vowel-signs"
                      "semivowel lower"])
      (idx 0))
  (while (< idx (length phonetic-docs))
    (define-category (+ ?0 idx) (aref phonetic-docs idx))
    (setq idx (1+ idx))))
78
;; Categories that drive line layout rather than describing a script.
(let ((layout-categories
       '(;; For filling.
         (?| . "While filling, we can break a line at this character.")
         ;; For indentation calculation.
         (?\  . "This character counts as a space for indentation purposes.")
         ;; Keep the following two for `kinsoku' processing.  See the
         ;; comments in kinsoku.el.
         (?> . "A character which can't be placed at beginning of line.")
         (?< . "A character which can't be placed at end of line."))))
  (while layout-categories
    (define-category (car (car layout-categories))
      (cdr (car layout-categories)))
    (setq layout-categories (cdr layout-categories))))
89
90 \f
91 ;;; Setting syntax and category.
92
93 ;; ASCII
94
;; Character codes 32 through 126 each receive both the category `a'
;; (ASCII) and the category `l' (Latin).
(let ((ascii-code 32))
  (while (<= ascii-code 126)
    (let ((cats '(?a ?l)))
      (while cats
        (modify-category-entry ascii-code (car cats))
        (setq cats (cdr cats))))
    (setq ascii-code (1+ ascii-code))))
100
101 ;; Arabic character set
102
;; For each Arabic charset, the generic character returned by
;; `make-char' gets word syntax and the category `b' (Arabic).
(mapcar (function
         (lambda (arabic-charset)
           (let ((generic (make-char arabic-charset)))
             (modify-syntax-entry generic "w")
             (modify-category-entry generic ?b))))
        '(arabic-iso8859-6
          arabic-digit
          arabic-1-column
          arabic-2-column))
111
112 ;; Chinese character set (GB2312)
113
;; Default: the generic GB2312 character gets word syntax.
(modify-syntax-entry (make-char 'chinese-gb2312) "w")

;; Rows 33, 34 and 41 are overridden to symbol syntax.  This must run
;; before the per-character bracket entries below, which refine
;; individual characters.
(let ((symbol-rows '(33 34 41)))
  (while symbol-rows
    (modify-syntax-entry (make-char 'chinese-gb2312 (car symbol-rows)) "_")
    (setq symbol-rows (cdr symbol-rows))))

;; Bracket pairs.  Each opener is declared with its closer as the
;; matching character, immediately followed by the reverse declaration.
(modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
(modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
(modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
(modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
(modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
(modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
(modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
(modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
(modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
(modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
(modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
(modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
(modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
(modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
132
;; Every GB2312 character is `c' (Chinese) and may be followed by a
;; line break when filling (`|').
(modify-category-entry (make-char 'chinese-gb2312) ?c)
(modify-category-entry (make-char 'chinese-gb2312) ?\|)

;; Per-row categories, using the 2-byte group categories defined near
;; the top of this file: A = alpha numeric, H = Hiragana, K = Katakana,
;; G = Greek, Y = Cyrillic.  (The original repeated the row-35 entry a
;; second time after row 39; the redundant statement is dropped.)
(modify-category-entry (make-char 'chinese-gb2312 35) ?A)
(modify-category-entry (make-char 'chinese-gb2312 36) ?H)
(modify-category-entry (make-char 'chinese-gb2312 37) ?K)
(modify-category-entry (make-char 'chinese-gb2312 38) ?G)
(modify-category-entry (make-char 'chinese-gb2312 39) ?Y)

;; Rows 48 through 126 are marked `C' (Chinese (Han) characters).
(let ((row 48))
  (while (< row 127)
    (modify-category-entry (make-char 'chinese-gb2312 row) ?C)
    (setq row (1+ row))))
145
146 ;; Chinese character set (BIG5)
147
;; Both BIG5 charsets are treated alike: the generic character gets
;; word syntax plus the categories `c' (Chinese), `C' (Han) and `|'
;; (line may be broken here when filling).
(let ((big5-charsets '(chinese-big5-1 chinese-big5-2))
      big5-generic)
  (while big5-charsets
    (setq big5-generic (make-char (car big5-charsets)))
    (modify-syntax-entry big5-generic "w")
    (modify-category-entry big5-generic ?c)
    (modify-category-entry big5-generic ?C)
    (modify-category-entry big5-generic ?\|)
    (setq big5-charsets (cdr big5-charsets))))
161
162
163 ;; Chinese character set (CNS11643)
164
;; All seven CNS11643 planes get word syntax and the categories
;; `c' (Chinese), `C' (Han) and `|' (line-breakable when filling).
(mapcar (function
         (lambda (cns-charset)
           (let ((cns-generic (make-char cns-charset))
                 (cns-cats '(?c ?C ?\|)))
             (modify-syntax-entry cns-generic "w")
             (while cns-cats
               (modify-category-entry cns-generic (car cns-cats))
               (setq cns-cats (cdr cns-cats))))))
        '(chinese-cns11643-1
          chinese-cns11643-2
          chinese-cns11643-3
          chinese-cns11643-4
          chinese-cns11643-5
          chinese-cns11643-6
          chinese-cns11643-7))
180
181 ;; Cyrillic character set (ISO-8859-5)
182
;; Every ISO-8859-5 character belongs to the category `y' (Cyrillic).
(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)

;; Position 160 gets whitespace syntax.
(modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")

;; Three characters get punctuation syntax.
(let ((cyr-punct '(?\e,L-\e(B ?\e,Lp\e(B ?\e,L}\e(B)))
  (while cyr-punct
    (modify-syntax-entry (car cyr-punct) ".")
    (setq cyr-punct (cdr cyr-punct))))

;; Upper/lower case pairs, registered in the standard case table with
;; `set-case-syntax-pair'.  Each entry is (UPPERCASE . LOWERCASE).
(let ((cyr-case-table (standard-case-table))
      (cyr-case-pairs
       '((?\e,L!\e(B . ?\e,Lq\e(B)
         (?\e,L"\e(B . ?\e,Lr\e(B)
         (?\e,L#\e(B . ?\e,Ls\e(B)
         (?\e,L$\e(B . ?\e,Lt\e(B)
         (?\e,L%\e(B . ?\e,Lu\e(B)
         (?\e,L&\e(B . ?\e,Lv\e(B)
         (?\e,L'\e(B . ?\e,Lw\e(B)
         (?\e,L(\e(B . ?\e,Lx\e(B)
         (?\e,L)\e(B . ?\e,Ly\e(B)
         (?\e,L*\e(B . ?\e,Lz\e(B)
         (?\e,L+\e(B . ?\e,L{\e(B)
         (?\e,L,\e(B . ?\e,L|\e(B)
         (?\e,L.\e(B . ?\e,L~\e(B)
         (?\e,L/\e(B . ?\e,L\7f\e(B)
         (?\e,L0\e(B . ?\e,LP\e(B)
         (?\e,L1\e(B . ?\e,LQ\e(B)
         (?\e,L2\e(B . ?\e,LR\e(B)
         (?\e,L3\e(B . ?\e,LS\e(B)
         (?\e,L4\e(B . ?\e,LT\e(B)
         (?\e,L5\e(B . ?\e,LU\e(B)
         (?\e,L6\e(B . ?\e,LV\e(B)
         (?\e,L7\e(B . ?\e,LW\e(B)
         (?\e,L8\e(B . ?\e,LX\e(B)
         (?\e,L9\e(B . ?\e,LY\e(B)
         (?\e,L:\e(B . ?\e,LZ\e(B)
         (?\e,L;\e(B . ?\e,L[\e(B)
         (?\e,L<\e(B . ?\e,L\\e(B)
         (?\e,L=\e(B . ?\e,L]\e(B)
         (?\e,L>\e(B . ?\e,L^\e(B)
         (?\e,L?\e(B . ?\e,L_\e(B)
         (?\e,L@\e(B . ?\e,L`\e(B)
         (?\e,LA\e(B . ?\e,La\e(B)
         (?\e,LB\e(B . ?\e,Lb\e(B)
         (?\e,LC\e(B . ?\e,Lc\e(B)
         (?\e,LD\e(B . ?\e,Ld\e(B)
         (?\e,LE\e(B . ?\e,Le\e(B)
         (?\e,LF\e(B . ?\e,Lf\e(B)
         (?\e,LG\e(B . ?\e,Lg\e(B)
         (?\e,LH\e(B . ?\e,Lh\e(B)
         (?\e,LI\e(B . ?\e,Li\e(B)
         (?\e,LJ\e(B . ?\e,Lj\e(B)
         (?\e,LK\e(B . ?\e,Lk\e(B)
         (?\e,LL\e(B . ?\e,Ll\e(B)
         (?\e,LM\e(B . ?\e,Lm\e(B)
         (?\e,LN\e(B . ?\e,Ln\e(B)
         (?\e,LO\e(B . ?\e,Lo\e(B))))
  (while cyr-case-pairs
    (set-case-syntax-pair (car (car cyr-case-pairs))
                          (cdr (car cyr-case-pairs))
                          cyr-case-table)
    (setq cyr-case-pairs (cdr cyr-case-pairs))))
236
237 ;; Devanagari character set
238
;; Each spec is (CHARS SYNTAX CATEGORY).  CHARS lists single characters
;; and ranges written FROM-TO; every character covered gets SYNTAX via
;; `modify-syntax-entry' and CATEGORY via `modify-category-entry'.
(let ((specs '(;; chars syntax category
               ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
                                        ; chandrabindu, anuswar, visarga
               ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
               ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
               ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
               ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
               ))
      spec text text-len syn cat pos cur last)
  (while specs
    (setq spec (car specs)
          specs (cdr specs)
          text (nth 0 spec)
          text-len (length text)
          syn (nth 1 spec)
          cat (nth 2 spec)
          pos 0)
    ;; POS is advanced by `char-bytes', i.e. it is used as a byte
    ;; offset into TEXT.
    (while (< pos text-len)
      (cond ((= (aref text pos) ?-)
             ;; A dash: extend the run up to the character after it.
             ;; CUR already sits one past the previously handled
             ;; character, so the range start is not processed twice.
             (setq pos (1+ pos)
                   last (sref text pos)))
            (t
             ;; A plain character: a run of length one.
             (setq cur (sref text pos)
                   last cur)))
      (while (<= cur last)
        (modify-syntax-entry cur syn)
        (modify-category-entry cur cat)
        (setq cur (1+ cur)))
      (setq pos (+ pos (char-bytes last))))))
267
268 ;; Ethiopic character set
269
;; The generic Ethiopic character gets the category `e' and word
;; syntax; the characters listed afterwards are then overridden to
;; punctuation syntax.
(let ((ethiopic-generic (make-char 'ethiopic)))
  (modify-category-entry ethiopic-generic ?e)
  (modify-syntax-entry ethiopic-generic "w"))
(mapcar (function
         (lambda (punct-char)
           (modify-syntax-entry punct-char ".")))
        '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B))
276
277 ;; European character set (Latin-1,2,3,4,5)
278
;; For each Latin charset: the generic character is categorized as
;; `l' (Latin), and position 160 additionally gets the space category
;; (counts as a space for indentation purposes).
(let ((latin-charsets '(latin-iso8859-1
                        latin-iso8859-2
                        latin-iso8859-3
                        latin-iso8859-4
                        latin-iso8859-9)))
  (while latin-charsets
    (modify-category-entry (make-char (car latin-charsets)) ?l)
    (modify-category-entry (make-char (car latin-charsets) 160) ?\ )
    (setq latin-charsets (cdr latin-charsets))))
290
291 ;; Greek character set (ISO-8859-7)
292
;; Every ISO-8859-7 character belongs to the category `g' (Greek).
(modify-category-entry (make-char 'greek-iso8859-7) ?g)

;; Positions 182 through 254 get word syntax.
(let ((greek-code 182))
  (while (<= greek-code 254)
    (modify-syntax-entry (make-char 'greek-iso8859-7 greek-code) "w")
    (setq greek-code (1+ greek-code))))

;; NOTE(review): the Cyrillic section gives position 160 whitespace
;; syntax, whereas here it is made a word constituent -- confirm that
;; this difference is intended.
(modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP

;; These three are set after the loop above and end up as punctuation.
(mapcar (function
         (lambda (greek-punct)
           (modify-syntax-entry greek-punct ".")))
        '(?\e,F7\e(B ?\e,F;\e(B ?\e,F=\e(B))
303
304 ;; Hebrew character set (ISO-8859-8)
305
;; Every ISO-8859-8 character belongs to the category `w' (Hebrew).
(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)

;; Positions 224 through 250 get word syntax.
(let ((hebrew-code 224))
  (while (<= hebrew-code 250)
    (modify-syntax-entry (make-char 'hebrew-iso8859-8 hebrew-code) "w")
    (setq hebrew-code (1+ hebrew-code))))

;; Position 160 is made a word constituent as well.
(modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
313
314 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
315
;; IS 13194 characters are `i' (Indian); both glyph charsets are
;; `I' (Indian Glyphs).
(let ((indian-categories '((indian-is13194 . ?i)
                           (indian-2-column . ?I)
                           (indian-1-column . ?I))))
  (while indian-categories
    (modify-category-entry (make-char (car (car indian-categories)))
                           (cdr (car indian-categories)))
    (setq indian-categories (cdr indian-categories))))
319
320 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
321
;; Each entry is (CHARSET CATEGORY...): the generic character of
;; CHARSET receives every listed category.  `|' marks charsets whose
;; characters allow a line break when filling.
(let ((japanese-categories '((katakana-jisx0201 ?k ?\|)
                             (latin-jisx0201 ?r)
                             (japanese-jisx0208 ?j ?\|)
                             (japanese-jisx0212 ?j ?\|)))
      jp-generic jp-cats)
  (while japanese-categories
    (setq jp-generic (make-char (car (car japanese-categories)))
          jp-cats (cdr (car japanese-categories)))
    (while jp-cats
      (modify-category-entry jp-generic (car jp-cats))
      (setq jp-cats (cdr jp-cats)))
    (setq japanese-categories (cdr japanese-categories))))
329
330 ;; JISX0208
;; Default: the generic JISX0208 character gets word syntax.
(modify-syntax-entry (make-char 'japanese-jisx0208) "w")

;; Rows 33, 34 and 40 are overridden to symbol syntax.
(let ((symbol-rows '(33 34 40)))
  (while symbol-rows
    (modify-syntax-entry (make-char 'japanese-jisx0208 (car symbol-rows)) "_")
    (setq symbol-rows (cdr symbol-rows))))

;; These individual characters are switched back to word syntax.
;; This has to follow the row-wide settings above.
(mapcar (function
         (lambda (word-char)
           (modify-syntax-entry word-char "w")))
        '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))

;; Bracket pairs.  Each opener is declared with its closer as the
;; matching character, immediately followed by the reverse declaration.
(modify-syntax-entry ?\\e$B!J\e(B "(\e$B!K\e(B")
(modify-syntax-entry ?\\e$B!K\e(B ")\e$B!J\e(B")
(modify-syntax-entry ?\\e$B!N\e(B "(\e$B!O\e(B")
(modify-syntax-entry ?\\e$B!O\e(B ")\e$B!N\e(B")
(modify-syntax-entry ?\\e$B!P\e(B "(\e$B!Q\e(B")
(modify-syntax-entry ?\\e$B!Q\e(B ")\e$B!P\e(B")
(modify-syntax-entry ?\\e$B!V\e(B "(\e$B!W\e(B")
(modify-syntax-entry ?\\e$B!W\e(B ")\e$B!V\e(B")
(modify-syntax-entry ?\\e$B!X\e(B "(\e$B!Y\e(B")
(modify-syntax-entry ?\\e$B!Y\e(B ")\e$B!X\e(B")
349
;; Per-row categories: A = alpha numeric, H = Hiragana, K = Katakana,
;; G = Greek, Y = Cyrillic.
(let ((row-categories '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y))))
  (while row-categories
    (modify-category-entry
     (make-char 'japanese-jisx0208 (car (car row-categories)))
     (cdr (car row-categories)))
    (setq row-categories (cdr row-categories))))

;; Rows 48 through 126 are marked `C' (Chinese (Han) characters).
(let ((han-row 48))
  (while (<= han-row 126)
    (modify-category-entry (make-char 'japanese-jisx0208 han-row) ?C)
    (setq han-row (1+ han-row))))

;; Individual characters: each entry is (CHAR CATEGORY...).  The first
;; is Katakana only, the next two are both Katakana and Hiragana, and
;; the remainder are marked `C'.
(let ((char-categories '((?\e$B!<\e(B ?K)
                         (?\e$B!+\e(B ?K ?H)
                         (?\e$B!,\e(B ?K ?H)
                         (?\e$B!3\e(B ?C)
                         (?\e$B!4\e(B ?C)
                         (?\e$B!5\e(B ?C)
                         (?\e$B!6\e(B ?C)
                         (?\e$B!7\e(B ?C)
                         (?\e$B!8\e(B ?C)
                         (?\e$B!9\e(B ?C)
                         (?\e$B!:\e(B ?C)
                         (?\e$B!;\e(B ?C)))
      jis-char jis-cats)
  (while char-categories
    (setq jis-char (car (car char-categories))
          jis-cats (cdr (car char-categories)))
    (while jis-cats
      (modify-category-entry jis-char (car jis-cats))
      (setq jis-cats (cdr jis-cats)))
    (setq char-categories (cdr char-categories))))
369
370 ;; JISX0212
;; Default word syntax for JISX0212, with rows 33 through 35
;; overridden to symbol syntax.
(modify-syntax-entry (make-char 'japanese-jisx0212) "w")
(let ((symbol-row 33))
  (while (<= symbol-row 35)
    (modify-syntax-entry (make-char 'japanese-jisx0212 symbol-row) "_")
    (setq symbol-row (1+ symbol-row))))

;; The whole charset is marked `C' (Chinese (Han) characters).
(modify-category-entry (make-char 'japanese-jisx0212) ?C)
377
378 ;; JISX0201-Kana
;; Default word syntax for JISX0201 katakana; the five characters
;; listed afterwards are overridden to punctuation syntax.
(modify-syntax-entry (make-char 'katakana-jisx0201) "w")
(mapcar (function
         (lambda (kana-punct)
           (modify-syntax-entry kana-punct ".")))
        '(?\e(I!\e(B ?\e(I"\e(B ?\e(I#\e(B ?\e(I$\e(B ?\e(I%\e(B))
384
385 ;; Korean character set (KSC5601)
386
;; Default word syntax for KSC5601, with the listed rows overridden to
;; symbol syntax.
(modify-syntax-entry (make-char 'korean-ksc5601) "w")
(let ((symbol-rows '(33 34 38 39 40 41)))
  (while symbol-rows
    (modify-syntax-entry (make-char 'korean-ksc5601 (car symbol-rows)) "_")
    (setq symbol-rows (cdr symbol-rows))))

;; Every KSC5601 character is `h' (Korean); selected rows also get a
;; 2-byte group category: A = alpha numeric, G = Greek, H = Hiragana,
;; K = Katakana, Y = Cyrillic.
(modify-category-entry (make-char 'korean-ksc5601) ?h)
(let ((row-categories '((35 . ?A) (37 . ?G) (42 . ?H) (43 . ?K) (44 . ?Y))))
  (while row-categories
    (modify-category-entry
     (make-char 'korean-ksc5601 (car (car row-categories)))
     (cdr (car row-categories)))
    (setq row-categories (cdr row-categories))))
401
402 ;; Lao character set
403
;; Every Lao character belongs to the category `o' (Lao).
(modify-category-entry (make-char 'lao) ?o)

;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  CHARS lists single
;; characters and ranges written FROM-TO; every character covered gets
;; SYNTAX via `modify-syntax-entry' and CATEGORY via
;; `modify-category-entry'.
;;
;; The last row (symbols) uses the category `5', which this file
;; defines as "symbol" and which the Thai table also uses for its
;; symbol row.  It was previously `0', which is "consonant" and
;; contradicted the row's own comment.
(let ((deflist '(;; chars syntax category
                 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
                 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
                 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
                 ("\e(1XY\e(B" "w" ?3) ; vowel lower
                 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
                 ("\e(1\\e(B" "w" ?9) ; semivowel lower
                 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
                 ("\e(1Of\e(B" "_" ?5) ; symbol
                 ))
      elm chars len syntax category to ch i)
  (while deflist
    (setq elm (car deflist))
    (setq chars (car elm)
          len (length chars)
          syntax (nth 1 elm)
          category (nth 2 elm)
          i 0)
    ;; I is advanced by `char-bytes', i.e. it is used as a byte offset
    ;; into CHARS.
    (while (< i len)
      (if (= (aref chars i) ?-)
          ;; A dash: the run ends at the character after it.  CH is
          ;; already one past the previously handled character.
          (setq i (1+ i)
                to (sref chars i))
        ;; A plain character: a run of length one.
        (setq ch (sref chars i)
              to ch))
      (while (<= ch to)
        (modify-syntax-entry ch syntax)
        (modify-category-entry ch category)
        (setq ch (1+ ch)))
      (setq i (+ i (char-bytes to))))
    (setq deflist (cdr deflist))))
436
437 ;; Thai character set (TIS620)
438
;; Every TIS620 character belongs to the category `t' (Thai).
(modify-category-entry (make-char 'thai-tis620) ?t)

;; Each spec is (CHARS SYNTAX CATEGORY).  CHARS lists single characters
;; and ranges written FROM-TO; every character covered gets SYNTAX via
;; `modify-syntax-entry' and CATEGORY via `modify-category-entry'.
(let ((specs '(;; chars syntax category
               ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
               ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
               ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
               ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
               ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
               ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
               ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
               ))
      spec text text-len syn cat pos cur last)
  (while specs
    (setq spec (car specs)
          specs (cdr specs)
          text (nth 0 spec)
          text-len (length text)
          syn (nth 1 spec)
          cat (nth 2 spec)
          pos 0)
    ;; POS is advanced by `char-bytes', i.e. it is used as a byte
    ;; offset into TEXT.
    (while (< pos text-len)
      (cond ((= (aref text pos) ?-)
             ;; A dash: extend the run up to the character after it.
             ;; CUR already sits one past the previously handled
             ;; character, so the range start is not processed twice.
             (setq pos (1+ pos)
                   last (sref text pos)))
            (t
             ;; A plain character: a run of length one.
             (setq cur (sref text pos)
                   last cur)))
      (while (<= cur last)
        (modify-syntax-entry cur syn)
        (modify-category-entry cur cat)
        (setq cur (1+ cur)))
      (setq pos (+ pos (char-bytes last))))))
470
471 ;; Tibetan character set
472
;; Rows 33 through 37 of the `tibetan' charset, and row 33 of
;; `tibetan-1-column', belong to the category `q' (Tibetan).
(let ((tibetan-row 33))
  (while (<= tibetan-row 37)
    (modify-category-entry (make-char 'tibetan tibetan-row) ?q)
    (setq tibetan-row (1+ tibetan-row))))

(modify-category-entry (make-char 'tibetan-1-column 33) ?q)
479
;; Each spec is (CHARS SYNTAX CATEGORY).  CHARS lists single characters
;; and ranges written FROM-TO; every character covered gets SYNTAX via
;; `modify-syntax-entry' and CATEGORY via `modify-category-entry'.
;; Specs are applied in order, so where a character appears in several
;; specs the last syntax set for it wins.
(let ((specs '(;; chars syntax category
               ("\e$(7"!\e(B-\e$(7"J\e(B" "w" ?0) ; consonant
               ("\e$(7#!\e(B-\e$(7#J#P#Q\e(B" "w" ?0) ;
               ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
               ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
               ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
               ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
               ("\e$(7!I"Q"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
               ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
               ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
               ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
               ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
               ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
               ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
               ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o\e(B" "." ?q) ; others
               ))
      spec text text-len syn cat pos cur last)
  (while specs
    (setq spec (car specs)
          specs (cdr specs)
          text (nth 0 spec)
          text-len (length text)
          syn (nth 1 spec)
          cat (nth 2 spec)
          pos 0)
    ;; POS is advanced by `char-bytes', i.e. it is used as a byte
    ;; offset into TEXT.
    (while (< pos text-len)
      (cond ((= (aref text pos) ?-)
             ;; A dash: extend the run up to the character after it.
             ;; CUR already sits one past the previously handled
             ;; character, so the range start is not processed twice.
             (setq pos (1+ pos)
                   last (sref text pos)))
            (t
             ;; A plain character: a run of length one.
             (setq cur (sref text pos)
                   last cur)))
      (while (<= cur last)
        (modify-syntax-entry cur syn)
        (modify-category-entry cur cat)
        (setq cur (1+ cur)))
      (setq pos (+ pos (char-bytes last))))))
516
517 ;; Vietnamese character set
518
;; Both VISCII halves get word syntax and the category `v'
;; (Vietnamese).  They are also categorized as `l' (Latin) so that
;; they form a word together with Latin characters.
(let ((viscii-charsets '(vietnamese-viscii-lower vietnamese-viscii-upper))
      viscii-generic)
  (while viscii-charsets
    (setq viscii-generic (make-char (car viscii-charsets)))
    (modify-syntax-entry viscii-generic "w")
    (modify-category-entry viscii-generic ?v)
    (modify-category-entry viscii-generic ?l)
    (setq viscii-charsets (cdr viscii-charsets))))
528
529 \f
530 ;;; Setting word boundary.
531
;; Both word-boundary tables are lists of (CATEGORY . CATEGORY) pairs.
(setq word-combining-categories
      (list (cons ?l ?l))               ; Latin - Latin

      word-separating-categories        ; (2-byte character sets)
      (list (cons ?A ?K)                ; Alpha numeric - Katakana
            (cons ?A ?C)                ; Alpha numeric - Chinese
            (cons ?H ?A)                ; Hiragana - Alpha numeric
            (cons ?H ?K)                ; Hiragana - Katakana
            (cons ?H ?C)                ; Hiragana - Chinese
            (cons ?K ?A)                ; Katakana - Alpha numeric
            (cons ?K ?C)                ; Katakana - Chinese
            (cons ?C ?A)                ; Chinese - Alpha numeric
            (cons ?C ?K)))              ; Chinese - Katakana