code.delx.au - gnu-emacs/blob - lisp/international/characters.el

   1 ;;; characters.el --- set syntax and category for multibyte characters
   2
   3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
   4 ;; Licensed to the Free Software Foundation.
   5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   6 ;; Copyright (C) 2001, 2002
   7 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
   8 ;;   Registration Number H13PRO009
   9
  10 ;; Keywords: multibyte character, character set, syntax, category
  11
  12 ;; This file is part of GNU Emacs.
  13
  14 ;; GNU Emacs is free software; you can redistribute it and/or modify
  15 ;; it under the terms of the GNU General Public License as published by
  16 ;; the Free Software Foundation; either version 2, or (at your option)
  17 ;; any later version.
  18
  19 ;; GNU Emacs is distributed in the hope that it will be useful,
  20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 ;; GNU General Public License for more details.
  23
  24 ;; You should have received a copy of the GNU General Public License
  25 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
  26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  27 ;; Boston, MA 02111-1307, USA.
  28
  29 ;;; Commentary:
  30
  31 ;; This file contains multibyte characters.  Save this file always in
  32 ;; the coding system `iso-2022-7bit'.
  33
  34 ;; This file does not define the syntax for Latin-N character sets;
  35 ;; those are defined by the files latin-N.el.
  36
  37 ;;; Code:
  38
  39 ;;; Predefined categories.
  40
  41 ;; For each character set.
  42
  43 (define-category ?a "ASCII")
  44 (define-category ?l "Latin")
  45 (define-category ?t "Thai")
  46 (define-category ?g "Greek")
  47 (define-category ?b "Arabic")
  48 (define-category ?w "Hebrew")
  49 (define-category ?y "Cyrillic")
  50 (define-category ?k "Japanese katakana")
  51 (define-category ?r "Japanese roman")
  52 (define-category ?c "Chinese")
  53 (define-category ?j "Japanese")
  54 (define-category ?h "Korean")
  55 (define-category ?e "Ethiopic (Ge'ez)")
  56 (define-category ?v "Vietnamese")
  57 (define-category ?i "Indian")
  58 (define-category ?o "Lao")
  59 (define-category ?q "Tibetan")
  60
  61 ;; For each group (row) of 2-byte character sets.
  62
  63 (define-category ?A "Alpha-numeric characters of 2-byte character sets")
  64 (define-category ?C "Chinese (Han) characters of 2-byte character sets")
  65 (define-category ?G "Greek characters of 2-byte character sets")
  66 (define-category ?H "Japanese Hiragana characters of 2-byte character sets")
  67 (define-category ?K "Japanese Katakana characters of 2-byte character sets")
  68 (define-category ?N "Korean Hangul characters of 2-byte character sets")
  69 (define-category ?Y "Cyrillic characters of 2-byte character sets")
  70 (define-category ?I "Indian Glyphs")
  71
  72 ;; For phonetic classifications.
  73
  74 (define-category ?0 "consonant")
  75 (define-category ?1 "base (independent) vowel")
  76 (define-category ?2 "upper diacritical mark (including upper vowel)")
  77 (define-category ?3 "lower diacritical mark (including lower vowel)")
  78 (define-category ?4 "tone mark")
  79 (define-category ?5 "symbol")
  80 (define-category ?6 "digit")
  81 (define-category ?7 "vowel-modifying diacritical mark")
  82 (define-category ?8 "vowel-signs")
  83 (define-category ?9 "semivowel lower")
  84
  85 ;; For filling.
  86 (define-category ?| "While filling, we can break a line at this character.")
  87
  88 ;; For indentation calculation.  The mnemonic is the space character.
     ;; It is written `?\ ' (backslash-space) so that it survives tools that
     ;; strip trailing whitespace; a bare `?' at the end of a line reads as
     ;; the newline character, which is not a valid category mnemonic.
  89 (define-category ?\  ; mnemonic: space
  90   "This character counts as a space for indentation purposes.")
  91
  92 ;; Keep the following for `kinsoku' processing.  See comments in
  93 ;; kinsoku.el.
  94 (define-category ?> "A character which can't be placed at beginning of line.")
  95 (define-category ?< "A character which can't be placed at end of line.")
  96
  97 ;; Combining
  98 (define-category ?^ "Combining diacritic or mark")
  99 \f
 100 ;;; Setting syntax and category.
 101
 102 ;; ASCII
 103
 104 ;; All ASCII characters have the category `a' (ASCII) and `l' (Latin).
 105 (modify-category-entry '(32 . 127) ?a)
 106 (modify-category-entry '(32 . 127) ?l)
 107
 108 ;; Deal with the CJK charsets first.  Since the syntax of blocks is
 109 ;; defined per charset, and the charsets may contain e.g. Latin
 110 ;; characters, we end up with the wrong syntax definitions if we're
 111 ;; not careful.
 112
 113 ;; Chinese characters (Unicode)
 114 (modify-category-entry '(#x3400 . #x9FAF) ?C)
 115 (modify-category-entry '(#x3400 . #x9FAF) ?c)
 116 (modify-category-entry '(#x3400 . #x9FAF) ?|)
 117 (modify-category-entry '(#xF900 . #xFAFF) ?C)
 118 (modify-category-entry '(#xF900 . #xFAFF) ?c)
 119 (modify-category-entry '(#xF900 . #xFAFF) ?|)
 120
 121 ;; Chinese character set (GB2312)
 122
 123 (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2121 #x217E)
 124 (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2221 #x227E)
 125 (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2921 #x297E)
 126
 127 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c)
 128 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?|)
 129 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2330 #x2339)
 130 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2341 #x235A)
 131 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2361 #x237A)
 132 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?H #x2421 #x247E)
 133 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?K #x2521 #x257E)
 134 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?G #x2621 #x267E)
 135 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?Y #x2721 #x277E)
 136 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?C #x3021 #x7E7E)
 137
 138 ;; Chinese character set (BIG5)
 139
 140 (map-charset-chars #'modify-category-entry 'big5 ?c)
 141 (map-charset-chars #'modify-category-entry 'big5 ?C #xA259 #xA25F)
 142 (map-charset-chars #'modify-category-entry 'big5 ?C #xA440 #xC67E)
 143 (map-charset-chars #'modify-category-entry 'big5 ?C #xC940 #xF9DF)
 144 (map-charset-chars #'modify-category-entry 'big5 ?|)
 145
 146
 147 ;; Chinese character set (CNS11643)
 148
 149 (dolist (c '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
 150              chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
 151              chinese-cns11643-7))
 152   (map-charset-chars #'modify-category-entry c ?c)
 153   (if (eq c 'chinese-cns11643-1)
 154       (map-charset-chars #'modify-category-entry c ?C #x4421 #x7E7E)
 155     (map-charset-chars #'modify-category-entry c ?C))
 156   (map-charset-chars #'modify-category-entry c ?|))
 157
 158 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
 159
 160 (map-charset-chars #'modify-category-entry 'katakana-jisx0201 ?k)
 161
 162 (map-charset-chars #'modify-category-entry 'latin-jisx0201 ?r)
 163
 164 (dolist (l '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212))
 165   (map-charset-chars #'modify-category-entry l ?j)
 166   (map-charset-chars #'modify-category-entry l ?\|))
 167
 168 ;; Unicode equivalents of JISX0201-kana
 169 (let ((range '(#xff61 . #xff9f)))
 170   (modify-category-entry range  ?k)
 171   (modify-category-entry range ?j)
 172   (modify-category-entry range ?\|))
 173
 174 ;; Katakana block
 175 (let ((range '(#x30a0 . #x30ff)))
 176   ;; ?K is double width, ?k isn't specified
 177   (modify-category-entry range ?K)
 178   (modify-category-entry range ?\|))
 179
 180 ;; Hiragana block
 181 (let ((range '(#x3040 . #x309f)))
 182   ;; ?H is actually defined to be double width
 183   ;;(modify-category-entry range ?H)
 184   ;;(modify-category-entry range ?\|)
 185   )
 186
 187 ;; JISX0208
 188 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2121 #x227E)
 189 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2821 #x287E)
 190 (let ((chars '(?ー ?゛ ?゜ ?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇)))
 191   (dolist (elt chars)
 192     (modify-syntax-entry (car chars) "w")))
 193
 194 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?A #x2321 #x237E)
 195 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?H #x2421 #x247E)
 196 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?K #x2521 #x257E)
 197 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?G #x2621 #x267E)
 198 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?Y #x2721 #x277E)
 199 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?C #x3021 #x7E7E)
 200 (modify-category-entry ?ー ?K)
 201 (let ((chars '(?゛ ?゜)))
 202   (while chars
 203     (modify-category-entry (car chars) ?K)
 204     (modify-category-entry (car chars) ?H)
 205     (setq chars (cdr chars))))
 206 (let ((chars '(?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇)))
 207   (while chars
 208     (modify-category-entry (car chars) ?C)
 209     (setq chars (cdr chars))))
 210
 211 ;; JISX0212
 212
 213 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0212 "_" #x2121 #x237E)
 214
 215 ;; JISX0201-Kana
 216
 217 (let ((chars '(?｡ ?､ ?･)))
 218   (while chars
 219     (modify-syntax-entry (car chars) ".")
 220     (setq chars (cdr chars))))
 221
 222 (modify-syntax-entry ?\｢ "(｣")
 223 (modify-syntax-entry ?\｣ "(｢")
 224
 225 ;; Korean character set (KSC5601)
 226
 227 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?h)
 228
 229 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2121 #x227E)
 230 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2621 #x277E)
 231 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2830 #x287E)
 232 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2930 #x297E)
 233 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2330 #x2339)
 234 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2341 #x235A)
 235 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2361 #x237A)
 236 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?G #x2521 #x257E)
 237 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?H #x2A21 #x2A7E)
 238 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?K #x2B21 #x2B7E)
 239 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?Y #x2C21 #x2C7E)
 240
 241 ;; These are in more than one charset.
 242 (modify-syntax-entry ?\（ "(）")
 243 (modify-syntax-entry ?\［ "(］")
 244 (modify-syntax-entry ?\｛ "(｝")
 245 (modify-syntax-entry ?\「 "(」")
 246 (modify-syntax-entry ?\『 "(』")
 247 (modify-syntax-entry ?\） ")（")
 248 (modify-syntax-entry ?\］ ")［")
 249 (modify-syntax-entry ?\｝ ")｛")
 250 (modify-syntax-entry ?\」 ")「")
 251 (modify-syntax-entry ?\』 ")『")
 252
 253 (modify-syntax-entry ?\〔 "(〕")
 254 (modify-syntax-entry ?\〈 "(〉")
 255 (modify-syntax-entry ?\《 "(》")
 256 (modify-syntax-entry ?\〖 "(〗")
 257 (modify-syntax-entry ?\【 "(】")
 258 (modify-syntax-entry ?\〕 ")〔")
 259 (modify-syntax-entry ?\〉 ")〈")
 260 (modify-syntax-entry ?\》 ")《")
 261 (modify-syntax-entry ?\〗 ")〖")
 262 (modify-syntax-entry ?\】 ")【")
 263 (modify-syntax-entry ?\〚 "(〛")
 264 (modify-syntax-entry ?\〛 ")〚")
 265
 266 ;; Arabic character set
 267
 268 (let ((charsets '(arabic-iso8859-6
 269                   arabic-digit
 270                   arabic-1-column
 271                   arabic-2-column)))
 272   (while charsets
 273     (map-charset-chars #'modify-category-entry (car charsets) ?b)
 274     (setq charsets (cdr charsets))))
 275 (modify-category-entry '(#x600 . #x6ff) ?b)
 276 (modify-category-entry '(#xfb50 . #xfdff) ?b)
 277 (modify-category-entry '(#xfe70 . #xfefe) ?b)
 278
 279 ;; Cyrillic character set (ISO-8859-5)
 280
 281 (modify-syntax-entry ?№ ".")
 282
 283 ;; Ethiopic character set
 284
 285 (modify-category-entry '(#x1200 . #x137b) ?e)
 286 (let ((chars '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨ ?���� ?���� ?���� ?���� ?���� ?����)))
 287   (while chars
 288     (modify-syntax-entry (car chars) ".")
 289     (setq chars (cdr chars))))
 290 (map-charset-chars #'modify-category-entry 'ethiopic ?e)
 291
 292 ;; Hebrew character set (ISO-8859-8)
 293
 294 (modify-syntax-entry #x5be ".") ; MAQAF
 295 (modify-syntax-entry #x5c0 ".") ; PASEQ
 296 (modify-syntax-entry #x5c3 ".") ; SOF PASUQ
 297 (modify-syntax-entry #x5f3 ".") ; GERESH
 298 (modify-syntax-entry #x5f4 ".") ; GERSHAYIM
 299
 300 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
 301
 302 (modify-category-entry '(#x901 . #x970) ?i)
 303 (map-charset-chars #'modify-category-entry 'indian-is13194 ?i)
 304 (map-charset-chars #'modify-category-entry 'indian-2-column ?i)
 305
 306 ;; Lao character set
 307
 308 (modify-category-entry '(#xe80 . #xeff) ?o)
 309 (map-charset-chars #'modify-category-entry 'lao ?o)
 310
  311 (let ((deflist  '(("ກ-ຮ"    "w"     ?0) ; consonant
  312                   ("ະາຳຽເ-ໄ"        "w"     ?1) ; vowel base
  313                   ("ັິ-ືົໍ"   "w"     ?2) ; vowel upper
  314                   ("ຸູ"     "w"     ?3) ; vowel lower
  315                   ("່-໋"    "w"     ?4) ; tone mark
  316                   ("ຼຽ"     "w"     ?9) ; semivowel lower
  317                   ("໐-໙"    "w"     ?6) ; digit
  318                   ("ຯໆ"     "_"     ?5) ; symbol
  319                   ))
  320       elm chars len syntax category to ch i)
        ;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  CHARS is a string
        ;; made of single characters and `FROM-TO' ranges.  Every character it
        ;; denotes gets CATEGORY; it also gets SYNTAX, unless SYNTAX is "w",
        ;; in which case the syntax table is left alone.
  321   (while deflist
  322     (setq elm (car deflist))
  323     (setq chars (car elm)
  324           len (length chars)
  325           syntax (nth 1 elm)
  326           category (nth 2 elm)
  327           i 0)
  328     (while (< i len)
  329       (if (= (aref chars i) ?-)
                ;; A hyphen: the next character is the end of the range.  CH
                ;; is not reset, so it is still one past the character that
                ;; was processed just before the hyphen.
  330           (setq i (1+ i)
  331                 to (aref chars i))
              ;; A plain character is treated as a one-element range.
  332         (setq ch (aref chars i)
  333               to ch))
            ;; Apply the entry to CH..TO; on exit CH is TO + 1.
  334       (while (<= ch to)
  335         (unless (string-equal syntax "w")
  336           (modify-syntax-entry ch syntax))
  337         (modify-category-entry ch category)
  338         (setq ch (1+ ch)))
  339       (setq i (1+ i)))
  340     (setq deflist (cdr deflist))))
 341
 342 ;; Thai character set (TIS620)
 343
 344 (modify-category-entry '(#xe00 . #xe7f) ?t)
 345 (map-charset-chars #'modify-category-entry 'thai-tis620 ?t)
 346
  347 (let ((deflist  '(;; chars      syntax  category
  348                   ("ก-รลว-ฮ"  "w"     ?0) ; consonant
  349                   ("ฤฦะาำเ-ๅ"     "w"     ?1) ; vowel base
  350                   ("ัิ-ื็๎"   "w"     ?2) ; vowel upper
  351                   ("ุ-ฺ"    "w"     ?3) ; vowel lower
  352                   ("่-ํ"    "w"     ?4) ; tone mark
  353                   ("๐-๙"    "w"     ?6) ; digit
  354                   ("ฯๆ฿๏๚๛" "_"     ?5) ; symbol
  355                   ))
  356       elm chars len syntax category to ch i)
        ;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  CHARS is a string
        ;; made of single characters and `FROM-TO' ranges.  Every character it
        ;; denotes gets CATEGORY; it also gets SYNTAX, unless SYNTAX is "w",
        ;; in which case the syntax table is left alone.
  357   (while deflist
  358     (setq elm (car deflist))
  359     (setq chars (car elm)
  360           len (length chars)
  361           syntax (nth 1 elm)
  362           category (nth 2 elm)
  363           i 0)
  364     (while (< i len)
  365       (if (= (aref chars i) ?-)
                ;; A hyphen: the next character is the end of the range.  CH
                ;; is not reset, so it is still one past the character that
                ;; was processed just before the hyphen.
  366           (setq i (1+ i)
  367                 to (aref chars i))
              ;; A plain character is treated as a one-element range.
  368         (setq ch (aref chars i)
  369               to ch))
            ;; Apply the entry to CH..TO; on exit CH is TO + 1.
  370       (while (<= ch to)
  371         (unless (string-equal syntax "w")
  372           (modify-syntax-entry ch syntax))
  373         (modify-category-entry ch category)
  374         (setq ch (1+ ch)))
  375       (setq i (1+ i)))
  376     (setq deflist (cdr deflist))))
 377
 378 ;; Tibetan character set
 379
 380 (modify-category-entry '(#xf00 . #xfff) ?q)
 381 (map-charset-chars #'modify-category-entry 'tibetan ?q)
 382 (map-charset-chars #'modify-category-entry 'tibetan-1-column ?q)
 383
 384 (let ((deflist  '(;; chars             syntax category
 385                   ("ཀ-ཀྵཪ"         "w"     ?0) ; consonant
 386                   ("ྐ-ྐྵྺྻྼ��������"       "w"     ?0) ;
 387                   ("����-����"              "w"     ?0) ;
 388                   ("����-����"              "w"     ?0) ;
 389                   ("ིེཻོཽྀ"       "w"       ?2) ; upper vowel
 390                   ("ཾྂྃ྆྇ྈྉྊྋ" "w"    ?2) ; upper modifier
 391                   ("༙����྄ཱུ༵༷"       "w"   ?3) ; lowel vowel/modifier
 392                   ("༠-༩༪-༳"             "w"     ?6) ; digit
 393                   ("་།-༒༔ཿ"        "."     ?|) ; line-break char
 394                   ("་།༏༐༑༔ཿ"            "."     ?|) ;
 395                   ("༈་།-༒༔ཿ༽༴"  "."     ?>) ; prohibition
 396                   ("་།༏༐༑༔ཿ"            "."     ?>) ;
 397                   ("ༀ-༊༼࿁࿂྅"      "."     ?<) ; prohibition
 398                   ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q) ; others
 399                   ))
 400       elm chars len syntax category to ch i)
 401   (while deflist
 402     (setq elm (car deflist))
 403     (setq chars (car elm)
 404           len (length chars)
 405           syntax (nth 1 elm)
 406           category (nth 2 elm)
 407           i 0)
 408     (while (< i len)
 409       (if (= (aref chars i) ?-)
 410           (setq i (1+ i)
 411                 to (aref chars i))
 412         (setq ch (aref chars i)
 413               to ch))
 414       (while (<= ch to)
 415         (unless (string-equal syntax "w")
 416           (modify-syntax-entry ch syntax))
 417         (modify-category-entry ch category)
 418         (setq ch (1+ ch)))
 419       (setq i (1+ i)))
 420     (setq deflist (cdr deflist))))
 421
 422 ;; Vietnamese character set
 423
 424 ;; To make a word with Latin characters
 425 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?l)
 426 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?v)
 427
 428 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?l)
 429 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?v)
 430
 431 (let ((tbl (standard-case-table))
 432       (i 32))
 433   (while (< i 128)
 434     (let* ((char (decode-char 'vietnamese-viscii-upper i))
 435            (charl (decode-char 'vietnamese-viscii-lower i))
 436            (uc (encode-char char 'ucs))
 437            (lc (encode-char charl 'ucs)))
 438       (set-case-syntax-pair char (decode-char 'vietnamese-viscii-lower i)
 439                             tbl)
 440       (if uc (modify-category-entry uc ?v))
 441       (if lc (modify-category-entry lc ?v)))
 442     (setq i (1+ i))))
 443
 444
 445 ;; Latin
 446
 447 (modify-category-entry '(#x80 . #x024F) ?l)
 448
 449 (let ((tbl (standard-case-table)) c)
 450
 451 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
 452 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
 453 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
 454 ;; See the Turkish language environment.
 455
 456   ;; Latin-1
 457
 458   ;; Fixme: Some of the non-word syntaxes here perhaps should be
 459   ;; reviewed.  (Note that the following all implicitly have word
 460   ;; syntax: ¢£¤¥¨ª¯²³´¶¸¹º.)  There should be a well-defined way of
 461   ;; relating Unicode categories to Emacs syntax codes.
 462   (set-case-syntax ?  " " tbl)         ; dubious
 463   (set-case-syntax ?¡ "." tbl)
 464   (set-case-syntax ?¦ "_" tbl)
 465   (set-case-syntax ?§ "." tbl)
 466   (set-case-syntax ?© "_" tbl)
 467   (set-case-syntax-delims 171 187 tbl)  ; « »
 468   (set-case-syntax ?¬ "_" tbl)
 469   (set-case-syntax ? "_" tbl)
 470   (set-case-syntax ?® "_" tbl)
 471   (set-case-syntax ?° "_" tbl)
 472   (set-case-syntax ?± "_" tbl)
 473   (set-case-syntax ?µ "_" tbl)
 474   (set-case-syntax ?· "_" tbl)
 475   (set-case-syntax ?¼ "_" tbl)
 476   (set-case-syntax ?½ "_" tbl)
 477   (set-case-syntax ?¾ "_" tbl)
 478   (set-case-syntax ?¿ "." tbl)
 479   (let ((c 192))
 480     (while (<= c 222)
 481       (set-case-syntax-pair c (+ c 32) tbl)
 482       (setq c (1+ c))))
 483   (set-case-syntax ?× "_" tbl)
 484   (set-case-syntax ?ß "w" tbl)
 485   (set-case-syntax ?÷ "_" tbl)
 486   ;; See below for ÿ.
 487
 488   ;; Latin Extended-A, Latin Extended-B
 489   (setq c #x0100)
 490   (while (<= c #x0233)
 491     (and (or (<= c #x012e)
 492              (and (>= c #x014a) (<= c #x0177)))
 493          (zerop (% c 2))
 494          (set-case-syntax-pair c (1+ c) tbl))
 495     (and (>= c #x013a)
 496          (<= c #x0148)
 497          (zerop (% c 2))
 498          (set-case-syntax-pair (1- c) c tbl))
 499     (setq c (1+ c)))
 500   (set-case-syntax-pair ?Ĳ ?ĳ tbl)
 501   (set-case-syntax-pair ?Ĵ ?ĵ tbl)
 502   (set-case-syntax-pair ?Ķ ?ķ tbl)
 503   (set-case-syntax-pair ?Ÿ ?ÿ tbl)
 504   (set-case-syntax-pair ?Ź ?ź tbl)
 505   (set-case-syntax-pair ?Ż ?ż tbl)
 506   (set-case-syntax-pair ?Ž ?ž tbl)
 507
 508   ;; Latin Extended-B
 509   (set-case-syntax-pair ?Ɓ ?ɓ tbl)
 510   (set-case-syntax-pair ?Ƃ ?ƃ tbl)
 511   (set-case-syntax-pair ?Ƅ ?ƅ tbl)
 512   (set-case-syntax-pair ?Ɔ ?ɔ tbl)
 513   (set-case-syntax-pair ?Ƈ ?ƈ tbl)
 514   (set-case-syntax-pair ?Ɖ ?ɖ tbl)
 515   (set-case-syntax-pair ?Ɗ ?ɗ tbl)
 516   (set-case-syntax-pair ?Ƌ ?ƌ tbl)
 517   (set-case-syntax-pair ?Ǝ ?ǝ tbl)
 518   (set-case-syntax-pair ?Ə ?ə tbl)
 519   (set-case-syntax-pair ?Ɛ ?ɛ tbl)
 520   (set-case-syntax-pair ?Ƒ ?ƒ tbl)
 521   (set-case-syntax-pair ?Ɠ ?ɠ tbl)
 522   (set-case-syntax-pair ?Ɣ ?ɣ tbl)
 523   (set-case-syntax-pair ?Ɩ ?ɩ tbl)
 524   (set-case-syntax-pair ?Ɨ ?ɨ tbl)
 525   (set-case-syntax-pair ?Ƙ ?ƙ tbl)
 526   (set-case-syntax-pair ?Ɯ ?ɯ tbl)
 527   (set-case-syntax-pair ?Ɲ ?ɲ tbl)
 528   (set-case-syntax-pair ?Ɵ ?ɵ tbl)
 529   (set-case-syntax-pair ?Ơ ?ơ tbl)
 530   (set-case-syntax-pair ?Ƣ ?ƣ tbl)
 531   (set-case-syntax-pair ?Ƥ ?ƥ tbl)
 532   (set-case-syntax-pair ?Ʀ ?ʀ tbl)
 533   (set-case-syntax-pair ?Ƨ ?ƨ tbl)
 534   (set-case-syntax-pair ?Ʃ ?ʃ tbl)
 535   (set-case-syntax-pair ?Ƭ ?ƭ tbl)
 536   (set-case-syntax-pair ?Ʈ ?ʈ tbl)
 537   (set-case-syntax-pair ?Ư ?ư tbl)
 538   (set-case-syntax-pair ?Ʊ ?ʊ tbl)
 539   (set-case-syntax-pair ?Ʋ ?ʋ tbl)
 540   (set-case-syntax-pair ?Ƴ ?ƴ tbl)
 541   (set-case-syntax-pair ?Ƶ ?ƶ tbl)
 542   (set-case-syntax-pair ?Ʒ ?ʒ tbl)
 543   (set-case-syntax-pair ?Ƹ ?ƹ tbl)
 544   (set-case-syntax-pair ?Ƽ ?ƽ tbl)
 545   (set-case-syntax-pair ?Ǆ ?ǆ tbl)
 546   (set-case-syntax-pair ?ǅ ?ǆ tbl)
 547   (set-case-syntax-pair ?Ǉ ?ǉ tbl)
 548   (set-case-syntax-pair ?ǈ ?ǉ tbl)
 549   (set-case-syntax-pair ?Ǌ ?ǌ tbl)
 550   (set-case-syntax-pair ?ǋ ?ǌ tbl)
 551   (set-case-syntax-pair ?Ǎ ?ǎ tbl)
 552   (set-case-syntax-pair ?Ǐ ?ǐ tbl)
 553   (set-case-syntax-pair ?Ǒ ?ǒ tbl)
 554   (set-case-syntax-pair ?Ǔ ?ǔ tbl)
 555   (set-case-syntax-pair ?Ǖ ?ǖ tbl)
 556   (set-case-syntax-pair ?Ǘ ?ǘ tbl)
 557   (set-case-syntax-pair ?Ǚ ?ǚ tbl)
 558   (set-case-syntax-pair ?Ǜ ?ǜ tbl)
 559   (set-case-syntax-pair ?Ǟ ?ǟ tbl)
 560   (set-case-syntax-pair ?Ǡ ?ǡ tbl)
 561   (set-case-syntax-pair ?Ǣ ?ǣ tbl)
 562   (set-case-syntax-pair ?Ǥ ?ǥ tbl)
 563   (set-case-syntax-pair ?Ǧ ?ǧ tbl)
 564   (set-case-syntax-pair ?Ǩ ?ǩ tbl)
 565   (set-case-syntax-pair ?Ǫ ?ǫ tbl)
 566   (set-case-syntax-pair ?Ǭ ?ǭ tbl)
 567   (set-case-syntax-pair ?Ǯ ?ǯ tbl)
 568   ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
 569   (set-case-syntax-pair ?Ǳ ?ǳ tbl)
 570   (set-case-syntax-pair ?ǲ ?ǳ tbl)
 571   (set-case-syntax-pair ?Ǵ ?ǵ tbl)
 572   (set-case-syntax-pair ?Ƕ ?ƕ tbl)
 573   (set-case-syntax-pair ?Ƿ ?ƿ tbl)
 574   (set-case-syntax-pair ?Ǹ ?ǹ tbl)
 575   (set-case-syntax-pair ?Ǻ ?ǻ tbl)
 576   (set-case-syntax-pair ?Ǽ ?ǽ tbl)
 577   (set-case-syntax-pair ?Ǿ ?ǿ tbl)
 578   (set-case-syntax-pair ?Ȁ ?ȁ tbl)
 579   (set-case-syntax-pair ?Ȃ ?ȃ tbl)
 580   (set-case-syntax-pair ?Ȅ ?ȅ tbl)
 581   (set-case-syntax-pair ?Ȇ ?ȇ tbl)
 582   (set-case-syntax-pair ?Ȉ ?ȉ tbl)
 583   (set-case-syntax-pair ?Ȋ ?ȋ tbl)
 584   (set-case-syntax-pair ?Ȍ ?ȍ tbl)
 585   (set-case-syntax-pair ?Ȏ ?ȏ tbl)
 586   (set-case-syntax-pair ?Ȑ ?ȑ tbl)
 587   (set-case-syntax-pair ?Ȓ ?ȓ tbl)
 588   (set-case-syntax-pair ?Ȕ ?ȕ tbl)
 589   (set-case-syntax-pair ?Ȗ ?ȗ tbl)
 590   (set-case-syntax-pair ?Ș ?ș tbl)
 591   (set-case-syntax-pair ?Ț ?ț tbl)
 592   (set-case-syntax-pair ?Ȝ ?ȝ tbl)
 593   (set-case-syntax-pair ?Ȟ ?ȟ tbl)
 594   (set-case-syntax-pair ?Ȣ ?ȣ tbl)
 595   (set-case-syntax-pair ?Ȥ ?ȥ tbl)
 596   (set-case-syntax-pair ?Ȧ ?ȧ tbl)
 597   (set-case-syntax-pair ?Ȩ ?ȩ tbl)
 598   (set-case-syntax-pair ?Ȫ ?ȫ tbl)
 599   (set-case-syntax-pair ?Ȭ ?ȭ tbl)
 600   (set-case-syntax-pair ?Ȯ ?ȯ tbl)
 601   (set-case-syntax-pair ?Ȱ ?ȱ tbl)
 602   (set-case-syntax-pair ?Ȳ ?ȳ tbl)
 603
 604   ;; Latin Extended Additional
 605   (modify-category-entry '(#x1e00 . #x1ef9) ?l)
 606   (setq c #x1e00)
 607   (while (<= c #x1ef9)
 608     (and (zerop (% c 2))
 609          (or (<= c #x1e94) (>= c #x1ea0))
 610          (set-case-syntax-pair c (1+ c) tbl))
 611     (setq c (1+ c)))
 612
 613   ;; Greek
 614   (modify-category-entry '(#x0370 . #x03ff) ?g)
 615   (setq c #x0370)
 616   (while (<= c #x03ff)
 617     (if (or (and (>= c #x0391) (<= c #x03a1))
 618             (and (>= c #x03a3) (<= c #x03ab)))
 619         (set-case-syntax-pair c (+ c 32) tbl))
 620     (and (>= c #x03da)
 621          (<= c #x03ee)
 622          (zerop (% c 2))
 623          (set-case-syntax-pair c (1+ c) tbl))
 624     (setq c (1+ c)))
 625   (set-case-syntax-pair ?Ά ?ά tbl)
 626   (set-case-syntax-pair ?Έ ?έ tbl)
 627   (set-case-syntax-pair ?Ή ?ή tbl)
 628   (set-case-syntax-pair ?Ί ?ί tbl)
 629   (set-case-syntax-pair ?Ό ?ό tbl)
 630   (set-case-syntax-pair ?Ύ ?ύ tbl)
 631   (set-case-syntax-pair ?Ώ ?ώ tbl)
 632
 633   ;; Armenian
 634   (setq c #x531)
 635   (while (<= c #x556)
 636     (set-case-syntax-pair c (+ c #x30) tbl)
 637     (setq c (1+ c)))
 638
 639   ;; Greek Extended
 640   (modify-category-entry '(#x1f00 . #x1fff) ?g)
 641   (setq c #x1f00)
 642   (while (<= c #x1fff)
 643     (and (<= (logand c #x000f) 7)
 644          (<= c #x1fa7)
 645          (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
 646          (/= (logand c #x00f0) 7)
 647          (set-case-syntax-pair (+ c 8) c tbl))
 648     (setq c (1+ c)))
 649   (set-case-syntax-pair ?Ᾰ ?ᾰ tbl)
 650   (set-case-syntax-pair ?Ᾱ ?ᾱ tbl)
 651   (set-case-syntax-pair ?Ὰ ?ὰ tbl)
 652   (set-case-syntax-pair ?Ά ?ά tbl)
 653   (set-case-syntax-pair ?ᾼ ?ᾳ tbl)
 654   (set-case-syntax-pair ?Ὲ ?ὲ tbl)
 655   (set-case-syntax-pair ?Έ ?έ tbl)
 656   (set-case-syntax-pair ?Ὴ ?ὴ tbl)
 657   (set-case-syntax-pair ?Ή ?ή tbl)
 658   (set-case-syntax-pair ?ῌ ?ῃ tbl)
 659   (set-case-syntax-pair ?Ῐ ?ῐ tbl)
 660   (set-case-syntax-pair ?Ῑ ?ῑ tbl)
 661   (set-case-syntax-pair ?Ὶ ?ὶ tbl)
 662   (set-case-syntax-pair ?Ί ?ί tbl)
 663   (set-case-syntax-pair ?Ῠ ?ῠ tbl)
 664   (set-case-syntax-pair ?Ῡ ?ῡ tbl)
 665   (set-case-syntax-pair ?Ὺ ?ὺ tbl)
 666   (set-case-syntax-pair ?Ύ ?ύ tbl)
 667   (set-case-syntax-pair ?Ῥ ?ῥ tbl)
 668   (set-case-syntax-pair ?Ὸ ?ὸ tbl)
 669   (set-case-syntax-pair ?Ό ?ό tbl)
 670   (set-case-syntax-pair ?Ὼ ?ὼ tbl)
 671   (set-case-syntax-pair ?Ώ ?ώ tbl)
 672   (set-case-syntax-pair ?ῼ ?ῳ tbl)
 673
 674   ;; cyrillic
 675   (modify-category-entry '(#x0400 . #x04FF) ?y)
 676   (setq c #x0400)
 677   (while (<= c #x04ff)
 678     (and (>= c #x0400)
 679          (<= c #x040f)
 680          (set-case-syntax-pair c (+ c 80) tbl))
 681     (and (>= c #x0410)
 682          (<= c #x042f)
 683          (set-case-syntax-pair c (+ c 32) tbl))
 684     (and (zerop (% c 2))
 685          (or (and (>= c #x0460) (<= c #x0480))
 686              (and (>= c #x048c) (<= c #x04be))
 687              (and (>= c #x04d0) (<= c #x04f4)))
 688          (set-case-syntax-pair c (1+ c) tbl))
 689     (setq c (1+ c)))
 690   (set-case-syntax-pair ?Ӂ ?ӂ tbl)
 691   (set-case-syntax-pair ?Ӄ ?ӄ tbl)
 692   (set-case-syntax-pair ?Ӈ ?ӈ tbl)
 693   (set-case-syntax-pair ?Ӌ ?ӌ tbl)
 694   (set-case-syntax-pair ?Ӹ ?ӹ tbl)
 695
  696   ;; general punctuation
          ;; C is initialized once and deliberately not reset between the
          ;; loops below: each loop resumes where the previous one stopped,
          ;; so together they cover #x2000..#x206F in consecutive sub-ranges.
          ;; Reordering the loops would change which range gets which syntax.
  697   (setq c #x2000)
  698   (while (<= c #x200b)               ; #x2000..#x200B: whitespace
  699     (set-case-syntax c " " tbl)
  700     (setq c (1+ c)))
  701   (while (<= c #x200F)               ; #x200C..#x200F: punctuation
  702     (set-case-syntax c "." tbl)
  703     (setq c (1+ c)))
  704   ;; Fixme: These aren't all right:
  705   (while (<= c #x2027)               ; #x2010..#x2027: symbol
  706     (set-case-syntax c "_" tbl)
  707     (setq c (1+ c)))
  708   (while (<= c #x206F)               ; #x2028..#x206F: punctuation
  709     (set-case-syntax c "." tbl)
  710     (setq c (1+ c)))
 711
 712   ;; Roman numerals
 713   (setq c #x2160)
 714   (while (<= c #x216f)
 715     (set-case-syntax-pair c (+ c #x10) tbl)
 716     (setq c (1+ c)))
 717
 718   ;; Fixme: The following blocks might be better as symbol rather than
 719   ;; punctuation.
  720   ;; Arrows
          ;; C is set once here and carried from one loop to the next, so each
          ;; loop below starts at the code point where the previous one ended.
  721   (setq c #x2190)
  722   (while (<= c #x21FF)               ; #x2190..#x21FF: punctuation
  723     (set-case-syntax c "." tbl)
  724     (setq c (1+ c)))
  725   ;; Mathematical Operators
  726   (while (<= c #x22FF)               ; continues at #x2200
  727     (set-case-syntax c "." tbl)
  728     (setq c (1+ c)))
  729   ;; Miscellaneous Technical
  730   (while (<= c #x23FF)               ; continues at #x2300
  731     (set-case-syntax c "." tbl)
  732     (setq c (1+ c)))
  733   ;; Control Pictures
  734   (while (<= c #x243F)               ; continues at #x2400; symbol syntax
  735     (set-case-syntax c "_" tbl)
  736     (setq c (1+ c)))
 737
 738   ;; Circled Latin
 739   (setq c #x24b6)
 740   (while (<= c #x24cf)
 741     (set-case-syntax-pair c (+ c 26) tbl)
 742     (modify-category-entry c ?l)
 743     (modify-category-entry (+ c 26) ?l)
 744     (setq c (1+ c)))
 745
 746   ;; Fullwidth Latin
 747   (setq c #xff21)
 748   (while (<= c #xff3a)
 749     (set-case-syntax-pair c (+ c #x20) tbl)
 750     (modify-category-entry c ?l)
 751     (modify-category-entry (+ c #x20) ?l)
 752     (setq c (1+ c)))
 753
 754   ;; Combining diacritics
 755   (modify-category-entry '(#x300 . #x362) ?^)
 756   ;; Combining marks
 757   (modify-category-entry '(#x20d0 . #x20e3) ?^)
 758
 759   ;; Fixme: syntax for symbols &c
 760   )
 761 \f
  762 ;; For each character set, record the coding system best suited to
  763 ;; encoding it in the charset's `preferred-coding-system' property.
 764
 765 ;; Fixme: should this be junked?
 766 (dolist (pair '((latin-iso8859-1     . iso-latin-1)
 767                 (latin-iso8859-2     . iso-latin-2)
 768                 (latin-iso8859-3     . iso-latin-3)
 769                 (latin-iso8859-4     . iso-latin-4)
 770                 (thai-tis620         . thai-tis620)
 771                 (greek-iso8859-7     . greek-iso-8bit)
 772                 (arabic-iso8859-6    . iso-2022-7bit)
 773                 (hebrew-iso8859-8    . hebrew-iso-8bit)
 774                 (katakana-jisx0201   . japanese-shift-jis)
 775                 (latin-jisx0201      . japanese-shift-jis)
 776                 (cyrillic-iso8859-5  . cyrillic-iso-8bit)
 777                 (latin-iso8859-9     . iso-latin-5)
 778                 (japanese-jisx0208-1978 . iso-2022-jp)
 779                 (chinese-gb2312      . cn-gb-2312)
 780                 (japanese-jisx0208   . iso-2022-jp)
 781                 (korean-ksc5601      . iso-2022-kr)
 782                 (japanese-jisx0212   . iso-2022-jp)
 783                 (chinese-cns11643-1  . iso-2022-cn)
 784                 (chinese-cns11643-2  . iso-2022-cn)
 785                 (chinese-big5-1      . chinese-big5)
 786                 (chinese-big5-2      . chinese-big5)
 787                 (chinese-sisheng     . iso-2022-7bit)
 788                 (ipa                 . iso-2022-7bit)
 789                 (vietnamese-viscii-lower . vietnamese-viscii)
 790                 (vietnamese-viscii-upper . vietnamese-viscii)
 791                 (arabic-digit        . iso-2022-7bit)
 792                 (arabic-1-column     . iso-2022-7bit)
 793                 (lao                 . lao)
 794                 (arabic-2-column     . iso-2022-7bit)
 795                 (indian-is13194      . devanagari)
 796                 (indian-glyph        . devanagari)
 797                 (tibetan-1-column    . tibetan)
 798                 (ethiopic            . iso-2022-7bit)
 799                 (chinese-cns11643-3  . iso-2022-cn)
 800                 (chinese-cns11643-4  . iso-2022-cn)
 801                 (chinese-cns11643-5  . iso-2022-cn)
 802                 (chinese-cns11643-6  . iso-2022-cn)
 803                 (chinese-cns11643-7  . iso-2022-cn)
 804                 (indian-2-column     . devanagari)
 805                 (tibetan             . tibetan)
 806                 (latin-iso8859-14    . iso-latin-8)
 807                 (latin-iso8859-15    . iso-latin-9)))
 808   ;; PAIR is (CHARSET . CODING-SYSTEM); store the coding system on the
 809   ;; charset under the `preferred-coding-system' property.
 810   (put-charset-property (car pair) 'preferred-coding-system (cdr pair)))
 811
 812 \f
 813 ;; Set up auto-fill-chars for charsets that should invoke auto-filling
 814 ;; (SPACE and NEWLINE are already set; the `aset' below is commented
 815 ;; out).  Also put the `nospace-between-words' property on the charsets.
 816 (dolist (charset '(katakana-jisx0201
 817                    japanese-jisx0208 japanese-jisx0212
 818                    chinese-gb2312 chinese-big5-1 chinese-big5-2))
 819   ;; The `auto-fill-chars' registration is disabled; kept for reference.
 820   ;;(aset auto-fill-chars (make-char charset) t)
 821   ;; Flag CHARSET with the `nospace-between-words' property.
 822   (put-charset-property charset 'nospace-between-words t))
 823
 824 \f
 825 ;; CJK double width characters.
 826 (dolist (range '((#x1100 . #x11FF)
 827                  (#x2E80 . #x9FAF)
 828                  (#xAC00 . #xD7AF)
 829                  (#xF900 . #xFAFF)
 830                  (#xFE30 . #xFE4F)
 831                  (#xFF00 . #xFF5F)
 832                  (#xFFE0 . #xFFEF)))
 833   ;; RANGE is a (FROM . TO) pair of code points; give that whole span
 834   ;; a width of 2 in `char-width-table'.
 835   (set-char-table-range char-width-table
 836                         (cons (car range) (cdr range)) 2))
 837 ;; Fixme: Doing this affects non-CJK characters through unification,
 838 ;; but presumably CJK users expect those characters to be
 839 ;; double-width when using these charsets.
 840 ;; (map-charset-chars
 841 ;;  #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
 842 ;;  'japanese-jisx0208)
 843 ;; (map-charset-chars
 844 ;;  #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
 845 ;;  'japanese-jisx0212)
 846 ;; (map-charset-chars
 847 ;;  #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
 848 ;;  'japanese-jisx0213-1)
 849 ;; (map-charset-chars
 850 ;;  #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
 851 ;;  'japanese-jisx0213-2)
 852 ;; (map-charset-chars
 853 ;;  (lambda (range ignore) (set-char-table-range char-width-table range 2))
 854 ;;  'korean-ksc5601)
 855
 856 ;; Other double width
 857 (dolist (wide-charset '(ethiopic
 858                         tibetan
 859                         indian-2-column
 860                         arabic-2-column))
 861   ;; `map-charset-chars' hands the function each range of characters
 862   ;; in WIDE-CHARSET; every such range gets width 2.
 863   (map-charset-chars
 864    (lambda (range ignore)
 865      ;; IGNORE is the callback's second argument; it is unused.
 866      (set-char-table-range char-width-table
 867                            range 2))
 868    wide-charset))
 869
 870 ;; Run `optimize-char-table' over each of these tables in turn.
 871 (dolist (table (list (standard-case-table) char-width-table
 872                      (standard-category-table) (standard-syntax-table)))
 873   (optimize-char-table table))
 874
 875 ;; Map code-point ranges to script symbols in `char-script-table'.  The
 876 ;; Unicode blocks extend past some of these ranges with undefined codepoints.
 877 (let (seen-scripts)
 878   (dolist
 879       (entry
 880        '((#x0000 #x007F latin)
 881          (#x00A0 #x036F latin)
 882          (#x0370 #x03E1 greek)
 883          (#x03E2 #x03EF coptic)
 884          (#x03F0 #x03F3 greek)
 885          (#x0400 #x04FF cyrillic)
 886          (#x0530 #x058F armenian)
 887          (#x0590 #x05FF hebrew)
 888          (#x0600 #x06FF arabic)
 889          (#x0700 #x074F syriac)
 890          (#x0780 #x07BF thaana)
 891          (#x0900 #x097F devanagari)
 892          (#x0980 #x09FF bengali)
 893          (#x0A00 #x0A7F gurmukhi)
 894          (#x0A80 #x0AFF gujarati)
 895          (#x0B00 #x0B7F oriya)
 896          (#x0B80 #x0BFF tamil)
 897          (#x0C00 #x0C7F telugu)
 898          (#x0C80 #x0CFF kannada)
 899          (#x0D00 #x0D7F malayalam)
 900          (#x0D80 #x0DFF sinhala)
 901          (#x0E00 #x0E5F thai)
 902          (#x0E80 #x0EDF lao)
 903          (#x0F00 #x0FFF tibetan)
 904          (#x1000 #x105F myanmar)
 905          (#x10A0 #x10FF georgian)
 906          (#x1100 #x11FF hangul)
 907          (#x1200 #x137F ethiopic)
 908          (#x13A0 #x13FF cherokee)
 909          (#x1400 #x167F canadian-aboriginal)
 910          (#x1680 #x169F ogham)
 911          (#x16A0 #x16FF runic)
 912          (#x1780 #x17FF khmer)
 913          (#x1800 #x18AF mongolian)
 914          (#x1E00 #x1EFF latin)
 915          (#x1F00 #x1FFF greek)
 916          (#x20A0 #x20AF currency)
 917          (#x2800 #x28FF braille)
 918          (#x2E80 #x2FDF han)
 919          (#x2FF0 #x2FFF ideographic-description)
 920          (#x3000 #x303F cjk-misc)
 921          (#x3040 #x30FF kana)
 922          (#x3100 #x312F bopomofo)
 923          (#x3130 #x318F hangul)
 924          (#x3190 #x319F kanbun)
 925          (#x31A0 #x31BF bopomofo)
 926          (#x3400 #x9FAF han)
 927          (#xA000 #xA4CF yi)
 928          (#xAC00 #xD7AF hangul)
 929          (#xF900 #xFA5F han)
 930          (#xFB1D #xFB4F hebrew)
 931          (#xFB50 #xFDFF arabic)
 932          (#xFE70 #xFEFC arabic)
 933          (#xFF00 #xFF5F cjk-misc)
 934          (#xFF61 #xFF9F kana)
 935          (#xFFE0 #xFFE6 cjk-misc)
 936          (#x20000 #x2AFFF han)
 937          (#x2F800 #x2FFFF han)))
 938     (let ((script (nth 2 entry)))      ; ENTRY is (FROM TO SCRIPT)
 939       (set-char-table-range char-script-table
 940                             (cons (car entry) (nth 1 entry)) script)
 941       (unless (memq script seen-scripts) (push script seen-scripts))))
 942   (set-char-table-extra-slot char-script-table 0 (nreverse seen-scripts)))
 943
 944 ;; Tag every character range of the `tibetan' charset as script `tibetan'.
 945 (map-charset-chars
 946  (lambda (chars ignore) (set-char-table-range char-script-table chars 'tibetan))
 947  'tibetan)
 948
 949 \f
 950 ;;; Setting word boundary.
 951
 952 (defun next-word-boundary-han (pos limit)
 953   "Return the word boundary after POS if POS <= LIMIT, else the one before."
 954   (if (<= pos limit)
 955       (save-excursion
 956         (goto-char pos)
 957         ;; Move only on a match; a failed `looking-at' leaves stale data.
 958         (if (looking-at "\\cC+") (goto-char (match-end 0)))
 959         (if (looking-at "\\cH+") (goto-char (match-end 0)))
 960         (point))
 961     (while (and (> pos limit)          ; backward: skip `han'-script chars
 962                 (eq (aref char-script-table (char-after (1- pos))) 'han))
 963       (setq pos (1- pos)))
 964     pos))
 965
 966 (defun next-word-boundary-kana (pos limit) ; POS <= LIMIT: forward; else backward
 967   (if (<= pos limit)                   ; K/H/C/A: Katakana/Hiragana/Chinese/Alpha numeric
 968       (save-excursion
 969         (goto-char pos)
 970         (if (looking-at "\\cK+")        ; skip a run of category-K characters
 971             (goto-char (match-end 0)))
 972         (if (looking-at "\\cH+")        ; then a run of category-H characters
 973             (goto-char (match-end 0)))
 974         (point))                        ; forward result: end of those runs
 975     (let ((category-set (char-category-set (char-after pos)))
 976           category)                     ; category of the final run, if any
 977       (if (aref category-set ?K)        ; character at POS is in category K:
 978           (while (and (> pos limit)     ; back up over the preceding K run
 979                       (aref (char-category-set (char-after (1- pos))) ?K))
 980             (setq pos (1- pos)))
 981         (while (and (> pos limit)       ; otherwise back up over an H run;
 982                     (aref (setq category-set ; keeps the set last examined
 983                                 (char-category-set (char-after (1- pos)))) ?H))
 984           (setq pos (1- pos)))
 985         (setq category (cond ((aref category-set ?C) ?C) ; first of C, K, A
 986                              ((aref category-set ?K) ?K) ; present in that set,
 987                              ((aref category-set ?A) ?A))) ; or nil if none
 988         (when category
 989           (setq pos (1- pos))           ; NOTE(review): LIMIT not re-checked here
 990           (while (and (> pos limit)     ; back up over the run sharing CATEGORY
 991                       (aref (char-category-set (char-after (1- pos)))
 992                             category))
 993             (setq pos (1- pos)))))
 994       pos)))                            ; backward result
 995
 996 (map-char-table
 997  (lambda (char script)
 998    ;; Only the `han' and `kana' scripts have a boundary function here.
 999    (let ((finder (cond ((eq script 'han) #'next-word-boundary-han)
1000                        ((eq script 'kana) #'next-word-boundary-kana))))
1001      (when finder
1002        (set-char-table-range next-word-boundary-function-table
1003                              char finder))))
1004  char-script-table)
1005
1006 (setq word-combining-categories '((?l . ?l))
1007       ;; Each pair below names two categories; the trailing comments
1008       ;; spell them out.  All of them are categories of 2-byte
1009       ;; character sets.
1010       word-separating-categories
1011       '((?A . ?K)                       ; Alpha numeric - Katakana
1012         (?A . ?C)                       ; Alpha numeric - Chinese
1013         (?H . ?A)                       ; Hiragana - Alpha numeric
1014         (?H . ?K)                       ; Hiragana - Katakana
1015         (?H . ?C)                       ; Hiragana - Chinese
1016         (?K . ?A)                       ; Katakana - Alpha numeric
1017         (?K . ?C)                       ; Katakana - Chinese
1018         (?C . ?A)                       ; Chinese - Alpha numeric
1019         (?C . ?K)))                     ; Chinese - Katakana
1020
1021 ;;; Local Variables:
1022 ;;; coding: utf-8-emacs
1023 ;;; End:
1024
1025 ;;; characters.el ends here