*** empty log message ***

[gnu-emacs] / lisp / international / characters.el
diff --git a/lisp/international/characters.el b/lisp/international/characters.el

index 4fe00017b4e3f3aad3c2658764f9e65879a750e1..bd353c53b01e331503b7bc59c32321d2cbd777df 100644 (file)
--- a/lisp/international/characters.el
+++ b/lisp/international/characters.el
@@ -1,9 +1,9 @@
  ;;; characters.el --- set syntax and category for multibyte characters
  
  ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
-;; Licensed to the Free Software Foundation.
+;;   Licensed to the Free Software Foundation.
  ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
-;; Copyright (C) 2001, 2002
+;; Copyright (C) 2003
  ;;   National Institute of Advanced Industrial Science and Technology (AIST)
  ;;   Registration Number H13PRO009
  
@@ -28,12 +28,6 @@
  
  ;;; Commentary:
  
-;; This file contains multibyte characters.  Save this file always in
-;; the coding system `iso-2022-7bit'.
-
-;; This file does not define the syntax for Latin-N character sets;
-;; those are defined by the files latin-N.el.
-
  ;;; Code:
  
  ;;; Predefined categories.
@@ -86,7 +80,7 @@
  (define-category ?| "While filling, we can break a line at this character.")
  
  ;; For indentation calculation.
-(define-category ? 
+(define-category ?\s
    "This character counts as a space for indentation purposes.")
  
  ;; Keep the following for `kinsoku' processing.  See comments in
@@ -155,15 +149,18 @@
      (map-charset-chars #'modify-category-entry c ?C))
    (map-charset-chars #'modify-category-entry c ?|))
  
-;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
+;; Japanese character set (JISX0201, JISX0208, JISX0212, JISX0213)
  
  (map-charset-chars #'modify-category-entry 'katakana-jisx0201 ?k)
  
  (map-charset-chars #'modify-category-entry 'latin-jisx0201 ?r)
  
-(dolist (l '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212))
+(dolist (l '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212
+                              japanese-jisx0213-1 japanese-jisx0213-2))
    (map-charset-chars #'modify-category-entry l ?j)
-  (map-charset-chars #'modify-category-entry l ?\|))
+  (if (eq l 'japanese-jisx0213-1)       ; this one charset gets ?| on a code sub-range only
+      (map-charset-chars #'modify-category-entry l ?\| #x2E21 #x7E7F) ; trailing args bound the codes visited
+    (map-charset-chars #'modify-category-entry l ?\|))) ; every other charset in the list: no code bounds given
  
  ;; Unicode equivalents of JISX0201-kana
  (let ((range '(#xff61 . #xff9f)))
@@ -178,7 +175,7 @@
    (modify-category-entry range ?\|))
  
  ;; Hiragana block
-(let ((range '(#x3040 . #x309f)))
+(let ((range '(#x3040 . #x309d)))
    ;; ?H is actually defined to be double width
    ;;(modify-category-entry range ?H)
    ;;(modify-category-entry range ?\|)
@@ -239,29 +236,15 @@
  (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?Y #x2C21 #x2C7E)
  
  ;; These are in more than one charset.
-(modify-syntax-entry ?\（ "(）")
-(modify-syntax-entry ?\［ "(］")
-(modify-syntax-entry ?\｛ "(｝")
-(modify-syntax-entry ?\「 "(」")
-(modify-syntax-entry ?\『 "(』")
-(modify-syntax-entry ?\） ")（")
-(modify-syntax-entry ?\］ ")［")
-(modify-syntax-entry ?\｝ ")｛")
-(modify-syntax-entry ?\」 ")「")
-(modify-syntax-entry ?\』 ")『")
-
-(modify-syntax-entry ?\〔 "(〕")
-(modify-syntax-entry ?\〈 "(〉")
-(modify-syntax-entry ?\《 "(》")
-(modify-syntax-entry ?\〖 "(〗")
-(modify-syntax-entry ?\【 "(】")
-(modify-syntax-entry ?\〕 ")〔")
-(modify-syntax-entry ?\〉 ")〈")
-(modify-syntax-entry ?\》 ")《")
-(modify-syntax-entry ?\〗 ")〖")
-(modify-syntax-entry ?\】 ")【")
-(modify-syntax-entry ?\〚 "(〛")
-(modify-syntax-entry ?\〛 ")〚")
+(let ((parens (concat "〈〉《》「」『』【】〔〕〖〗〘〙〚〛" ; read below two chars at a time: opener, then closer
+                     "︵︶︷︸︹︺︻︼︽︾︿﹀﹁﹂﹃﹄"
+                     "（）［］｛｝"))
+      open close)
+  (dotimes (i (/ (length parens) 2))    ; one iteration per two-character pair
+    (setq open (aref parens (* i 2))    ; even index: the opening character
+         close (aref parens (1+ (* i 2)))) ; following index: its closing character
+    (modify-syntax-entry open (format "(%c" close)) ; descriptor is "(" followed by CLOSE
+    (modify-syntax-entry close (format ")%c" open)))) ; descriptor is ")" followed by OPEN
  
  ;; Arabic character set
  
@@ -312,7 +295,7 @@
                   ("ະາຳຽເ-ໄ"        "w"     ?1) ; vowel base
                   ("ັິ-ືົໍ"   "w"     ?2) ; vowel upper
                   ("ຸູ"     "w"     ?3) ; vowel lower
-                 ("່-໋"    "w"     ?4) ; tone mark 
+                 ("່-໋"    "w"     ?4) ; tone mark
                   ("ຼຽ"     "w"     ?9) ; semivowel lower
                   ("໐-໙"    "w"     ?6) ; digit
                   ("ຯໆ"     "_"     ?5) ; symbol
@@ -349,7 +332,7 @@
                   ("ฤฦะาำเ-ๅ"     "w"     ?1) ; vowel base
                   ("ัิ-ื็๎"   "w"     ?2) ; vowel upper
                   ("ุ-ฺ"    "w"     ?3) ; vowel lower
-                 ("่-ํ"    "w"     ?4) ; tone mark 
+                 ("่-ํ"    "w"     ?4) ; tone mark
                   ("๐-๙"    "w"     ?6) ; digit
                   ("ฯๆ฿๏๚๛" "_"     ?5) ; symbol
                   ))
@@ -389,6 +372,7 @@
                   ("ིེཻོཽྀ"       "w"       ?2) ; upper vowel
                   ("ཾྂྃ྆྇ྈྉྊྋ" "w"    ?2) ; upper modifier
                   ("༙����྄ཱུ༵༷"       "w"   ?3) ; lowel vowel/modifier
+                 ("཰"                "w" ?3)             ; invisible vowel a
                   ("༠-༩༪-༳"             "w"     ?6) ; digit
                   ("་།-༒༔ཿ"        "."     ?|) ; line-break char
                   ("་།༏༐༑༔ཿ"            "."     ?|) ;
@@ -685,7 +669,7 @@
          (or (and (>= c #x0460) (<= c #x0480))
              (and (>= c #x048c) (<= c #x04be))
              (and (>= c #x04d0) (<= c #x04f4)))
-        (set-case-syntax-pair c (1+ c) tbl))    
+        (set-case-syntax-pair c (1+ c) tbl))
      (setq c (1+ c)))
    (set-case-syntax-pair ?Ӂ ?ӂ tbl)
    (set-case-syntax-pair ?Ӄ ?ӄ tbl)
@@ -820,7 +804,7 @@
      ;;(aset auto-fill-chars (make-char (car l)) t)
      (put-charset-property (car l) 'nospace-between-words t)
      (setq l (cdr l))))
- 
+
  \f
  ;; CJK double width characters.
  (let ((l '((#x1100 . #x11FF)
@@ -829,7 +813,9 @@
            (#xF900 . #xFAFF)
            (#xFE30 . #xFE4F)
            (#xFF00 . #xFF5F)
-          (#xFFE0 . #xFFEF))))
+          (#xFFE0 . #xFFEF)
+          (#x20000 . #x2AFFF)
+          (#x2F800 . #x2FFFF))))
    (dolist (elt l)
      (set-char-table-range char-width-table
                           (cons (car elt) (cdr elt))
@@ -941,7 +927,7 @@
         (setq script-list (cons (nth 2 elt) script-list))))
    (set-char-table-extra-slot char-script-table 0 (nreverse script-list)))
  
-(map-charset-chars 
+(map-charset-chars
   #'(lambda (range ignore)
       (set-char-table-range char-script-table range 'tibetan))
   'tibetan)
@@ -979,7 +965,7 @@
                       (aref (char-category-set (char-after (1- pos))) ?K))
             (setq pos (1- pos)))
         (while (and (> pos limit)
-                   (aref (setq category-set 
+                   (aref (setq category-set
                                 (char-category-set (char-after (1- pos)))) ?H))
           (setq pos (1- pos)))
         (setq category (cond ((aref category-set ?C) ?C)
@@ -996,10 +982,10 @@
  (map-char-table
   #'(lambda (char script)
       (cond ((eq script 'han)
-           (set-char-table-range next-word-boundary-function-table
+           (set-char-table-range find-word-boundary-function-table
                                   char #'next-word-boundary-han))
            ((eq script 'kana)
-           (set-char-table-range next-word-boundary-function-table
+           (set-char-table-range find-word-boundary-function-table
                                   char #'next-word-boundary-kana))))
   char-script-table)