;;; characters.el --- set syntax and category for multibyte characters
;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
-;; Licensed to the Free Software Foundation.
+;; Licensed to the Free Software Foundation.
;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
-;; Copyright (C) 2001, 2002
+;; Copyright (C) 2003
;; National Institute of Advanced Industrial Science and Technology (AIST)
;; Registration Number H13PRO009
;;; Commentary:
-;; This file contains multibyte characters. Save this file always in
-;; the coding system `iso-2022-7bit'.
-
-;; This file does not define the syntax for Latin-N character sets;
-;; those are defined by the files latin-N.el.
-
;;; Code:
;;; Predefined categories.
(define-category ?| "While filling, we can break a line at this character.")
;; For indentation calculation.
-(define-category ?
+(define-category ?\s
"This character counts as a space for indentation purposes.")
;; Keep the following for `kinsoku' processing. See comments in
(map-charset-chars #'modify-category-entry c ?C))
(map-charset-chars #'modify-category-entry c ?|))
-;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
+;; Japanese character set (JISX0201, JISX0208, JISX0212, JISX0213)
(map-charset-chars #'modify-category-entry 'katakana-jisx0201 ?k)
(map-charset-chars #'modify-category-entry 'latin-jisx0201 ?r)
;; For every charset in the list, give all of its characters category
;; ?j and category ?| (the "while filling, we can break a line at this
;; character" category).  The revised list adds the two
;; japanese-jisx0213 charsets; for japanese-jisx0213-1 only, the extra
;; arguments #x2E21 and #x7E7F are passed so that ?| is applied to that
;; code range instead of to the whole charset.
-(dolist (l '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212))
+(dolist (l '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212
+ japanese-jisx0213-1 japanese-jisx0213-2))
(map-charset-chars #'modify-category-entry l ?j)
- (map-charset-chars #'modify-category-entry l ?\|))
+ (if (eq l 'japanese-jisx0213-1)
+ (map-charset-chars #'modify-category-entry l ?\| #x2E21 #x7E7F)
+ (map-charset-chars #'modify-category-entry l ?\|)))
;; Unicode equivalents of JISX0201-kana
(let ((range '(#xff61 . #xff9f)))
(modify-category-entry range ?\|))
;; Hiragana block
-(let ((range '(#x3040 . #x309f)))
+(let ((range '(#x3040 . #x309d)))
;; ?H is actually defined to be double width
;;(modify-category-entry range ?H)
;;(modify-category-entry range ?\|)
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?Y #x2C21 #x2C7E)
;; These are in more than one charset.
-(modify-syntax-entry ?\( "()")
-(modify-syntax-entry ?\[ "(]")
-(modify-syntax-entry ?\{ "(}")
-(modify-syntax-entry ?\「 "(」")
-(modify-syntax-entry ?\『 "(』")
-(modify-syntax-entry ?\) ")(")
-(modify-syntax-entry ?\] ")[")
-(modify-syntax-entry ?\} "){")
-(modify-syntax-entry ?\」 ")「")
-(modify-syntax-entry ?\』 ")『")
-
-(modify-syntax-entry ?\〔 "(〕")
-(modify-syntax-entry ?\〈 "(〉")
-(modify-syntax-entry ?\《 "(》")
-(modify-syntax-entry ?\〖 "(〗")
-(modify-syntax-entry ?\【 "(】")
-(modify-syntax-entry ?\〕 ")〔")
-(modify-syntax-entry ?\〉 ")〈")
-(modify-syntax-entry ?\》 ")《")
-(modify-syntax-entry ?\〗 ")〖")
-(modify-syntax-entry ?\】 ")【")
-(modify-syntax-entry ?\〚 "(〛")
-(modify-syntax-entry ?\〛 ")〚")
;; `parens' is a flat string of bracket pairs: each even index holds an
;; opening character and the following odd index holds its closing
;; partner.  The loop walks the string two characters at a time and
;; gives the opener the syntax descriptor "(" followed by the closer,
;; and the closer the descriptor ")" followed by the opener.  To add a
;; pair, append both characters, opener first, to one of the strings.
+(let ((parens (concat "〈〉《》「」『』【】〔〕〖〗〘〙〚〛"
+ "︵︶︷︸︹︺︻︼︽︾︿﹀﹁﹂﹃﹄"
+ "()[]{}"))
+ open close)
+ (dotimes (i (/ (length parens) 2))
+ (setq open (aref parens (* i 2))
+ close (aref parens (1+ (* i 2))))
+ (modify-syntax-entry open (format "(%c" close))
+ (modify-syntax-entry close (format ")%c" open))))
;; Arabic character set
("ະາຳຽເ-ໄ" "w" ?1) ; vowel base
("ັິ-ືົໍ" "w" ?2) ; vowel upper
("ຸູ" "w" ?3) ; vowel lower
- ("່-໋" "w" ?4) ; tone mark
+ ("່-໋" "w" ?4) ; tone mark
("ຼຽ" "w" ?9) ; semivowel lower
("໐-໙" "w" ?6) ; digit
("ຯໆ" "_" ?5) ; symbol
("ฤฦะาำเ-ๅ" "w" ?1) ; vowel base
("ัิ-ื็๎" "w" ?2) ; vowel upper
("ุ-ฺ" "w" ?3) ; vowel lower
- ("่-ํ" "w" ?4) ; tone mark
+ ("่-ํ" "w" ?4) ; tone mark
("๐-๙" "w" ?6) ; digit
("ฯๆ฿๏๚๛" "_" ?5) ; symbol
))
("ིེཻོཽྀ" "w" ?2) ; upper vowel
("ཾྂྃ྆྇ྈྉྊྋ" "w" ?2) ; upper modifier
("྄ཱུ༙༵༷" "w" ?3) ; lower vowel/modifier
+ ("" "w" ?3) ; invisible vowel a
("༠-༩༪-༳" "w" ?6) ; digit
("་།-༒༔ཿ" "." ?|) ; line-break char
("་།༏༐༑༔ཿ" "." ?|) ;
(or (and (>= c #x0460) (<= c #x0480))
(and (>= c #x048c) (<= c #x04be))
(and (>= c #x04d0) (<= c #x04f4)))
- (set-case-syntax-pair c (1+ c) tbl))
+ (set-case-syntax-pair c (1+ c) tbl))
(setq c (1+ c)))
(set-case-syntax-pair ?Ӂ ?ӂ tbl)
(set-case-syntax-pair ?Ӄ ?ӄ tbl)
;;(aset auto-fill-chars (make-char (car l)) t)
(put-charset-property (car l) 'nospace-between-words t)
(setq l (cdr l))))
-
+
\f
;; CJK double width characters.
(let ((l '((#x1100 . #x11FF)
(#xF900 . #xFAFF)
(#xFE30 . #xFE4F)
(#xFF00 . #xFF5F)
- (#xFFE0 . #xFFEF))))
+ (#xFFE0 . #xFFEF)
+ (#x20000 . #x2AFFF)
+ (#x2F800 . #x2FFFF))))
(dolist (elt l)
(set-char-table-range char-width-table
(cons (car elt) (cdr elt))
(setq script-list (cons (nth 2 elt) script-list))))
(set-char-table-extra-slot char-script-table 0 (nreverse script-list)))
-(map-charset-chars
;; Record the script `tibetan' in char-script-table for each range that
;; map-charset-chars reports for the `tibetan' charset.  The lambda's
;; second argument is accepted but not used.
+(map-charset-chars
#'(lambda (range ignore)
(set-char-table-range char-script-table range 'tibetan))
'tibetan)
(aref (char-category-set (char-after (1- pos))) ?K))
(setq pos (1- pos)))
(while (and (> pos limit)
- (aref (setq category-set
+ (aref (setq category-set
(char-category-set (char-after (1- pos)))) ?H))
(setq pos (1- pos)))
(setq category (cond ((aref category-set ?C) ?C)
;; Walk char-script-table and, for each entry whose script is `han' or
;; `kana', store the corresponding next-word-boundary-* function for
;; that entry's characters in the word-boundary function table.
;; Entries with any other script are left untouched.
(map-char-table
#'(lambda (char script)
(cond ((eq script 'han)
- (set-char-table-range next-word-boundary-function-table
+ (set-char-table-range find-word-boundary-function-table
char #'next-word-boundary-han))
((eq script 'kana)
- (set-char-table-range next-word-boundary-function-table
+ (set-char-table-range find-word-boundary-function-table
char #'next-word-boundary-kana))))
char-script-table)