;;; characters.el --- set syntax and category for multibyte characters
-;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
-;; Licensed to the Free Software Foundation.
-;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
+;; Copyright (C) 1997, 2000, 2001, 2002, 2003, 2004
+;; Free Software Foundation, Inc.
+;; Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005
+;; National Institute of Advanced Industrial Science and Technology (AIST)
+;; Registration Number H14PRO021
;; Keywords: multibyte character, character set, syntax, category
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
;;; Commentary:
;;; Code:
+;; We must set utf-translate-cjk-mode to nil while loading this file
+;; to avoid translating CJK characters in decode-char.  The current
+;; value is remembered in saved-utf-translate-cjk-mode so that it can
+;; be put back at the end of this file.
+(defvar saved-utf-translate-cjk-mode utf-translate-cjk-mode)
+(setq utf-translate-cjk-mode nil)
+
;;; Predefined categories.
;; For each character set.
(modify-category-entry (make-char 'chinese-gb2312 row) ?C)
(setq row (1+ row))))
+;; Register upper/lower case pairs for three rows of chinese-gb2312 in
+;; the standard case table.  Each entry below is
+;; (ROW UPPER-START LOWER-START COUNT): COUNT consecutive code points
+;; starting at UPPER-START are paired, one by one, with the code
+;; points starting at LOWER-START in the same ROW.
+(let ((case-tbl (standard-case-table)))
+  (dolist (range '((#x23 #x41 #x61 26)
+                   (#x26 #x21 #x41 24)
+                   (#x27 #x21 #x51 33)))
+    (let ((gb-row (nth 0 range))
+          (upper-start (nth 1 range))
+          (lower-start (nth 2 range)))
+      (dotimes (offset (nth 3 range))
+        (set-case-syntax-pair
+         (make-char 'chinese-gb2312 gb-row (+ upper-start offset))
+         (make-char 'chinese-gb2312 gb-row (+ lower-start offset))
+         case-tbl)))))
+
;; Chinese character set (BIG5)
(let ((from (decode-big5-char #xA141))
(modify-category-entry generic-big5-1-char ?\|)
(modify-category-entry generic-big5-2-char ?\|))
+;; Register upper/lower case pairs for Big5 letters in the standard
+;; case table.  The first upper-case run is 26 code points long
+;; (#xA2CF..#xA2E8), but its lower-case partners are split in two
+;; pieces: 22 of them at #xA2E9..#xA2FE and the remaining 4 at
+;; #xA340..#xA343.  Hence two loops for that run.
+(let ((tbl (standard-case-table)))
+  ;; Upper #xA2CF..#xA2E4 <-> lower #xA2E9..#xA2FE (first 22 letters).
+  (dotimes (i 22)
+    (set-case-syntax-pair (decode-big5-char (+ #xA2CF i))
+                          (decode-big5-char (+ #xA2CF i 26)) tbl))
+  ;; Remaining 4 letters.  The upper-case side continues right after
+  ;; the previous loop, i.e. at #xA2E5 (= #xA2CF + 22), not at #xA2E4,
+  ;; which was already paired above.
+  (dotimes (i 4)
+    (set-case-syntax-pair (decode-big5-char (+ #xA2E5 i))
+                          (decode-big5-char (+ #xA340 i)) tbl))
+  ;; Second alphabet: 24 upper-case code points from #xA344, each
+  ;; followed 24 positions later by its lower-case partner.
+  (dotimes (i 24)
+    (set-case-syntax-pair (decode-big5-char (+ #xA344 i))
+                          (decode-big5-char (+ #xA344 i 24)) tbl)))
+
;; Chinese character set (CNS11643)
(modify-category-entry generic-char ?|)
(setq cns-list (cdr cns-list))))
+;; Give paired-delimiter syntax to CNS11643 brackets.  The string
+;; lists the characters as OPEN CLOSE OPEN CLOSE ...; every opener
+;; gets syntax "(" matching its closer, and every closer gets syntax
+;; ")" matching its opener.
+(let ((pairs "\e$(G!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B"))
+  (dotimes (n (/ (length pairs) 2))
+    (let ((opener (aref pairs (* n 2)))
+          (closer (aref pairs (1+ (* n 2)))))
+      (modify-syntax-entry opener (format "(%c" closer))
+      (modify-syntax-entry closer (format ")%c" opener)))))
+
;; Cyrillic character set (ISO-8859-5)
(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
(set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
(set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
(set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
+ (set-upcase-syntax ?\e,FS\e(B ?\e,Fr\e(B tbl)
(set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
(set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
(set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
(set-case-syntax-pair ?\e$,1&\7f\e(B ?\e$,1'?\e(B tbl)
(set-case-syntax-pair ?\e$,1' \e(B ?\e$,1'@\e(B tbl)
(set-case-syntax-pair ?\e$,1'!\e(B ?\e$,1'A\e(B tbl)
+ (set-upcase-syntax ?\e$,1'#\e(B ?\e$,1'B\e(B tbl)
(set-case-syntax-pair ?\e$,1'#\e(B ?\e$,1'C\e(B tbl)
(set-case-syntax-pair ?\e$,1'$\e(B ?\e$,1'D\e(B tbl)
(set-case-syntax-pair ?\e$,1'%\e(B ?\e$,1'E\e(B tbl)
(while chars
(modify-syntax-entry (car chars) "w")
(setq chars (cdr chars))))
-(modify-syntax-entry ?\\e$B!J\e(B "(\e$B!K\e(B")
-(modify-syntax-entry ?\\e$B!N\e(B "(\e$B!O\e(B")
-(modify-syntax-entry ?\\e$B!P\e(B "(\e$B!Q\e(B")
-(modify-syntax-entry ?\\e$B!V\e(B "(\e$B!W\e(B")
-(modify-syntax-entry ?\\e$B!X\e(B "(\e$B!Y\e(B")
-(modify-syntax-entry ?\\e$B!K\e(B ")\e$B!J\e(B")
-(modify-syntax-entry ?\\e$B!O\e(B ")\e$B!N\e(B")
-(modify-syntax-entry ?\\e$B!Q\e(B ")\e$B!P\e(B")
-(modify-syntax-entry ?\\e$B!W\e(B ")\e$B!V\e(B")
-(modify-syntax-entry ?\\e$B!Y\e(B ")\e$B!X\e(B")
+;; Give paired-delimiter syntax to JISX0208 brackets.  The string
+;; lists the characters as OPEN CLOSE OPEN CLOSE ...; every opener
+;; gets syntax "(" matching its closer, and every closer gets syntax
+;; ")" matching its opener.
+(let ((pairs "\e$B!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![\e(B"))
+  (dotimes (n (/ (length pairs) 2))
+    (let ((opener (aref pairs (* n 2)))
+          (closer (aref pairs (1+ (* n 2)))))
+      (modify-syntax-entry opener (format "(%c" closer))
+      (modify-syntax-entry closer (format ")%c" opener)))))
(modify-category-entry (make-char 'japanese-jisx0208 35) ?A)
(modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
(modify-category-entry (car chars) ?C)
(setq chars (cdr chars))))
+;; Register upper/lower case pairs for three rows of japanese-jisx0208
+;; in the standard case table.  Each entry below is
+;; (ROW UPPER-START LOWER-START COUNT): COUNT consecutive code points
+;; starting at UPPER-START are paired, one by one, with the code
+;; points starting at LOWER-START in the same ROW.
+(let ((case-tbl (standard-case-table)))
+  (dolist (range '((#x23 #x41 #x61 26)
+                   (#x26 #x21 #x41 24)
+                   (#x27 #x21 #x51 33)))
+    (let ((jis-row (nth 0 range))
+          (upper-start (nth 1 range))
+          (lower-start (nth 2 range)))
+      (dotimes (offset (nth 3 range))
+        (set-case-syntax-pair
+         (make-char 'japanese-jisx0208 jis-row (+ upper-start offset))
+         (make-char 'japanese-jisx0208 jis-row (+ lower-start offset))
+         case-tbl)))))
+
;; JISX0212
;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
(modify-syntax-entry (make-char 'japanese-jisx0212 33) "_")
(modify-category-entry (make-char 'korean-ksc5601 43) ?K)
(modify-category-entry (make-char 'korean-ksc5601 44) ?Y)
+;; Give paired-delimiter syntax to KSC5601 brackets.  The string
+;; lists the characters as OPEN CLOSE OPEN CLOSE ...; every opener
+;; gets syntax "(" matching its closer, and every closer gets syntax
+;; ")" matching its opener.
+(let ((pairs "\e$(C!2!3!4!5!6!7!8!9!:!;!<!=#(#)#[#]#{#}\e(B"))
+  (dotimes (n (/ (length pairs) 2))
+    (let ((opener (aref pairs (* n 2)))
+          (closer (aref pairs (1+ (* n 2)))))
+      (modify-syntax-entry opener (format "(%c" closer))
+      (modify-syntax-entry closer (format ")%c" opener)))))
+
+;; Register upper/lower case pairs for korean-ksc5601 letters in the
+;; standard case table.  `set-case-syntax-pair' takes the upper-case
+;; character first and the lower-case character second.
+(let ((tbl (standard-case-table)))
+  ;; Row #x23: 26 pairs, upper-case from #x41, lower-case from #x61.
+  (dotimes (i 26)
+    (set-case-syntax-pair (make-char 'korean-ksc5601 #x23 (+ #x41 i))
+                          (make-char 'korean-ksc5601 #x23 (+ #x61 i)) tbl))
+  ;; Row #x25, Roman numerals.  Unlike the other ranges in this row,
+  ;; the SMALL numerals come first (#x21..#x2A) and the CAPITAL ones
+  ;; second (#x30..#x39) in KS X 1001, so the upper-case argument is
+  ;; the one built from #x30.
+  ;; NOTE(review): layout taken from the KS X 1001 mapping table --
+  ;; confirm against the charset before merging.
+  (dotimes (i 10)
+    (set-case-syntax-pair (make-char 'korean-ksc5601 #x25 (+ #x30 i))
+                          (make-char 'korean-ksc5601 #x25 (+ #x21 i)) tbl))
+  ;; Row #x25: 24 pairs, upper-case from #x41, lower-case from #x61.
+  (dotimes (i 24)
+    (set-case-syntax-pair (make-char 'korean-ksc5601 #x25 (+ #x41 i))
+                          (make-char 'korean-ksc5601 #x25 (+ #x61 i)) tbl))
+  ;; Row #x2C: 33 pairs, upper-case from #x21, lower-case from #x51.
+  (dotimes (i 33)
+    (set-case-syntax-pair (make-char 'korean-ksc5601 #x2C (+ #x21 i))
+                          (make-char 'korean-ksc5601 #x2C (+ #x51 i)) tbl)))
+
;; Latin character set (latin-1,2,3,4,5,8,9)
(modify-category-entry (make-char 'latin-iso8859-1) ?l)
(let ((tbl (standard-case-table)) c)
-;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
-;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
-;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
-;; Thus we have to check language-environment to handle casing
-;; correctly. Currently only I<->i is available.
-
;; Latin Extended-A, Latin Extended-B
(setq c #x0100)
(while (<= c #x0233)
(set-case-syntax-pair
(decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))
(setq c (1+ c)))
+
+
+ ;; In some languages, such as Turkish, U+0049 LATIN CAPITAL LETTER I
+ ;; and U+0131 LATIN SMALL LETTER DOTLESS I make a case pair, and so
+ ;; do U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN
+ ;; SMALL LETTER I.
+
+ ;; We used to set up half of those correspondences unconditionally,
+ ;; but that makes searches slow. So now we don't set up either half
+ ;; of these correspondences by default.
+
+ ;; (set-downcase-syntax ?\e$,1 P\e(B ?i tbl)
+ ;; (set-upcase-syntax ?I ?\e$,1 Q\e(B tbl)
+
(set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
(set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
(set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
-;;; (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl) ; these two have different length!
+ (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl)
(set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
(set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
(set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
;;; Setting word boundary.
(setq word-combining-categories
- '((?l . ?l)))
+ '((?l . ?l)
+ (?C . ?C)
+ (?C . ?H)
+ (?C . ?K)))
(setq word-separating-categories ; (2-byte character sets)
'((?A . ?K) ; Alpha numeric - Katakana
(put-charset-property (car l) 'nospace-between-words t)
(setq l (cdr l))))
+\f
+;; Loading is done: put back the value of utf-translate-cjk-mode that
+;; was saved at the top of this file, then remove the temporary
+;; variable so it does not linger after loading.
+(setq utf-translate-cjk-mode saved-utf-translate-cjk-mode)
+(makunbound 'saved-utf-translate-cjk-mode)
+
;;; Local Variables:
;;; coding: iso-2022-7bit
;;; End: