;;; mule-conf.el --- configure multilingual environment
;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN.
-;; Licensed to the Free Software Foundation.
-;; Copyright (C) 2001, 2002
+;; Licensed to the Free Software Foundation.
+;; Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+;; Copyright (C) 2003
;; National Institute of Advanced Industrial Science and Technology (AIST)
;; Registration Number H13PRO009
-;; Copyright (C) 2002, 2003 Free Software Foundation, Inc.
;; Keywords: i18n, mule, multilingual, character set, coding system
(define-charset 'jisx0201
"JISX0201"
:short-name "JISX0201"
- :code-space [33 254]
- :map "jisx0201")
+ :code-space [0 #xDF]
+ :map "JISX0201")
(define-charset 'latin-jisx0201
"Roman Part of JISX0201.1976"
:emacs-mule-id 145
:code-space [33 126 33 126]
:code-offset #x110000
- :unify-map "gb2312-1980")
+ :unify-map "GB2312")
(define-charset 'chinese-gbk
"GBK Chinese simplified."
:short-name "GBK"
:code-space [#x40 #xFE #x81 #xFE]
:code-offset #x160000
- :unify-map "gbk")
+ :unify-map "GBK")
(define-charset-alias 'cp936 'chinese-gbk)
(define-charset-alias 'windows-936 'chinese-gbk)
:emacs-mule-id 149
:code-space [33 126 33 126]
:code-offset #x114000
- :unify-map "cns11643-1")
+ :unify-map "CNS-1")
(define-charset 'chinese-cns11643-2
"CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
:emacs-mule-id 150
:code-space [33 126 33 126]
:code-offset #x118000
- :unify-map "cns11643-2")
+ :unify-map "CNS-2")
(define-charset 'chinese-cns11643-3
"CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
:iso-final-char ?I
:code-space [33 126 33 126]
:emacs-mule-id 246
- :code-offset #x11C000)
+ :code-offset #x11C000
+ :unify-map "CNS-3")
(define-charset 'chinese-cns11643-4
"CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
:iso-final-char ?J
:emacs-mule-id 247
:code-space [33 126 33 126]
- :code-offset #x120000)
+ :code-offset #x120000
+ :unify-map "CNS-4")
(define-charset 'chinese-cns11643-5
"CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
:iso-final-char ?K
:emacs-mule-id 248
:code-space [33 126 33 126]
- :code-offset #x124000)
+ :code-offset #x124000
+ :unify-map "CNS-5")
(define-charset 'chinese-cns11643-6
"CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
:iso-final-char ?L
:emacs-mule-id 249
:code-space [33 126 33 126]
- :code-offset #x128000)
+ :code-offset #x128000
+ :unify-map "CNS-6")
(define-charset 'chinese-cns11643-7
"CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
:iso-final-char ?M
:emacs-mule-id 250
:code-space [33 126 33 126]
- :code-offset #x12C000)
+ :code-offset #x12C000
+ :unify-map "CNS-7")
(define-charset 'big5
"Big5 (Chinese traditional)"
:short-name "Big5"
:code-space [#x40 #xFE #xA1 #xFE]
:code-offset #x130000
- :unify-map "big5")
+ :unify-map "BIG5")
;; Fixme: AKA cp950 according to
;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>. Is
;; that correct?
:emacs-mule-id 152
:code-space [#x21 #x7E #x21 #x7E]
:code-offset #x135000
- :unify-map "big5-1")
+ :unify-map "BIG5-1")
(define-charset 'chinese-big5-2
"Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
:emacs-mule-id 153
:code-space [#x21 #x7E #x21 #x7E]
:code-offset #x137800
- :unify-map "big5-2")
+ :unify-map "BIG5-2")
(define-charset 'japanese-jisx0208
"JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
:emacs-mule-id 146
:code-space [33 126 33 126]
:code-offset #x140000
- :unify-map "jisx0208-1990")
+ :unify-map "JISX0208")
(define-charset 'japanese-jisx0208-1978
"JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
:short-name "JISX0208.1978"
- :long-name "JISX0208.1978 (Japanese): ISO-IR-42"
+ :long-name "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
:iso-final-char ?@
:emacs-mule-id 144
:code-space [33 126 33 126]
:code-offset #x144000
- :unify-map "jisx0208-1978")
+ :unify-map "JISC6226")
(define-charset 'japanese-jisx0212
"JISX0212 Japanese supplement: ISO-IR-159"
:emacs-mule-id 148
:code-space [33 126 33 126]
:code-offset #x148000
- :unify-map "jisx0212-1990")
+ :unify-map "JISX0212")
;; Note that jisx0213 contains characters not in Unicode (3.2?). It's
;; arguable whether it should have a unify-map.
:short-name "JISX0213-1"
:iso-final-char ?O
:emacs-mule-id 151
- :unify-map "jisx0213-1"
+ :unify-map "JISX2131"
:code-space [33 126 33 126]
:code-offset #x14C000)
:short-name "JISX0213-2"
:iso-final-char ?P
:emacs-mule-id 254
- :unify-map "jisx0213-2"
+ :unify-map "JISX2132"
:code-space [33 126 33 126]
:code-offset #x150000)
:emacs-mule-id 147
:code-space [33 126 33 126]
:code-offset #x279f94 ; ... #x27c217
- :unify-map "ksc5601-1987")
+ :unify-map "KSC5601")
(define-charset 'big5-hkscs
"Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
:short-name "Big5"
:code-space [#x40 #xFE #xA1 #xFE]
:code-offset #x27c218 ; ... #x280839
- :unify-map "big5-hkscs")
+ :unify-map "BIG5-HKSCS")
;; Fixme: Korean cp949/UHC
:iso-final-char ?0
:emacs-mule-id 160
:code-space [33 126]
- :unify-map "sisheng"
+ :unify-map "MULE-sisheng"
:code-offset #x200000)
;; A subset of the 1989 version of IPA. It consists of the consonant
:short-name "IPA"
:iso-final-char ?0
:emacs-mule-id 161
- :unify-map "ipa"
+ :unify-map "MULE-ipa"
:code-space [32 127]
:code-offset #x200080)
:short-name "VISCII"
:long-name "VISCII 1.1"
:code-space [0 255]
- :map "viscii")
+ :map "VISCII")
(define-charset 'vietnamese-viscii-lower
"VISCII1.1 lower-case"
:emacs-mule-id 162
:code-space [32 127]
:code-offset #x200200
- :unify-map "viscii-lower")
+ :unify-map "MULE-lviscii")
(define-charset 'vietnamese-viscii-upper
"VISCII1.1 upper-case"
:emacs-mule-id 163
:code-space [32 127]
:code-offset #x200280
- :unify-map "viscii-upper")
+ :unify-map "MULE-uviscii")
(define-charset 'vscii
- "VSCII1.1"
+ "VSCII1.1 (TCVN-5712 VN1)"
:short-name "VSCII"
:code-space [0 255]
- :map "vscii")
+ :map "VSCII")
+
+(define-charset-alias 'tcvn-5712 'vscii)
;; Fixme: see note in tcvn.map about combining characters
-(define-charset 'tcvn-5712
- "TCVN-5712"
+(define-charset 'vscii-2
+ "VSCII-2 (TCVN-5712 VN2)"
:code-space [0 255]
- :map "tcvn")
+ :map "VSCII-2")
(define-charset 'koi8-r
"KOI8-R"
:short-name "KOI8-R"
:ascii-compatible-p t
:code-space [0 255]
- :map "koi8-r")
+ :map "KOI8-R")
(define-charset-alias 'koi8 'koi8-r)
:short-name "alternativnyj"
:ascii-compatible-p t
:code-space [0 255]
- :map "alternativnyj")
+ :map "ALTERNATIVNYJ")
(define-charset 'cp866
"CP866"
:short-name "cp866"
:ascii-compatible-p t
:code-space [0 255]
- :map "ibm866")
+ :map "IBM866")
(define-charset-alias 'ibm866 'cp866)
(define-charset 'koi8-u
:short-name "KOI8-U"
:ascii-compatible-p t
:code-space [0 255]
- :map "koi8-u")
+ :map "KOI8-U")
(define-charset 'koi8-t
"KOI8-T"
:short-name "KOI8-T"
:ascii-compatible-p t
:code-space [0 255]
- :map "koi8-t")
+ :map "KOI8-T")
(define-charset 'georgian-ps
"GEORGIAN-PS"
:short-name "GEORGIAN-PS"
:ascii-compatible-p t
:code-space [0 255]
- :map "georgian-ps")
+ :map "KA-PS")
(define-charset 'georgian-academy
"GEORGIAN-ACADEMY"
:short-name "GEORGIAN-ACADEMY"
:ascii-compatible-p t
:code-space [0 255]
- :map "georgian-academy")
+ :map "KA-ACADEMY")
(define-charset 'windows-1250
"WINDOWS-1250 (Central Europe)"
:short-name "WINDOWS-1250"
:ascii-compatible-p t
:code-space [0 255]
- :map "windows-1250")
+ :map "CP1250")
(define-charset-alias 'cp1250 'windows-1250)
(define-charset 'windows-1251
:short-name "WINDOWS-1251"
:ascii-compatible-p t
:code-space [0 255]
- :map "windows-1251")
+ :map "CP1251")
(define-charset-alias 'cp1251 'windows-1251)
(define-charset 'windows-1252
:short-name "WINDOWS-1252"
:ascii-compatible-p t
:code-space [0 255]
- :map "windows-1252")
+ :map "CP1252")
(define-charset-alias 'cp1252 'windows-1252)
(define-charset 'windows-1253
:short-name "WINDOWS-1253"
:ascii-compatible-p t
:code-space [0 255]
- :map "windows-1253")
+ :map "CP1253")
(define-charset-alias 'cp1253 'windows-1253)
(define-charset 'windows-1254
:short-name "WINDOWS-1254"
:ascii-compatible-p t
:code-space [0 255]
- :map "windows-1254")
+ :map "CP1254")
(define-charset-alias 'cp1254 'windows-1254)
(define-charset 'windows-1255
:short-name "WINDOWS-1255"
:ascii-compatible-p t
:code-space [0 255]
- :map "windows-1255")
+ :map "CP1255")
(define-charset-alias 'cp1255 'windows-1255)
(define-charset 'windows-1256
:short-name "WINDOWS-1256"
:ascii-compatible-p t
:code-space [0 255]
- :map "windows-1256")
+ :map "CP1256")
(define-charset-alias 'cp1256 'windows-1256)
(define-charset 'windows-1257
:short-name "WINDOWS-1257"
:ascii-compatible-p t
:code-space [0 255]
- :map "windows-1257")
+ :map "CP1257")
(define-charset-alias 'cp1257 'windows-1257)
(define-charset 'windows-1258
:short-name "WINDOWS-1258"
:ascii-compatible-p t
:code-space [0 255]
- :map "windows-1258")
+ :map "CP1258")
(define-charset-alias 'cp1258 'windows-1258)
(define-charset 'next
:short-name "NEXT"
:ascii-compatible-p t
:code-space [0 255]
- :map "next")
+ :map "NEXTSTEP")
(define-charset 'cp1125
"CP1125"
:short-name "CP1125"
:code-space [0 255]
- :map "cp1125")
+ :map "CP1125")
(define-charset-alias 'ruscii 'cp1125)
;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
(define-charset-alias 'cp866u 'cp1125)
:short-name "CP437"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp437")
+ :map "IBM437")
(define-charset 'cp720
"CP720 (Arabic)"
:short-name "CP720"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp720")
+ :map "CP720")
(define-charset 'cp737
"CP737 (PC Greek)"
:short-name "CP737"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp737")
+ :map "CP737")
(define-charset 'cp775
"CP775 (PC Baltic)"
:short-name "CP775"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp775")
+ :map "CP775")
(define-charset 'cp851
"CP851 (Greek)"
:short-name "CP851"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp851")
+ :map "IBM851")
(define-charset 'cp852
"CP852 (MS-DOS Latin-2)"
:short-name "CP852"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp852")
+ :map "IBM852")
(define-charset 'cp855
"CP855 (IBM Cyrillic)"
:short-name "CP855"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp855")
+ :map "IBM855")
(define-charset 'cp857
"CP857 (IBM Turkish)"
:short-name "CP857"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp857")
+ :map "IBM857")
(define-charset 'cp858
"CP858 (Multilingual Latin I + Euro)"
:short-name "CP858"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp858")
+ :map "CP858")
(define-charset-alias 'cp00858 'cp858) ; IANA has IBM00858/CP00858
(define-charset 'cp860
:short-name "CP860"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp860")
+ :map "IBM860")
(define-charset 'cp861
"CP861 (MS-DOS Icelandic)"
:short-name "CP861"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp861")
+ :map "IBM861")
(define-charset 'cp862
"CP862 (PC Hebrew)"
:short-name "CP862"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp862")
+ :map "IBM862")
(define-charset 'cp863
"CP863 (MS-DOS Canadian French)"
:short-name "CP863"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp863")
+ :map "IBM863")
(define-charset 'cp864
"CP864 (PC Arabic)"
:short-name "CP864"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp864")
+ :map "IBM864")
(define-charset 'cp865
"CP865 (MS-DOS Nordic)"
:short-name "CP865"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp865")
+ :map "IBM865")
(define-charset 'cp869
"CP869 (IBM Modern Greek)"
:short-name "CP869"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp869")
+ :map "IBM869")
(define-charset 'cp874
"CP874 (IBM Thai)"
:short-name "CP874"
:code-space [0 255]
:ascii-compatible-p t
- :map "cp874")
+ :map "IBM874")
;; For Arabic, we need three different types of character sets.
;; Digits are of direction left-to-right and of width 1-column.
:code-space [33 126]
:code-offset #x180000)
-(define-charset 'devanagari-glyph
- "Glyphs for Devanagari script. Subset of `indian-glyph'."
- :short-name "Devanagari glyph"
- :code-space [0 255]
- :code-offset #x180100)
-
-(define-charset 'malayalam-glyph
- "Glyphs for Malayalam script. Subset of `indian-glyph'."
- :short-name "Malayalam glyph"
- :code-space [0 255]
- :code-offset #x180200)
-
-;; These would be necessary for supporting the complete set of Indian
-;; scripts. See also fontset.el.
-
-;; (let ((i 0))
-;; (dolist (script '(sanskrit bengali tamil telugu assamese
-;; oriya kannada malayalam gujarati punjabi))
-;; (define-charset (intern (concat (symbol-name script) "-glyph"))
-;; (concat "Glyphs for " (capitalize (symbol-name script))
-;; " script. Subset of `indian-glyph'.")
-;; :short-name (concat (capitalize (symbol-name script)) " glyph")
-;; :code-space [0 255]
-;; :code-offset (+ #x180100 (* 256 i)))
-;; (setq i (1+ i))))
+(let ((code-offset #x180100))
+ (dolist (script '(devanagari sanskrit bengali tamil telugu assamese
+ oriya kannada malayalam gujarati punjabi))
+ (define-charset (intern (format "%s-cdac" script))
+ (format "Glyphs of %s script for CDAC font. Subset of `indian-glyph'."
+ (capitalize (symbol-name script)))
+ :short-name (format "CDAC %s glyphs" (capitalize (symbol-name script)))
+ :code-space [0 255]
+ :code-offset code-offset)
+ (setq code-offset (+ code-offset #x100)))
+
+ (dolist (script '(devanagari bengali punjabi gujarati
+ oriya tamil telugu kannada malayalam))
+ (define-charset (intern (format "%s-akruti" script))
+ (format "Glyphs of %s script for AKRUTI font. Subset of `indian-glyph'."
+ (capitalize (symbol-name script)))
+ :short-name (format "AKRUTI %s glyphs" (capitalize (symbol-name script)))
+ :code-space [0 255]
+ :code-offset code-offset)
+ (setq code-offset (+ code-offset #x100))))
(define-charset 'indian-glyph
"Glyphs for Indian characters."
:long-name "Tibetan 2 column"
:iso-final-char ?7
:emacs-mule-id 252
- :unify-map "tibetan"
+ :unify-map "MULE-tibetan"
:code-space [33 126 33 37]
:code-offset #x190000)
:long-name "Ethiopic characters"
:iso-final-char ?3
:emacs-mule-id 245
- :unify-map "ethiopic"
+ :unify-map "MULE-ethiopic"
:code-space [33 126 33 126]
:code-offset #x1A0000)
:short-name "Mac Roman"
:ascii-compatible-p t
:code-space [0 255]
- :map "mac-roman")
+ :map "MACINTOSH")
;; Fixme: modern EBCDIC variants, e.g. IBM00924?
(define-charset 'ebcdic-us
:short-name "EBCDIC-US"
:code-space [0 255]
:mime-charset 'ebcdic-us
- :map "ebcdic-us")
+ :map "EBCDICUS")
(define-charset 'ebcdic-uk
"UK version of EBCDIC"
:short-name "EBCDIC-UK"
:code-space [0 255]
:mime-charset 'ebcdic-uk
- :map "ebcdic-uk")
+ :map "EBCDICUK")
(define-charset 'ibm1047
;; Says groff:
:short-name "IBM1047"
:code-space [0 255]
:mime-charset 'ibm1047
- :map "ibm1047")
+ :map "IBM1047")
(define-charset-alias 'cp1047 'ibm1047)
(define-charset 'hp-roman8
:short-name "HP-ROMAN8"
:ascii-compatible-p t
:code-space [0 255]
- :map "hp-roman8")
+ :map "HP-ROMAN8")
;; To make a coding system with this, a pre-write-conversion should
;; account for the commented-out multi-valued code points in
:short-name "IBM850"
:ascii-compatible-p t
:code-space [0 255]
- :map "ibm850")
+ :map "IBM850")
(define-charset-alias 'cp850 'ibm850)
(define-charset 'mik
:short-name "MIK"
:ascii-compatible-p t
:code-space [0 255]
- :map "mik")
+ :map "MIK")
-(define-charset 'pt154
+(define-charset 'ptcp154
"`Paratype' codepage (Asian Cyrillic)"
:short-name "PT154"
:ascii-compatible-p t
:code-space [0 255]
:mime-charset 'pt154
- :map "pt154")
-(define-charset-alias 'ptcp154 'pt154)
-(define-charset-alias 'cp154 'pt154)
+ :map "PTCP154")
+(define-charset-alias 'pt154 'ptcp154)
+(define-charset-alias 'cp154 'ptcp154)
(define-charset 'gb18030-2-byte
"GB18030 2-byte (0x814E..0xFEFE)"
:code-space [#x40 #xFE #x81 #xFE]
:supplementary-p t
- :map "gb18030-2")
+ :map "GB180302")
(define-charset 'gb18030-4-byte-bmp
"GB18030 4-byte for BMP (0x81308130-0x8431A439)"
:code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84]
:supplementary-p t
- :map "gb18030-4")
+ :map "GB180304")
(define-charset 'gb18030-4-byte-smp
"GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
(unify-charset 'chinese-gbk)
(unify-charset 'chinese-cns11643-1)
(unify-charset 'chinese-cns11643-2)
+(unify-charset 'chinese-cns11643-3)
+(unify-charset 'chinese-cns11643-4)
+(unify-charset 'chinese-cns11643-5)
+(unify-charset 'chinese-cns11643-6)
+(unify-charset 'chinese-cns11643-7)
(unify-charset 'big5)
(unify-charset 'chinese-big5-1)
(unify-charset 'chinese-big5-2)
+(unify-charset 'big5-hkscs)
+(unify-charset 'korean-ksc5601)
(unify-charset 'vietnamese-viscii-lower)
(unify-charset 'vietnamese-viscii-upper)
-(unify-charset 'big5-hkscs)
(unify-charset 'chinese-sisheng)
-(unify-charset 'korean-ksc5601)
(unify-charset 'ipa)
(unify-charset 'tibetan)
(unify-charset 'ethiopic)
-;; (unify-charset 'japanese-jisx0208-1978)
+(unify-charset 'japanese-jisx0208-1978)
(unify-charset 'japanese-jisx0208)
(unify-charset 'japanese-jisx0212)
(unify-charset 'japanese-jisx0213-1)
(define-coding-system-alias 'binary 'no-conversion)
(define-coding-system 'raw-text
- "Raw text, which means text contains random 8-bit codes.
+ "Raw text, which means text contains random 8-bit codes.
Encoding text with this coding system produces the actual byte
sequence of the text in buffers and strings. An exception is made for
eight-bit-control characters. Each of them is encoded into a single
When you visit a file with this coding, the file is read into a
unibyte buffer as is (except for EOL format), thus each byte of a file
is treated as a character."
- :coding-type 'raw-text
- :mnemonic ?t)
+ :coding-type 'raw-text
+ :for-unibyte t
+ :mnemonic ?t)
+(define-coding-system 'no-conversion-multibyte
+ "Like `no-conversion' but don't read a file into a unibyte buffer."
+ :coding-type 'raw-text
+ :eol-type 'unix
+ :mnemonic ?=)
+
(define-coding-system 'undecided
"No conversion on encoding, automatic conversion on decoding."
:coding-type 'undecided
(define-coding-system 'compound-text
"Compound text based generic encoding for decoding unknown messages.
-This coding system does not support ICCCM Extended Segments."
+This coding system does not support extended segments of CTEXT."
:coding-type 'iso-2022
:mnemonic ?x
:charset-list 'iso-2022
'ctext-with-extensions 'compound-text-with-extensions)
(define-coding-system 'us-ascii
- "Convert all characters but ASCII to `?'."
+ "Encode ASCII as-is and encode non-ASCII characters to `?'."
:coding-type 'charset
:mnemonic ?-
:charset-list '(ascii)
;; the beginning of a doc string, work.
("\\(\\`\\|/\\)loaddefs.el\\'" . (raw-text . raw-text-unix))
("\\.tar\\'" . (no-conversion . no-conversion))
+ ( "\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
("" . (undecided . nil))))
\f
(cons 128 (max-char))
'self-insert-command)
+(aset latin-extra-code-table ?\221 t)
(aset latin-extra-code-table ?\222 t)
+(aset latin-extra-code-table ?\223 t)
+(aset latin-extra-code-table ?\224 t)
+(aset latin-extra-code-table ?\225 t)
+(aset latin-extra-code-table ?\226 t)
;; Move least specific charsets to end of priority list