]> code.delx.au - gnu-emacs/blob - lisp/international/mule-conf.el
Nuke arch-tags.
[gnu-emacs] / lisp / international / mule-conf.el
1 ;;; mule-conf.el --- configure multilingual environment
2
3 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003,
4 ;; 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
5 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H14PRO021
8 ;; Copyright (C) 2003
9 ;; National Institute of Advanced Industrial Science and Technology (AIST)
10 ;; Registration Number H13PRO009
11
12 ;; Keywords: i18n, mule, multilingual, character set, coding system
13
14 ;; This file is part of GNU Emacs.
15
16 ;; GNU Emacs is free software: you can redistribute it and/or modify
17 ;; it under the terms of the GNU General Public License as published by
18 ;; the Free Software Foundation, either version 3 of the License, or
19 ;; (at your option) any later version.
20
21 ;; GNU Emacs is distributed in the hope that it will be useful,
22 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 ;; GNU General Public License for more details.
25
26 ;; You should have received a copy of the GNU General Public License
27 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
28
29 ;;; Commentary:
30
31 ;; This file defines the Emacs charsets and some basic coding systems.
32 ;; Other coding systems are defined in the files in directory
33 ;; lisp/language.
34
35 ;;; Code:
36
37 ;;; Remarks
38
39 ;; The ISO-IR registry is at http://www.itscj.ipsj.or.jp/ISO-IR/.
40 ;; Standards docs equivalent to iso-2022 and iso-8859 are at
41 ;; http://www.ecma.ch/.
42
43 ;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
44 ;; MS Windows, which are presumably the only charsets we really need
45 ;; to worry about on such systems:
46 ;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
47 ;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
48 ;; 1258, 874, 932, 936, 949, 950
49
50 ;;; Definitions of character sets.
51
52 ;; The charsets `ascii', `unicode', `emacs' and `eight-bit' are already
53 ;; defined in charset.c as below:
54 ;;
55 ;; (define-charset 'ascii
56 ;; ""
57 ;; :dimension 1
58 ;; :code-space [0 127]
59 ;; :iso-final-char ?B
60 ;; :ascii-compatible-p t
61 ;; :emacs-mule-id 0
62 ;; :code-offset 0)
63 ;;
64 ;; (define-charset 'unicode
65 ;; ""
66 ;; :dimension 3
67 ;; :code-space [0 255 0 255 0 16]
68 ;; :ascii-compatible-p t
69 ;; :code-offset 0)
70 ;;
71 ;; (define-charset 'emacs
72 ;; ""
73 ;; :dimension 3
74 ;; :code-space [0 255 0 255 0 63]
75 ;; :ascii-compatible-p t
76 ;; :supplementary-p t
77 ;; :code-offset 0)
78 ;;
79 ;; (define-charset 'eight-bit
80 ;; ""
81 ;; :dimension 1
82 ;; :code-space [128 255]
83 ;; :code-offset #x3FFF80)
84 ;;
85 ;; We now set :docstring, :short-name, and :long-name properties.
86
;; Attach human-readable properties to charsets that already exist at
;; this point.  Each entry is (CHARSET PROPERTY VALUE); entries are
;; applied in the order listed.
(dolist (entry
         '((ascii      :docstring  "ASCII (ISO646 IRV)")
           (ascii      :short-name "ASCII")
           (ascii      :long-name  "ASCII (ISO646 IRV)")
           (iso-8859-1 :docstring  "Latin-1 (ISO/IEC 8859-1)")
           (iso-8859-1 :short-name "Latin-1")
           (iso-8859-1 :long-name  "Latin-1")
           (unicode    :docstring  "Unicode (ISO10646)")
           (unicode    :short-name "Unicode")
           (unicode    :long-name  "Unicode (ISO10646)")
           (emacs      :docstring  "Full Emacs charset (excluding eight bit chars)")
           (emacs      :short-name "Emacs")
           (emacs      :long-name  "Emacs")
           ;; No :long-name is set for `eight-bit' here.
           (eight-bit  :docstring  "Raw bytes 128-255")
           (eight-bit  :short-name "Raw bytes")))
  (put-charset-property (car entry) (nth 1 entry) (nth 2 entry)))

;; `ucs' is a second name for the `unicode' charset.
(define-charset-alias 'ucs 'unicode)
116
;; Right-hand part of Latin-1: codes 0x20..0x7F correspond to
;; characters 0xA0..0xFF.
(define-charset 'latin-iso8859-1
  "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  :short-name "RHP of Latin-1"
  :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  :iso-final-char ?A
  :emacs-mule-id 129
  :code-space [#x20 #x7F]
  :code-offset #xA0)

;; Name perhaps not ideal, but is XEmacs-compatible.
;; Codes 0x80..0x9F correspond to the characters with the same values.
(define-charset 'control-1
  "8-bit control code (0x80..0x9F)"
  :short-name "8-bit control code"
  :code-space [#x80 #x9F]
  :code-offset #x80)

;; The two raw-byte charsets below split the byte range 0x80..0xFF
;; at 0xA0; their character codes start at #x3FFF80 (see character.h).
(define-charset 'eight-bit-control
  "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
  :short-name "Raw bytes 0x80..0x9F"
  :supplementary-p t
  :code-space [#x80 #x9F]
  :code-offset #x3FFF80)

(define-charset 'eight-bit-graphic
  "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
  :short-name "Raw bytes 0xA0..0xFF"
  :supplementary-p t
  :code-space [#xA0 #xFF]
  :code-offset #x3FFFA0)
146
(defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
                                                 iso-ir iso-final
                                                 emacs-mule-id map)
  "Define the single-byte charset SYMBOL and, optionally, its right-hand part.
SYMBOL is defined as an ASCII-compatible charset over codes 0..255
whose mapping is taken from MAP.  NAME serves as its docstring and
long name, NICKNAME as its short name.

If ISO-SYMBOL is non-nil, it is also defined, as the subset of SYMBOL
made of codes 160..255 shifted by -128 into the code space 32..127.
ISO-FINAL and EMACS-MULE-ID become its ISO final character and
emacs-mule id.  If ISO-IR is non-nil, \": ISO-IR-N\" is appended to
that charset's docstring.

The argument forms are spliced into the expansion and may be
evaluated more than once."
  `(progn
     ;; The complete 8-bit charset.
     (define-charset ,symbol
       ,name
       :short-name ,nickname
       :long-name ,name
       :ascii-compatible-p t
       :code-space [#x00 #xFF]
       :map ,map)
     ;; Its upper half, exposed in the code space 0x20..0x7F.
     (when ,iso-symbol
       (define-charset ,iso-symbol
         (concat (format "Right-Hand Part of %s (%s)" ,name ,nickname)
                 (if ,iso-ir
                     (format ": ISO-IR-%d" ,iso-ir)
                   ""))
         :short-name (format "RHP of %s" ,name)
         :long-name (format "RHP of %s (%s)" ,name ,nickname)
         :iso-final-char ,iso-final
         :emacs-mule-id ,emacs-mule-id
         :code-space [#x20 #x7F]
         :subset (list ,symbol #xA0 #xFF -128)))))
170
;; ISO/IEC 8859 parts 2 to 16.  The arguments are, in order: the full
;; charset, the right-hand-part charset, the name, the nickname, the
;; ISO-IR number, the ISO final character, the emacs-mule id (nil
;; when none is given) and the map name.
(define-iso-single-byte-charset
 'iso-8859-2 'latin-iso8859-2 "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")

(define-iso-single-byte-charset
 'iso-8859-3 'latin-iso8859-3 "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")

(define-iso-single-byte-charset
 'iso-8859-4 'latin-iso8859-4 "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")

(define-iso-single-byte-charset
 'iso-8859-5 'cyrillic-iso8859-5 "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")

(define-iso-single-byte-charset
 'iso-8859-6 'arabic-iso8859-6 "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")

(define-iso-single-byte-charset
 'iso-8859-7 'greek-iso8859-7 "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")

(define-iso-single-byte-charset
 'iso-8859-8 'hebrew-iso8859-8 "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")

(define-iso-single-byte-charset
 'iso-8859-9 'latin-iso8859-9 "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")

(define-iso-single-byte-charset
 'iso-8859-10 'latin-iso8859-10 "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")

;; http://www.nectec.or.th/it-standards/iso8859-11/
;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
;; plus nbsp
(define-iso-single-byte-charset
 'iso-8859-11 'thai-iso8859-11 "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")

;; 8859-12 doesn't (yet?) exist.

(define-iso-single-byte-charset
 'iso-8859-13 'latin-iso8859-13 "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")

(define-iso-single-byte-charset
 'iso-8859-14 'latin-iso8859-14 "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")

(define-iso-single-byte-charset
 'iso-8859-15 'latin-iso8859-15 "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")

(define-iso-single-byte-charset
 'iso-8859-16 'latin-iso8859-16 "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")

;; The macro is only needed for the definitions above; drop it.
(fmakunbound 'define-iso-single-byte-charset)
220
;; Can this be shared with 8859-11?
;; N.b. not all of these are defined unicodes.
;; Codes 0x20..0x7F correspond to U+0E00..U+0E5F.
(define-charset 'thai-tis620
  "TIS620.2533"
  :short-name "TIS620.2533"
  :iso-final-char ?T
  :emacs-mule-id 133
  :code-space [#x20 #x7F]
  :code-offset #x0E00)

;; Fixme: doc for this, c.f. above
;; 8-bit superset: `ascii', `eight-bit-control', and `thai-tis620'
;; with its codes shifted up by 0x80.
(define-charset 'tis620-2533
  "TIS620.2533"
  :short-name "TIS620.2533"
  :ascii-compatible-p t
  :code-space [#x00 #xFF]
  :superset '(ascii eight-bit-control (thai-tis620 . #x80)))
238
;; The full JISX0201 table, loaded from a map.
(define-charset 'jisx0201
  "JISX0201"
  :short-name "JISX0201"
  :code-space [#x00 #xDF]
  :map "JISX0201")

;; Subset of `jisx0201': codes 0x21..0x7E, unshifted.
(define-charset 'latin-jisx0201
  "Roman Part of JISX0201.1976"
  :short-name "JISX0201 Roman"
  :long-name "Japanese Roman (JISX0201.1976)"
  :iso-final-char ?J
  :emacs-mule-id 138
  :supplementary-p t
  :code-space [#x21 #x7E]
  :subset '(jisx0201 #x21 #x7E 0))

;; Subset of `jisx0201': codes 0xA1..0xFE shifted by -128 into
;; 0x21..0x7E.
(define-charset 'katakana-jisx0201
  "Katakana Part of JISX0201.1976"
  :short-name "JISX0201 Katakana"
  :long-name "Japanese Katakana (JISX0201.1976)"
  :iso-final-char ?I
  :emacs-mule-id 137
  :supplementary-p t
  :code-space [#x21 #x7E]
  :subset '(jisx0201 #xA1 #xFE -128))
264
;; Two-byte charset, each byte in 0x21..0x7E.
(define-charset 'chinese-gb2312
  "GB2312 Chinese simplified: ISO-IR-58"
  :short-name "GB2312"
  :long-name "GB2312: ISO-IR-58"
  :iso-final-char ?A
  :emacs-mule-id 145
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x110000
  :unify-map "GB2312")

;; GBK and the two other names it is known by.
(define-charset 'chinese-gbk
  "GBK Chinese simplified."
  :short-name "GBK"
  :code-space [#x40 #xFE #x81 #xFE]
  :code-offset #x160000
  :unify-map "GBK")
(dolist (other-name '(cp936 windows-936))
  (define-charset-alias other-name 'chinese-gbk))
283
;; CNS11643 planes 1 to 7.  Every plane is a two-byte charset with
;; both bytes in 0x21..0x7E; the planes' code offsets are #x4000 apart.
(define-charset 'chinese-cns11643-1
  "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
  :short-name "CNS11643-1"
  :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
  :iso-final-char ?G
  :emacs-mule-id 149
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x114000
  :unify-map "CNS-1")

(define-charset 'chinese-cns11643-2
  "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
  :short-name "CNS11643-2"
  :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
  :iso-final-char ?H
  :emacs-mule-id 150
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x118000
  :unify-map "CNS-2")

(define-charset 'chinese-cns11643-3
  "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
  :short-name "CNS11643-3"
  :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
  :iso-final-char ?I
  :code-space [#x21 #x7E #x21 #x7E]
  :emacs-mule-id 246
  :code-offset #x11C000
  :unify-map "CNS-3")

(define-charset 'chinese-cns11643-4
  "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
  :short-name "CNS11643-4"
  :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
  :iso-final-char ?J
  :emacs-mule-id 247
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x120000
  :unify-map "CNS-4")

(define-charset 'chinese-cns11643-5
  "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
  :short-name "CNS11643-5"
  :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
  :iso-final-char ?K
  :emacs-mule-id 248
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x124000
  :unify-map "CNS-5")

(define-charset 'chinese-cns11643-6
  "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
  :short-name "CNS11643-6"
  :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
  :iso-final-char ?L
  :emacs-mule-id 249
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x128000
  :unify-map "CNS-6")

(define-charset 'chinese-cns11643-7
  "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
  :short-name "CNS11643-7"
  :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
  :iso-final-char ?M
  :emacs-mule-id 250
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x12C000
  :unify-map "CNS-7")
353
;; The complete Big5 charset in its native two-byte code space.
(define-charset 'big5
  "Big5 (Chinese traditional)"
  :short-name "Big5"
  :code-space [#x40 #xFE #xA1 #xFE]
  :code-offset #x130000
  :unify-map "BIG5")
;; Fixme: AKA cp950 according to
;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>.  Is
;; that correct?

;; The two charsets below present the two levels of Big5 in a
;; two-byte code space with each byte in 0x21..0x7E.
;;
;; The long name of level 1 used to end in "C67F", contradicting the
;; docstring's "C67E"; 0x7F is not a valid Big5 trail byte, so the
;; long name now says C67E as well.
(define-charset 'chinese-big5-1
  "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-1)"
  :long-name "Big5 (Level-1) A141-C67E"
  :iso-final-char ?0
  :emacs-mule-id 152
  :supplementary-p t
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x135000
  :unify-map "BIG5-1")

(define-charset 'chinese-big5-2
  "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-2)"
  :long-name "Big5 (Level-2) C940-FEFE"
  :iso-final-char ?1
  :emacs-mule-id 153
  :supplementary-p t
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x137800
  :unify-map "BIG5-2")
385
;; Japanese two-byte charsets; both bytes lie in 0x21..0x7E.
(define-charset 'japanese-jisx0208
  "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
  :short-name "JISX0208"
  :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
  :iso-final-char ?B
  :emacs-mule-id 146
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x140000
  :unify-map "JISX0208")

(define-charset 'japanese-jisx0208-1978
  "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
  :short-name "JISX0208.1978"
  :long-name "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
  :iso-final-char ?@
  :emacs-mule-id 144
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x144000
  :unify-map "JISC6226")

(define-charset 'japanese-jisx0212
  "JISX0212 Japanese supplement: ISO-IR-159"
  :short-name "JISX0212"
  :long-name "JISX0212 (Japanese): ISO-IR-159"
  :iso-final-char ?D
  :emacs-mule-id 148
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x148000
  :unify-map "JISX0212")
415
;; Note that jisx0213 contains characters not in Unicode (3.2?).  It's
;; arguable whether it should have a unify-map.
(define-charset 'japanese-jisx0213-1
  "JISX0213.2000 Plane 1 (Japanese)"
  :short-name "JISX0213-1"
  :iso-final-char ?O
  :emacs-mule-id 151
  :unify-map "JISX2131"
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x14C000)

(define-charset 'japanese-jisx0213-2
  "JISX0213.2000 Plane 2 (Japanese)"
  :short-name "JISX0213-2"
  :iso-final-char ?P
  :emacs-mule-id 254
  :unify-map "JISX2132"
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x150000)

;; The additions made by the 2004 edition, loaded from a map.
(define-charset 'japanese-jisx0213-a
  "JISX0213.2004 adds these characters to JISX0213.2000."
  :short-name "JISX0213A"
  :dimension 2
  :code-space [#x21 #x7E #x21 #x7E]
  :supplementary-p t
  :map "JISX213A")

;; Plane 1 of the 2004 edition: the additions above combined with
;; plane 1 of the 2000 edition.
(define-charset 'japanese-jisx0213.2004-1
  "JISX0213.2004 Plane1 (Japanese)"
  :short-name "JISX0213.2004-1"
  :dimension 2
  :code-space [#x21 #x7E #x21 #x7E]
  :iso-final-char ?Q
  :superset '(japanese-jisx0213-a japanese-jisx0213-1))
451
;; The building blocks of CP932, followed by CP932 itself.

;; Single-byte katakana: codes 0xA1..0xDF of `jisx0201', unshifted.
(define-charset 'katakana-sjis
  "Katakana part of Shift-JIS"
  :dimension 1
  :code-space [#xA1 #xDF]
  :subset '(jisx0201 #xA1 #xDF 0)
  :supplementary-p t)

;; The double-byte codes, loaded from a map.
(define-charset 'cp932-2-byte
  "2-byte part of CP932"
  :dimension 2
  :map "CP932-2BYTE"
  :code-space [#x40 #xFC #x81 #xFC]
  :supplementary-p t)

;; Superset of `ascii' and the two charsets above.
(define-charset 'cp932
  "CP932 (Microsoft shift-jis)"
  :code-space [#x00 #xFF #x00 #xFE]
  :short-name "CP932"
  :superset '(ascii katakana-sjis cp932-2-byte))
471
(define-charset 'korean-ksc5601
  "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
  :short-name "KSC5601"
  :long-name "KSC5601 (Korean): ISO-IR-149"
  :iso-final-char ?C
  :emacs-mule-id 147
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x279f94                 ; ... #x27c217
  :unify-map "KSC5601")

;; Same code space as `big5', with its own code offset and map.
(define-charset 'big5-hkscs
  "Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
  :short-name "Big5"
  :code-space [#x40 #xFE #xA1 #xFE]
  :code-offset #x27c218                 ; ... #x280839
  :unify-map "BIG5-HKSCS")

;; The double-byte codes of CP949, loaded from a map.
(define-charset 'cp949-2-byte
  "2-byte part of CP949"
  :dimension 2
  :map "CP949-2BYTE"
  :code-space [#x41 #xFE #x81 #xFD]
  :supplementary-p t)

;; Superset of `ascii' and `cp949-2-byte'.
(define-charset 'cp949
  "CP949 (Korean)"
  :short-name "CP949"
  :long-name "CP949 (Korean)"
  :code-space [#x00 #xFE #x00 #xFD]
  :superset '(ascii cp949-2-byte))
502
(define-charset 'chinese-sisheng
  "SiSheng characters for PinYin/ZhuYin"
  :short-name "SiSheng"
  :long-name "SiSheng (PinYin/ZhuYin)"
  :iso-final-char ?0
  :emacs-mule-id 160
  :code-space [#x21 #x7E]
  :unify-map "MULE-sisheng"
  :supplementary-p t
  :code-offset #x200000)

;; A subset of the 1989 version of IPA.  It consists of the consonant
;; signs used in English, French, German and Italian, and all vowels
;; signs in the table.  [says old MULE doc]
(define-charset 'ipa
  "IPA (International Phonetic Association)"
  :short-name "IPA"
  :iso-final-char ?0
  :emacs-mule-id 161
  :unify-map "MULE-ipa"
  :code-space [#x20 #x7F]
  :supplementary-p t
  :code-offset #x200080)
526
;; Vietnamese charsets.
(define-charset 'viscii
  "VISCII1.1"
  :short-name "VISCII"
  :long-name "VISCII 1.1"
  :code-space [#x00 #xFF]
  :map "VISCII")

(define-charset 'vietnamese-viscii-lower
  "VISCII1.1 lower-case"
  :short-name "VISCII lower"
  :long-name "VISCII lower-case"
  :iso-final-char ?1
  :emacs-mule-id 162
  :code-space [#x20 #x7F]
  :code-offset #x200200
  :supplementary-p t
  :unify-map "MULE-lviscii")

(define-charset 'vietnamese-viscii-upper
  "VISCII1.1 upper-case"
  :short-name "VISCII upper"
  :long-name "VISCII upper-case"
  :iso-final-char ?2
  :emacs-mule-id 163
  :code-space [#x20 #x7F]
  :code-offset #x200280
  :supplementary-p t
  :unify-map "MULE-uviscii")

(define-charset 'vscii
  "VSCII1.1 (TCVN-5712 VN1)"
  :short-name "VSCII"
  :code-space [#x00 #xFF]
  :map "VSCII")

;; `tcvn-5712' is another name for `vscii'.
(define-charset-alias 'tcvn-5712 'vscii)

;; Fixme: see note in tcvn.map about combining characters
(define-charset 'vscii-2
  "VSCII-2 (TCVN-5712 VN2)"
  :code-space [#x00 #xFF]
  :map "VSCII-2")
569
;; ASCII-compatible 8-bit charsets loaded from maps.  Each entry is
;;   (CHARSET DOCSTRING SHORT-NAME MAP ALIAS...)
;; and every ALIAS is defined right after its charset.
(dolist (spec '((koi8-r "KOI8-R" "KOI8-R" "KOI8-R" koi8)
                (alternativnyj "ALTERNATIVNYJ" "alternativnyj" "ALTERNATIVNYJ")
                (cp866 "CP866" "cp866" "IBM866" ibm866)
                (koi8-u "KOI8-U" "KOI8-U" "KOI8-U")
                (koi8-t "KOI8-T" "KOI8-T" "KOI8-T")
                (georgian-ps "GEORGIAN-PS" "GEORGIAN-PS" "KA-PS")
                (georgian-academy "GEORGIAN-ACADEMY" "GEORGIAN-ACADEMY"
                                  "KA-ACADEMY")))
  (define-charset (car spec)
    (nth 1 spec)
    :short-name (nth 2 spec)
    :ascii-compatible-p t
    :code-space [0 255]
    :map (nth 3 spec))
  (dolist (other-name (nthcdr 4 spec))
    (define-charset-alias other-name (car spec))))
621
;; The Windows code pages 1250..1258, each with a `cpNNNN' alias,
;; followed by the NeXT charset.  Each entry is
;;   (CHARSET DOCSTRING SHORT-NAME MAP ALIAS...)
;; and every ALIAS is defined right after its charset.
(dolist (spec
         '((windows-1250 "WINDOWS-1250 (Central Europe)" "WINDOWS-1250" "CP1250" cp1250)
           (windows-1251 "WINDOWS-1251 (Cyrillic)" "WINDOWS-1251" "CP1251" cp1251)
           (windows-1252 "WINDOWS-1252 (Latin I)" "WINDOWS-1252" "CP1252" cp1252)
           (windows-1253 "WINDOWS-1253 (Greek)" "WINDOWS-1253" "CP1253" cp1253)
           (windows-1254 "WINDOWS-1254 (Turkish)" "WINDOWS-1254" "CP1254" cp1254)
           (windows-1255 "WINDOWS-1255 (Hebrew)" "WINDOWS-1255" "CP1255" cp1255)
           (windows-1256 "WINDOWS-1256 (Arabic)" "WINDOWS-1256" "CP1256" cp1256)
           (windows-1257 "WINDOWS-1257 (Baltic)" "WINDOWS-1257" "CP1257" cp1257)
           (windows-1258 "WINDOWS-1258 (Viet Nam)" "WINDOWS-1258" "CP1258" cp1258)
           (next "NEXT" "NEXT" "NEXTSTEP")))
  (define-charset (car spec)
    (nth 1 spec)
    :short-name (nth 2 spec)
    :ascii-compatible-p t
    :code-space [0 255]
    :map (nth 3 spec))
  (dolist (other-name (nthcdr 4 spec))
    (define-charset-alias other-name (car spec))))
700
;; DOS/IBM code pages, all declared ASCII-compatible and loaded from
;; maps.  Each entry is
;;   (CHARSET DOCSTRING SHORT-NAME MAP ALIAS...)
;; and every ALIAS is defined right after its charset.
;;
;; Notes on individual entries:
;; - cp1125: `cp866u' is the original name for cp1125, says
;;   Serhii Hlodin <hlodin@lutsk.bank.gov.ua>.
;; - cp437: Fixme: C.f. iconv, http://czyborra.com/charsets/codepages.html
;;   shows this as not ASCII compatible, with various graphics in
;;   0x01-0x1F.
;; - cp858: IANA has IBM00858/CP00858.
(dolist (spec
         '((cp1125 "CP1125" "CP1125" "CP1125" ruscii cp866u)
           (cp437 "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
                  "CP437" "IBM437")
           (cp720 "CP720 (Arabic)" "CP720" "CP720")
           (cp737 "CP737 (PC Greek)" "CP737" "CP737")
           (cp775 "CP775 (PC Baltic)" "CP775" "CP775")
           (cp851 "CP851 (Greek)" "CP851" "IBM851")
           (cp852 "CP852 (MS-DOS Latin-2)" "CP852" "IBM852")
           (cp855 "CP855 (IBM Cyrillic)" "CP855" "IBM855")
           (cp857 "CP857 (IBM Turkish)" "CP857" "IBM857")
           (cp858 "CP858 (Multilingual Latin I + Euro)" "CP858" "CP858" cp00858)
           (cp860 "CP860 (MS-DOS Portuguese)" "CP860" "IBM860")
           (cp861 "CP861 (MS-DOS Icelandic)" "CP861" "IBM861")
           (cp862 "CP862 (PC Hebrew)" "CP862" "IBM862")
           (cp863 "CP863 (MS-DOS Canadian French)" "CP863" "IBM863")
           (cp864 "CP864 (PC Arabic)" "CP864" "IBM864")
           (cp865 "CP865 (MS-DOS Nordic)" "CP865" "IBM865")
           (cp869 "CP869 (IBM Modern Greek)" "CP869" "IBM869")
           (cp874 "CP874 (IBM Thai)" "CP874" "IBM874")))
  (define-charset (car spec)
    (nth 1 spec)
    :short-name (nth 2 spec)
    :code-space [0 255]
    :ascii-compatible-p t
    :map (nth 3 spec))
  (dolist (other-name (nthcdr 4 spec))
    (define-charset-alias other-name (car spec))))
833
;; For Arabic, we need three different types of character sets.
;; Digits are of direction left-to-right and of width 1-column.
;; Others are of direction right-to-left and of width 1-column or
;; 2-column.

;; Codes 0x22..0x2A, counted from U+0600.
(define-charset 'arabic-digit
  "Arabic digit"
  :short-name "Arabic digit"
  :iso-final-char ?2
  :emacs-mule-id 164
  :supplementary-p t
  :code-space [#x22 #x2A]
  :code-offset #x0600)

(define-charset 'arabic-1-column
  "Arabic 1-column"
  :short-name "Arabic 1-col"
  :long-name "Arabic 1-column"
  :iso-final-char ?3
  :emacs-mule-id 165
  :supplementary-p t
  :code-space [#x21 #x7E]
  :code-offset #x200100)

(define-charset 'arabic-2-column
  "Arabic 2-column"
  :short-name "Arabic 2-col"
  :long-name "Arabic 2-column"
  :iso-final-char ?4
  :emacs-mule-id 224
  :supplementary-p t
  :code-space [#x21 #x7E]
  :code-offset #x200180)
866
;; Lao script.
;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
;; Not all of them are defined unicodes.
(define-charset 'lao
  "Lao characters (ISO10646 0E81..0EDF)"
  :short-name "Lao"
  :iso-final-char ?1
  :emacs-mule-id 167
  :supplementary-p t
  :code-space [#x21 #x7E]
  :code-offset #x0E81)

;; 8-bit superset: `ascii', `eight-bit-control', and `lao' with its
;; codes shifted up by 0x80.
(define-charset 'mule-lao
  "Lao characters (ISO10646 0E81..0EDF)"
  :short-name "Lao"
  :code-space [#x00 #xFF]
  :supplementary-p t
  :superset '(ascii eight-bit-control (lao . #x80)))
885
886
;; Indian scripts.  Symbolic charset for data exchange.  Glyphs are
;; not assigned.  They are automatically converted to each Indian
;; script which IS-13194 supports.

(define-charset 'indian-is13194
  "Generic Indian charset for data exchange with IS 13194"
  :short-name "IS 13194"
  :long-name "Indian IS 13194"
  :iso-final-char ?5
  :emacs-mule-id 225
  :supplementary-p t
  :code-space [#x21 #x7E]
  :code-offset #x180000)
900
;; Define one 256-code glyph charset per (font, script) pair, named
;; SCRIPT-cdac or SCRIPT-akruti.  The charsets get consecutive
;; #x100-sized slices of character codes starting at #x180100: first
;; every CDAC script in the order listed, then every AKRUTI script.
(let ((next-offset #x180100))
  (dolist (glyph-font
           '(("cdac" "CDAC"
              (devanagari sanskrit bengali tamil telugu assamese
                          oriya kannada malayalam gujarati punjabi))
             ("akruti" "AKRUTI"
              (devanagari bengali punjabi gujarati
                          oriya tamil telugu kannada malayalam))))
    (let ((font-suffix (car glyph-font))
          (font-label (nth 1 glyph-font)))
      (dolist (script (nth 2 glyph-font))
        (let ((script-name (capitalize (symbol-name script))))
          (define-charset (intern (format "%s-%s" script font-suffix))
            (format "Glyphs of %s script for %s font. Subset of `indian-glyph'."
                    script-name font-label)
            :short-name (format "%s %s glyphs" font-label script-name)
            :supplementary-p t
            :code-space [0 255]
            :code-offset next-offset))
        ;; Advance to the slice of the next charset.
        (setq next-offset (+ next-offset #x100))))))
923
;; Two-byte charset whose character codes start at #x180100.
(define-charset 'indian-glyph
  "Glyphs for Indian characters."
  :short-name "Indian glyph"
  :iso-final-char ?4
  :emacs-mule-id 240
  :supplementary-p t
  :code-space [#x20 #x7F #x20 #x7F]
  :code-offset #x180100)

;; Actual Glyph for 1-column width.
;; NOTE(review): this charset and `indian-2-column' below are given
;; the same :emacs-mule-id (251) and the same :code-offset; confirm
;; that this is intended.
(define-charset 'indian-1-column
  "Indian charset for 1-column width glyphs."
  :short-name "Indian 1-col"
  :long-name "Indian 1 Column"
  :iso-final-char ?6
  :emacs-mule-id 251
  :supplementary-p t
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x184000)

;; Actual Glyph for 2-column width.
(define-charset 'indian-2-column
  "Indian charset for 2-column width glyphs."
  :short-name "Indian 2-col"
  :long-name "Indian 2 Column"
  :iso-final-char ?5
  :emacs-mule-id 251
  :supplementary-p t
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x184000)
954
;; Two-byte Tibetan charset: first byte in 0x21..0x7E, second byte
;; in 0x21..0x25.  The original form passed `:iso-final-char ?7'
;; twice; the redundant second occurrence has been dropped.
(define-charset 'tibetan
  "Tibetan characters"
  :iso-final-char ?7
  :short-name "Tibetan 2-col"
  :long-name "Tibetan 2 column"
  :emacs-mule-id 252
  :unify-map "MULE-tibetan"
  :supplementary-p t
  :code-space [33 126 33 37]
  :code-offset #x190000)
966
;; Uses the same code space and code offset as `tibetan'.
(define-charset 'tibetan-1-column
  "Tibetan 1 column glyph"
  :short-name "Tibetan 1-col"
  :long-name "Tibetan 1 column"
  :iso-final-char ?8
  :emacs-mule-id 241
  :supplementary-p t
  :code-space [#x21 #x7E #x21 #x25]
  :code-offset #x190000)
976
;; Subsets of Unicode.
;; Each is a two-byte charset with bytes starting at 0x20, mapped
;; linearly onto a range of Unicode starting at its :code-offset.
(define-charset 'mule-unicode-2500-33ff
  "Unicode characters of the range U+2500..U+33FF."
  :short-name "Unicode subset 2"
  :long-name "Unicode subset (U+2500..U+33FF)"
  :iso-final-char ?2
  :emacs-mule-id 242
  :supplementary-p t
  :code-space [#x20 #x7F #x20 #x47]
  :code-offset #x2500)

;; The long name used to read "(U+E000+FFFF)"; it now follows the
;; "U+xxxx..U+yyyy" form used by the docstring and by the other two
;; subsets.
(define-charset 'mule-unicode-e000-ffff
  "Unicode characters of the range U+E000..U+FFFF."
  :short-name "Unicode subset 3"
  :long-name "Unicode subset (U+E000..U+FFFF)"
  :iso-final-char ?3
  :emacs-mule-id 243
  :supplementary-p t
  :code-space [#x20 #x7F #x20 #x75]
  :code-offset #xE000
  :max-code #x753F)                     ; U+FFFF

(define-charset 'mule-unicode-0100-24ff
  "Unicode characters of the range U+0100..U+24FF."
  :short-name "Unicode subset"
  :long-name "Unicode subset (U+0100..U+24FF)"
  :iso-final-char ?1
  :emacs-mule-id 244
  :supplementary-p t
  :code-space [#x20 #x7F #x20 #x7F]
  :code-offset #x100)
1008
;; Individual Unicode planes as two-byte charsets.  Each entry is
;;   (CHARSET DOCSTRING SHORT-NAME CODE-OFFSET)
;; where CODE-OFFSET is the first character of the plane.
;;
;; The short name of `unicode-smp' used to carry a stray trailing
;; space ("Unicode SMP "); it is now spelled like its siblings.
(dolist (plane
         '((unicode-bmp
            "Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
            "Unicode BMP" 0)
           (unicode-smp
            "Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
            "Unicode SMP" #x10000)
           (unicode-sip
            "Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
            "Unicode SIP" #x20000)
           (unicode-ssp
            "Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
            "Unicode SSP" #xE0000)))
  (define-charset (car plane)
    (nth 1 plane)
    :short-name (nth 2 plane)
    :code-space [0 255 0 255]
    :code-offset (nth 3 plane)))
1032
;; Two-byte charset, each byte in 0x21..0x7E.
(define-charset 'ethiopic
  "Ethiopic characters for Amharic and Tigrigna."
  :short-name "Ethiopic"
  :long-name "Ethiopic characters"
  :iso-final-char ?3
  :emacs-mule-id 245
  :supplementary-p t
  :unify-map "MULE-ethiopic"
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x1A0000)

;; ASCII-compatible 8-bit charset loaded from a map.
(define-charset 'mac-roman
  "Mac Roman charset"
  :short-name "Mac Roman"
  :ascii-compatible-p t
  :code-space [#x00 #xFF]
  :map "MACINTOSH")
1050
;; Fixme: modern EBCDIC variants, e.g. IBM00924?
;; EBCDIC-family charsets.  Each entry is
;;   (CHARSET DOCSTRING SHORT-NAME MAP)
;; and the charset's own name is used as its :mime-charset.
;; The description of ibm1047 is what groff says about it.
(dolist (spec
         '((ebcdic-us "US version of EBCDIC" "EBCDIC-US" "EBCDICUS")
           (ebcdic-uk "UK version of EBCDIC" "EBCDIC-UK" "EBCDICUK")
           (ibm1047
            "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
            "IBM1047" "IBM1047")))
  (define-charset (car spec)
    (nth 1 spec)
    :short-name (nth 2 spec)
    :code-space [0 255]
    :mime-charset (car spec)
    :map (nth 3 spec)))
(define-charset-alias 'cp1047 'ibm1047)
1074
;; ASCII-compatible 8-bit charset loaded from the "HP-ROMAN8" map.
;; The docstring used to misspell the company name as
;; "Hewlet-Packard".
(define-charset 'hp-roman8
  "Encoding used by Hewlett-Packard printer software"
  :short-name "HP-ROMAN8"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "HP-ROMAN8")
1081
;; To make a coding system with this, a pre-write-conversion should
;; account for the commented-out multi-valued code points in
;; stdenc.map.
(define-charset 'adobe-standard-encoding
  "Adobe `standard encoding' used in PostScript"
  :short-name "ADOBE-STANDARD-ENCODING"
  :code-space [#x20 #xFF]
  :map "stdenc")

;; Same code space as `adobe-standard-encoding', different map.
(define-charset 'symbol
  "Adobe symbol encoding used in PostScript"
  :short-name "ADOBE-SYMBOL"
  :code-space [#x20 #xFF]
  :map "symbol")
1096
;; Single-byte, ASCII-compatible code pages, each backed by a map file.
(define-charset 'ibm850
  "DOS codepage 850 (Latin-1)"
  :short-name "IBM850"
  :ascii-compatible-p t
  :code-space [#x00 #xFF]
  :map "IBM850")
(define-charset-alias 'cp850 'ibm850)

(define-charset 'mik
  "Bulgarian DOS codepage"
  :short-name "MIK"
  :ascii-compatible-p t
  :code-space [#x00 #xFF]
  :map "MIK")

(define-charset 'ptcp154
  "`Paratype' codepage (Asian Cyrillic)"
  :short-name "PT154"
  :ascii-compatible-p t
  :code-space [#x00 #xFF]
  :mime-charset 'pt154
  :map "PTCP154")
;; Two further names for the same charset.
(define-charset-alias 'pt154 'ptcp154)
(define-charset-alias 'cp154 'ptcp154)
1121
;; GB18030 is assembled from several component charsets which the
;; final `gb18030' charset combines via :superset.  In each
;; :code-space vector the first MIN/MAX pair bounds the last byte of
;; a code and the final pair bounds the leading byte, as the ranges
;; quoted in the docstrings show.

;; Two-byte part, decoded through a map file.
(define-charset 'gb18030-2-byte
  "GB18030 2-byte (0x814E..0xFEFE)"
  :code-space [#x40 #xFE #x81 #xFE]
  :supplementary-p t
  :map "GB180302")

;; Four-byte part covering the BMP, decoded through a map file.
(define-charset 'gb18030-4-byte-bmp
  "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84]
  :supplementary-p t
  :map "GB180304")

;; Four-byte part for the supplementary planes: an offset-based
;; mapping starting at #x10000.
(define-charset 'gb18030-4-byte-smp
  "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
  :min-code '(#x9030 . #x8130)
  :max-code '(#xE332 . #x9A35)
  :supplementary-p t
  :code-offset #x10000)

;; The two remaining four-byte ranges are given consecutive character
;; codes starting at #x200000.
(define-charset 'gb18030-4-byte-ext-1
  "GB18030 4-byte (0x8431A530-0x8F39FE39)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
  :min-code '(#x8431 . #xA530)
  :max-code '(#x8F39 . #xFE39)
  :supplementary-p t
  :code-offset #x200000)                ; ... #x22484B

(define-charset 'gb18030-4-byte-ext-2
  "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
  :min-code '(#xE332 . #x9A36)
  :max-code '(#xFE39 . #xFE39)
  :supplementary-p t
  :code-offset #x22484C)                ; ... #x279f93

;; The complete charset: ASCII plus every component defined above.
(define-charset 'gb18030
  "GB18030"
  :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
  :min-code 0
  :max-code '(#xFE39 . #xFE39)
  :superset '(ascii
              gb18030-2-byte
              gb18030-4-byte-bmp
              gb18030-4-byte-smp
              gb18030-4-byte-ext-1
              gb18030-4-byte-ext-2))
1168
;; CNS 11643 plane 15 as a two-dimensional charset (each byte
;; #x21..#x7E), mapped by offset rather than through a map file.
(define-charset 'chinese-cns11643-15
  "CNS11643 Plane 15 Chinese Traditional"
  :short-name "CNS11643-15"
  :long-name "CNS11643-15 (Chinese traditional)"
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x27A000)
1175
;; Call `unify-charset' on each of the charsets below, in this order.
(dolist (charset '(chinese-gb2312
                   chinese-gbk
                   chinese-cns11643-1
                   chinese-cns11643-2
                   chinese-cns11643-3
                   chinese-cns11643-4
                   chinese-cns11643-5
                   chinese-cns11643-6
                   chinese-cns11643-7
                   big5
                   chinese-big5-1
                   chinese-big5-2
                   big5-hkscs
                   korean-ksc5601
                   vietnamese-viscii-lower
                   vietnamese-viscii-upper
                   chinese-sisheng
                   ipa
                   tibetan
                   ethiopic
                   japanese-jisx0208-1978
                   japanese-jisx0208
                   japanese-jisx0212
                   japanese-jisx0213-1
                   japanese-jisx0213-2))
  (unify-charset charset))
1201
1202 \f
;; Translation tables applied to characters while decoding and
;; encoding.  Both start out as nil.
;; Fixme: these aren't used now -- should they be?
(setq standard-translation-table-for-decode nil
      standard-translation-table-for-encode nil)
1209 \f
;;; Make fundamental coding systems.

;; The coding systems `no-conversion' and `undecided' are already
;; defined in coding.c, as if by:
;;
;; (define-coding-system 'no-conversion
;;   "..."
;;   :coding-type 'raw-text
;;   ...)
;; (define-coding-system 'undecided
;;   "..."
;;   :coding-type 'undecided
;;   ...)

;; Short aliases: each pair is (ALIAS . EXISTING-CODING-SYSTEM).
(dolist (pair '((binary . no-conversion)
                (unix . undecided-unix)
                (dos . undecided-dos)
                (mac . undecided-mac)))
  (define-coding-system-alias (car pair) (cdr pair)))
1228
;; Both coding systems below use the `raw-text' coding type.  Only
;; `raw-text' itself sets :for-unibyte.
(define-coding-system 'raw-text
  "Raw text, which means text contains random 8-bit codes.
Encoding text with this coding system produces the actual byte
sequence of the text in buffers and strings. An exception is made for
characters from the `eight-bit' character set. Each of them is encoded
into a single byte.

When you visit a file with this coding, the file is read into a
unibyte buffer as is (except for EOL format), thus each byte of a file
is treated as a character."
  :coding-type 'raw-text
  :for-unibyte t
  :mnemonic ?t)

;; Fixed to the `unix' EOL type.
(define-coding-system 'no-conversion-multibyte
  "Like `no-conversion' but don't read a file into a unibyte buffer."
  :coding-type 'raw-text
  :eol-type 'unix
  :mnemonic ?=)
1248
;; Latin-1 as a `charset'-type coding system over the single charset
;; `iso-8859-1'.
(define-coding-system 'iso-latin-1
  "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
  :coding-type 'charset
  :mnemonic ?1
  :charset-list '(iso-8859-1)
  :mime-charset 'iso-8859-1)

;; Alternative names for `iso-latin-1'.
(dolist (alias '(iso-8859-1 latin-1))
  (define-coding-system-alias alias 'iso-latin-1))
1258
;; Coding systems not specific to each language environment.

;; Note that :charset-list is the symbol `emacs-mule' here, not a
;; list of charsets.
(define-coding-system 'emacs-mule
  "Emacs 21 internal format used in buffer and string."
  :coding-type 'emacs-mule
  :charset-list 'emacs-mule
  :mnemonic ?M)
1266
;; The UTF-8 variants differ only in how the signature (BOM) is
;; handled, selected through the :bom property.
(define-coding-system 'utf-8
  "UTF-8 (no signature (BOM))"
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(unicode)
  :mime-charset 'utf-8)

(define-coding-system 'utf-8-with-signature
  "UTF-8 (with signature (BOM))"
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(unicode)
  :bom t)

;; :bom is a cons of the two coding systems defined just above.
(define-coding-system 'utf-8-auto
  "UTF-8 (auto-detect signature (BOM))"
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(unicode)
  :bom '(utf-8-with-signature . utf-8))

(define-coding-system-alias 'mule-utf-8 'utf-8)

;; Same coding type as `utf-8', but the charset list is `emacs'
;; instead of `unicode'.
(define-coding-system 'utf-8-emacs
  "Support for all Emacs characters (including non-Unicode characters)."
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(emacs))

;; The encoding used internally.  This encoding is meant to be able to
;; save any multibyte buffer without losing information.  It can change
;; between Emacs releases, though, so it should only be used for
;; internal files.
(define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix)
1300
;; UTF-16 variants.  Every one is flagged :mime-text-unsuitable; they
;; differ in :endian and in the :bom setting.

;; Fixed byte order, no signature.
(define-coding-system 'utf-16le
  "UTF-16LE (little endian, no signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :endian 'little
  :mime-text-unsuitable t
  :mime-charset 'utf-16le)

(define-coding-system 'utf-16be
  "UTF-16BE (big endian, no signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :endian 'big
  :mime-text-unsuitable t
  :mime-charset 'utf-16be)

;; Fixed byte order, with signature.
(define-coding-system 'utf-16le-with-signature
  "UTF-16 (little endian, with signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :bom t
  :endian 'little
  :mime-text-unsuitable t
  :mime-charset 'utf-16)

(define-coding-system 'utf-16be-with-signature
  "UTF-16 (big endian, with signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :bom t
  :endian 'big
  :mime-text-unsuitable t
  :mime-charset 'utf-16)

;; :bom is a cons of the two with-signature coding systems above.
(define-coding-system 'utf-16
  "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :bom '(utf-16le-with-signature . utf-16be-with-signature)
  :endian 'big
  :mime-text-unsuitable t
  :mime-charset 'utf-16)

;; Backwards compatibility (old names, also used by Mule-UCS).  We
;; prefer the MIME names.
(define-coding-system-alias 'utf-16-le 'utf-16le-with-signature)
(define-coding-system-alias 'utf-16-be 'utf-16be-with-signature)
1353
1354
;; Generic 7-bit ISO 2022 coding systems.  They share the `iso-2022'
;; charset list and differ in :designation and :flags.
(define-coding-system 'iso-2022-7bit
  "ISO 2022 based 7-bit encoding using only G0."
  :coding-type 'iso-2022
  :mnemonic ?J
  :charset-list 'iso-2022
  :designation [(ascii t) nil nil nil]
  :flags '(short
           ascii-at-eol ascii-at-cntl
           7-bit designation composition))

(define-coding-system 'iso-2022-7bit-ss2
  "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
  :coding-type 'iso-2022
  :mnemonic ?$
  :charset-list 'iso-2022
  :designation [(ascii 94) nil (nil 96) nil]
  :flags '(short
           ascii-at-eol ascii-at-cntl
           7-bit designation single-shift composition))

(define-coding-system 'iso-2022-7bit-lock
  "ISO-2022 coding system using Locking-Shift for 96-charset."
  :coding-type 'iso-2022
  :mnemonic ?&
  :charset-list 'iso-2022
  :designation [(ascii 94) (nil 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl
           7-bit designation locking-shift composition))

(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
1382
;; Unlike the generic ISO 2022 coding systems, this one lists its
;; charsets explicitly and names the charsets allowed in each
;; :designation element.
(define-coding-system 'iso-2022-7bit-lock-ss2
  "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
  :coding-type 'iso-2022
  :mnemonic ?i
  :charset-list '(ascii
                  japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
                  korean-ksc5601
                  chinese-gb2312
                  chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
                  chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
                  chinese-cns11643-7)
  :designation [(ascii 94)
                (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
                (nil chinese-cns11643-2)
                (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
                     chinese-cns11643-6 chinese-cns11643-7)]
  :flags '(short
           ascii-at-eol ascii-at-cntl
           7-bit locking-shift single-shift init-bol))

(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
1403
;; Same designation as `iso-2022-7bit-ss2', but the flags contain
;; neither `short' nor `7-bit'.
(define-coding-system 'iso-2022-8bit-ss2
  "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
  :coding-type 'iso-2022
  :mnemonic ?@
  :charset-list 'iso-2022
  :designation [(ascii 94) nil (nil 96) nil]
  :flags '(ascii-at-eol ascii-at-cntl
           designation single-shift composition))
1411
;; Compound-text family.  All three coding systems share the same
;; designation; they differ in flags, :mime-charset and conversion
;; hooks.
(define-coding-system 'compound-text
  "Compound text based generic encoding.
This coding system is an extension of X's \"Compound Text Encoding\".
It encodes many characters using the normal ISO-2022 designation sequences,
but it doesn't support extended segments of CTEXT."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl
           long-form designation locking-shift single-shift composition)
  ;; Fixme: this isn't a valid MIME charset and has to be
  ;; special-cased elsewhere -- fx
  :mime-charset 'x-ctext)

(dolist (alias '(x-ctext ctext))
  (define-coding-system-alias alias 'compound-text))

;; Same as compound-text, but doesn't produce composition escape
;; sequences.  Used in post-read and pre-write conversions of
;; compound-text-with-extensions, see mule.el.  Note that this should
;; not have a mime-charset property, to prevent it from showing up
;; close to the beginning of coding systems ordered by priority.
(define-coding-system 'ctext-no-compositions
  "Compound text based generic encoding.

Like `compound-text', but does not produce escape sequences for compositions."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl
           designation locking-shift single-shift))

;; Adds read/write conversion functions on top of the ISO 2022 codec.
(define-coding-system 'compound-text-with-extensions
  "Compound text encoding with ICCCM Extended Segment extensions.

See the variables `ctext-standard-encodings' and
`ctext-non-standard-encodings-alist' for the detail about how
extended segments are handled.

This coding system should be used only for X selections. It is inappropriate
for decoding and encoding files, process I/O, etc."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl
           long-form designation locking-shift single-shift)
  :post-read-conversion 'ctext-post-read-conversion
  :pre-write-conversion 'ctext-pre-write-conversion)

(dolist (alias '(x-ctext-with-extensions ctext-with-extensions))
  (define-coding-system-alias alias 'compound-text-with-extensions))
1468
;; ASCII-only `charset'-type coding system; :default-char is `?'.
(define-coding-system 'us-ascii
  "Encode ASCII as-is and encode non-ASCII characters to `?'."
  :coding-type 'charset
  :mnemonic ?-
  :charset-list '(ascii)
  :default-char ??
  :mime-charset 'us-ascii)

;; `iso-safe' is another name for `us-ascii'.
(define-coding-system-alias 'iso-safe 'us-ascii)
1478
;; Both UTF-7 flavours are declared with the `utf-8' coding type plus
;; a pair of pre-write / post-read conversion functions.
(define-coding-system 'utf-7
  "UTF-7 encoding of Unicode (RFC 2152)."
  :coding-type 'utf-8
  :mnemonic ?U
  :mime-charset 'utf-7
  :charset-list '(unicode)
  :pre-write-conversion 'utf-7-pre-write-conversion
  :post-read-conversion 'utf-7-post-read-conversion)

;; The IMAP flavour has its own conversion functions and no
;; :mime-charset.
(define-coding-system 'utf-7-imap
  "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
  :coding-type 'utf-8
  :mnemonic ?u
  :charset-list '(unicode)
  :pre-write-conversion 'utf-7-imap-pre-write-conversion
  :post-read-conversion 'utf-7-imap-post-read-conversion)
1495
;; Unless some other coding system is specified explicitly, terminal
;; output uses us-ascii.
(set-safe-terminal-coding-system-internal 'us-ascii)
1499
;; The other coding-systems are defined in each language specific
;; files under lisp/language.

;; Normally, set coding system to `undecided' before reading a file.
;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
;; but we regard them as containing multibyte characters.
;; Tar files are not decoded at all, but we treat them as raw bytes.

;; Each entry is (REGEXP . VALUE).  The `mapcar' rebuilds every entry
;; with a `purecopy' of its regexp string and leaves VALUE untouched.
(setq file-coding-system-alist
      (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
              '(("\\.elc\\'" . utf-8-emacs)
                ("\\.utf\\(-8\\)?\\'" . utf-8)
                ("\\.xml\\'" . xml-find-file-coding-system)
                ;; We use raw-text for reading loaddefs.el so that if it
                ;; happens to have DOS or Mac EOLs, they are converted to
                ;; newlines.  This is required to make the special treatment
                ;; of the "\ newline" combination in loaddefs.el, which marks
                ;; the beginning of a doc string, work.
                ;; The dot before "el" is escaped so that it matches only a
                ;; literal `.', not an arbitrary character.
                ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
                ("\\.tar\\'" . (no-conversion . no-conversion))
                ("\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
                ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
                ;; Catch-all: the empty regexp matches every file name.
                ("" . (undecided . nil)))))
1523
1524 \f
;;; Setting coding categories and their priorities.

;; This setting is just for reading Emacs Lisp source files which
;; contain multilingual text while dumping Emacs.  More appropriate
;; values are set by the command `set-language-environment' for each
;; language environment.

(set-coding-system-priority 'iso-latin-1 'utf-8 'iso-2022-7bit)
1537
1538 \f
;;; Miscellaneous settings.

;; Make all multibyte characters self-insert: bind every character
;; from 128 up to `max-char' in the global map's char-table.
(set-char-table-range (nth 1 global-map)
                      (cons 128 (max-char))
                      'self-insert-command)

;; Set the entries for codes ?\221 through ?\226 (octal) in
;; `latin-extra-code-table' to t.
(dolist (code '(?\221 ?\222 ?\223 ?\224 ?\225 ?\226))
  (aset latin-extra-code-table code t))

;; The old code-pages library is obsoleted by coding systems based on
;; the charsets defined in this file but might be required by user
;; code.
(provide 'code-pages)
1557
1558 ;;; mule-conf.el ends here