;;; cyrillic.el --- support for Cyrillic -*- coding: iso-2022-7bit; -*-
;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
-;; Licensed to the Free Software Foundation.
-;; Copyright (C) 2002 Free Software Foundation, Inc.
+;; Licensed to the Free Software Foundation.
+;; Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
+;; Copyright (C) 2003
+;; National Institute of Advanced Industrial Science and Technology (AIST)
+;; Registration Number H13PRO009
-;; Keywords: multilingual, Cyrillic
+;; Author: Kenichi Handa <handa@etl.go.jp>
+;; Keywords: multilingual, Cyrillic, i18n
;; This file is part of GNU Emacs.
;;; Commentary:
-;; The character set ISO8859-5 is supported. See
-;; http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM. KOI-8 and
-;; ALTERNATIVNYJ are converted to ISO8859-5 internally.
+;; The character set ISO8859-5 is supported. KOI-8 and ALTERNATIVNYJ
+;; are converted to Unicode internally. See
+;; <URL:http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM>. For more info
+;; on Cyrillic charsets, see
+;; <URL:http://czyborra.com/charsets/cyrillic.html>. The KOI and
+;; Alternativnyj coding systems should live in code-pages.el, but
+;; they've always been preloaded and the coding system autoload
+;; mechanism didn't get accepted, so they have to stay here and
+;; duplicate code-pages stuff.
+
+;; Note that 8859-5 maps directly onto the Unicode Cyrillic block,
+;; apart from codepoints 160 (NBSP, cf. U+0400), 173 (soft hyphen,
+;; cf. U+040D) and 253 (section sign, cf. U+045D). The KOI-8 and
+;; Alternativnyj coding systems encode both 8859-5 and Unicode.
+;; ucs-tables.el provides unification for cyrillic-iso-8bit.
+
+;; Customizing `utf-fragment-on-decoding' allows decoding characters
+;; from KOI and Alternativnyj into 8859-5 where that's possible.
+;; cyrillic-iso8859-5 characters take half as much space in the buffer
+;; as the mule-unicode-0100-24ff equivalents, though that's probably
+;; not normally a big deal.
;;; Code:
"Cyrillic-ISO" '((charset iso-8859-5)
(coding-system cyrillic-iso-8bit)
(coding-priority cyrillic-iso-8bit)
+ (input-method . "cyrillic-yawerty") ; fixme
(nonascii-translation . iso-8859-5)
- (input-method . "cyrillic-yawerty")
(unibyte-display . cyrillic-iso-8bit)
(features cyril-util)
(sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
(documentation . "Support for Cyrillic ISO-8859-5."))
'("Cyrillic"))
-;; KOI-8 stuff
+;; KOI-8R stuff
(define-coding-system 'cyrillic-koi8
"KOI8 8-bit encoding for Cyrillic (MIME: KOI8-R)."
;; Register the "Cyrillic-KOI8" language environment, listed with
;; '("Cyrillic") as its third argument.  The alist names cyrillic-koi8
;; as the coding system and unibyte display, and requires the
;; cyril-util feature.  In this change the coding priority gains
;; cyrillic-iso-8bit as a second entry and the default input method
;; moves from "cyrillic-jcuken" to "russian-typewriter".
(set-language-info-alist
"Cyrillic-KOI8" `((charset koi8)
(coding-system cyrillic-koi8)
- (coding-priority cyrillic-koi8)
+ (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
(nonascii-translation . koi8)
- (input-method . "cyrillic-jcuken")
+ (input-method . "russian-typewriter")
(features cyril-util)
(unibyte-display . cyrillic-koi8)
(sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
(documentation . "Support for Cyrillic KOI8-R."))
'("Cyrillic"))
;; Register a "Russian" language environment, listed with '("Cyrillic")
;; as its third argument.  It uses cyrillic-koi8 as the coding system
;; and unibyte display, prefers cyrillic-koi8 over cyrillic-iso-8bit in
;; the coding priority, defaults to the "russian-computer" input method
;; and points at the TUTORIAL.ru tutorial.  The alist is backquoted so
;; that the nonascii-translation entry is the value fetched at load
;; time from the `translation-table' property of
;; `cyrillic-koi8-r-nonascii-translation-table'.
;; NOTE(review): that symbol is not defined in the visible part of the
;; file -- confirm it is set up before this form is evaluated.
+(set-language-info-alist
+ "Russian" `((charset cyrillic-iso8859-5)
+ (nonascii-translation
+ . ,(get 'cyrillic-koi8-r-nonascii-translation-table
+ 'translation-table))
+ (coding-system cyrillic-koi8)
+ (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
+ (input-method . "russian-computer")
+ (features cyril-util)
+ (unibyte-display . cyrillic-koi8)
+ (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
+ (documentation . "\
+Support for Russian using koi8-r and the russian-computer input method.")
+ (tutorial . "TUTORIAL.ru"))
+ '("Cyrillic"))
+
;;; ALTERNATIVNYJ stuff
(define-coding-system 'cyrillic-alternativnyj
(coding-system cyrillic-alternativnyj)
(coding-priority cyrillic-alternativnyj)
(nonascii-translation . alternativnyj)
- (input-method . "cyrillic-jcuken")
+ (input-method . "russian-typewriter")
(features cyril-util)
(unibyte-display . cyrillic-alternativnyj)
(sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")