1 ;;; european.el --- support for European languages -*- coding: iso-2022-7bit; -*-
3 ;; Copyright (C) 1995, 1997, 2001 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
7 ;; Keywords: multilingual, European
9 ;; This file is part of GNU Emacs.
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
28 ;; For European scripts, the character sets ISO 8859-1, -2, -3, -4, -9,
29 ;; -10, -13, -14 and -15, windows-1250, -1252, -1254 and -1257, mac-roman,
30 ;; adobe-standard-encoding, cp850 and next are supported.
34 ;; Latin-1 (ISO-8859-1)
;; Register the "Latin-1" language environment.  The entries visible
;; here give its charset (iso-8859-1), the coding systems offered for it
;; (iso-latin-1, iso-latin-9, windows-1252) with iso-latin-1 as the
;; priority, the non-ASCII translation and unibyte display settings, the
;; default input method ("latin-1-prefix"), a sample greeting, and the
;; user-facing description of the languages covered.
36 (set-language-info-alist
37 "Latin-1" '((charset iso-8859-1)
38 (coding-system iso-latin-1 iso-latin-9 windows-1252)
39 (coding-priority iso-latin-1)
40 (nonascii-translation . iso-8859-1)
41 (unibyte-display . iso-latin-1)
42 (input-method . "latin-1-prefix")
44 . "Hello, Hej, Tere, Hei, Bonjour, Gr
\e,A|_
\e(B Gott, Ciao,
\e,A!
\e(BHola!")
46 This language environment is a generic one for the Latin-1 (ISO-8859-1)
47 character set which supports the following European languages:
48 Albanian, Basque, Breton, Catalan, Danish, Dutch, English, Faeroese,
49 Finnish, French (with restrictions -- see Latin-9), Frisian, Galician,
50 German, Greenlandic, Icelandic, Irish Gaelic (new orthography),
51 Italian, Latin, Luxemburgish, Norwegian, Portuguese, Rhaeto-Romanic,
52 Scottish Gaelic, Spanish, and Swedish.
53 We also have specific language environments for the following languages:
55 For German, \"German\".
56 For Spanish, \"Spanish\".
57 For French, \"French\".
59 Latin-1 also covers several written languages outside Europe, including
60 Indonesian/Malay, Tagalog (Philippines), Swahili and Afrikaans."))
64 ;; Latin-2 (ISO-8859-2)
;; Define the `iso-latin-2' coding system over the `iso-8859-2' charset,
;; advertise it under the MIME charset name `iso-8859-2', and make
;; `iso-8859-2' and `latin-2' aliases for it.
66 (define-coding-system 'iso-latin-2
67 "ISO 2022 based 8-bit encoding for Latin-2 (MIME:ISO-8859-2)."
70 :charset-list '(iso-8859-2)
71 :mime-charset 'iso-8859-2)
73 (define-coding-system-alias 'iso-8859-2 'iso-latin-2)
74 (define-coding-system-alias 'latin-2 'iso-latin-2)
;; Register the "Latin-2" language environment: charset iso-8859-2,
;; coding systems iso-latin-2 and windows-1250 (iso-latin-2 has
;; priority), default input method "latin-2-prefix", followed by the
;; user-facing description of the languages covered and of the more
;; specific environments (Polish, Romanian, Slovak) it points to.
76 (set-language-info-alist
77 "Latin-2" '((charset iso-8859-2)
78 (coding-system iso-latin-2 windows-1250)
79 (coding-priority iso-latin-2)
80 (nonascii-translation . iso-8859-2)
81 (unibyte-display . iso-latin-2)
82 (input-method . "latin-2-prefix")
84 This language environment is a generic one for the Latin-2 (ISO-8859-2)
85 character set which supports the following languages:
86 Albanian, Czech, English, German, Hungarian, Polish, Romanian,
87 Serbo-Croatian or Croatian, Slovak, Slovene, Sorbian (upper and lower),
89 We also have specific language environments for the following languages:
91 For Polish, \"Polish\".
92 For Romanian, \"Romanian\".
93 For Slovak, \"Slovak\"."))
97 ;; Latin-3 (ISO-8859-3)
;; Define `iso-latin-3' as a charset-type coding system over the
;; `iso-8859-3' charset, with MIME charset name `iso-8859-3', and make
;; `iso-8859-3' and `latin-3' aliases for it.
99 (define-coding-system 'iso-latin-3
100 "ISO 2022 based 8-bit encoding for Latin-3 (MIME:ISO-8859-3)."
101 :coding-type 'charset
103 :charset-list '(iso-8859-3)
104 :mime-charset 'iso-8859-3)
106 (define-coding-system-alias 'iso-8859-3 'iso-latin-3)
107 (define-coding-system-alias 'latin-3 'iso-latin-3)
;; Register the "Latin-3" language environment: charset iso-8859-3,
;; coding system and priority iso-latin-3, default input method
;; "latin-3-prefix", followed by the list of languages the character
;; set supports.
109 (set-language-info-alist
110 "Latin-3" '((charset iso-8859-3)
111 (coding-system iso-latin-3)
112 (coding-priority iso-latin-3)
113 (nonascii-translation . iso-8859-3)
114 (unibyte-display . iso-latin-3)
115 (input-method . "latin-3-prefix")
117 These languages are supported with the Latin-3 (ISO-8859-3) character set:
118 Afrikaans, Catalan, Dutch, English, Esperanto, French, Galician,
119 German, Italian, Maltese, Spanish, and Turkish."))
123 ;; Latin-4 (ISO-8859-4)
;; Define `iso-latin-4' as a charset-type coding system over the
;; `iso-8859-4' charset, with MIME charset name `iso-8859-4', and make
;; `iso-8859-4' and `latin-4' aliases for it.
125 (define-coding-system 'iso-latin-4
126 "ISO 2022 based 8-bit encoding for Latin-4 (MIME:ISO-8859-4)."
127 :coding-type 'charset
129 :charset-list '(iso-8859-4)
130 :mime-charset 'iso-8859-4)
132 (define-coding-system-alias 'iso-8859-4 'iso-latin-4)
133 (define-coding-system-alias 'latin-4 'iso-latin-4)
;; Register the "Latin-4" language environment: charset iso-8859-4,
;; default input method "latin-4-postfix", followed by the list of
;; languages the character set supports.
;; NOTE(review): this entry names the coding system through its
;; `iso-8859-4' alias, whereas the other Latin-N environments in this
;; file use the `iso-latin-N' name -- presumably equivalent; confirm
;; whether the difference is intentional.
135 (set-language-info-alist
136 "Latin-4" '((charset iso-8859-4)
137 (coding-system iso-8859-4)
138 (coding-priority iso-8859-4)
139 (nonascii-translation . iso-8859-4)
140 (unibyte-display . iso-8859-4)
141 (input-method . "latin-4-postfix")
143 These languages are supported with the Latin-4 (ISO-8859-4) character set:
144 Danish, English, Estonian, Finnish, German, Greenlandic, Lappish,
145 Latvian, Lithuanian, and Norwegian."))
149 ;; Latin-5 (ISO-8859-9)
;; Define `iso-latin-5' as a charset-type coding system over the
;; `iso-8859-9' charset, with MIME charset name `iso-8859-9', and make
;; `iso-8859-9' and `latin-5' aliases for it.
151 (define-coding-system 'iso-latin-5
152 "ISO 2022 based 8-bit encoding for Latin-5 (MIME:ISO-8859-9)."
153 :coding-type 'charset
155 :charset-list '(iso-8859-9)
156 :mime-charset 'iso-8859-9)
158 (define-coding-system-alias 'iso-8859-9 'iso-latin-5)
159 (define-coding-system-alias 'latin-5 'iso-latin-5)
;; Register the "Latin-5" language environment: charset iso-8859-9,
;; coding system and priority iso-latin-5, default input method
;; "latin-5-postfix".  The documentation string refers users to the
;; separate Turkish environment.
161 (set-language-info-alist
162 "Latin-5" '((charset iso-8859-9)
163 (coding-system iso-latin-5)
164 (coding-priority iso-latin-5)
165 (nonascii-translation . iso-8859-9)
166 (unibyte-display . iso-latin-5)
167 (input-method . "latin-5-postfix")
168 (documentation . "Support for Latin-5.\
169 See also the Turkish environment."))
173 ;; Latin-6 (ISO-8859-10)
;; Define `iso-latin-6' as a charset-type coding system over the
;; `iso-8859-10' charset, with MIME charset name `iso-8859-10', and make
;; `iso-8859-10' and `latin-6' aliases for it.
175 (define-coding-system 'iso-latin-6
176 "ISO 2022 based 8-bit encoding for Latin-6 (MIME:ISO-8859-10)."
177 :coding-type 'charset
179 :charset-list '(iso-8859-10)
180 :mime-charset 'iso-8859-10)
182 (define-coding-system-alias 'iso-8859-10 'iso-latin-6)
183 (define-coding-system-alias 'latin-6 'iso-latin-6)
;; Register the "Latin-6" language environment: charset iso-8859-10,
;; coding system and priority iso-latin-6.  No input-method entry is
;; given (see the Fixme below).
185 (set-language-info-alist
186 "Latin-6" '((charset iso-8859-10)
187 (coding-system iso-latin-6)
188 (coding-priority iso-latin-6)
189 (nonascii-translation . iso-8859-10)
190 (unibyte-display . iso-latin-6)
191 ;; Fixme: input method.
192 (documentation . "Support for generic Latin-6 (Northern European)."))
196 ;; Latin-7 (ISO-8859-13)
;; Define `iso-latin-7' as a charset-type coding system over the
;; `iso-8859-13' charset, with MIME charset name `iso-8859-13', and make
;; `iso-8859-13' and `latin-7' aliases for it.
198 (define-coding-system 'iso-latin-7
199 "ISO 2022 based 8-bit encoding for Latin-7 (MIME:ISO-8859-13)."
200 :coding-type 'charset
202 :charset-list '(iso-8859-13)
203 :mime-charset 'iso-8859-13)
205 (define-coding-system-alias 'iso-8859-13 'iso-latin-7)
206 (define-coding-system-alias 'latin-7 'iso-latin-7)
;; Register the "Latin-7" language environment: charset iso-8859-13,
;; coding system and priority iso-latin-7.  No input-method entry is
;; given (see the Fixme below).
;; NOTE(review): "Latin-7" is registered a second time further down in
;; this file with a shorter alist (no charset or unibyte-display
;; entries); presumably the later call replaces this one -- confirm
;; which of the two is intended.
208 (set-language-info-alist
209 "Latin-7" '((charset iso-8859-13)
210 (coding-system iso-latin-7)
211 (coding-priority iso-latin-7)
212 (nonascii-translation . iso-8859-13)
213 (unibyte-display . iso-latin-7)
214 ;; Fixme: input method.
215 (documentation . "Support for generic Latin-7 (Baltic Rim)."))
218 ;; Latin-8 (ISO-8859-14)
;; Define `iso-latin-8' as a charset-type coding system over the
;; `iso-8859-14' charset, with MIME charset name `iso-8859-14', and make
;; `iso-8859-14' and `latin-8' aliases for it.
220 (define-coding-system 'iso-latin-8
221 "ISO 2022 based 8-bit encoding for Latin-8 (MIME:ISO-8859-14)."
222 :coding-type 'charset
223 ;; `W' for `Welsh', since `C' for `Celtic' is taken.
225 :charset-list '(iso-8859-14)
226 :mime-charset 'iso-8859-14)
228 (define-coding-system-alias 'iso-8859-14 'iso-latin-8)
229 (define-coding-system-alias 'latin-8 'iso-latin-8)
;; Register the "Latin-8" language environment: charset iso-8859-14,
;; coding system and priority iso-latin-8, default input method
;; "latin-8-prefix", a short sample text, and the user-facing
;; description of the Celtic languages the character set covers.
231 (set-language-info-alist
232 "Latin-8" '((charset iso-8859-14)
233 (coding-system iso-latin-8)
234 (coding-priority iso-latin-8)
235 (nonascii-translation . iso-8859-14)
236 (unibyte-display . iso-latin-8)
237 (input-method . "latin-8-prefix")
238 ;; Fixme: Welsh/Ga{e}lic greetings
239 (sample-text . "
\e,_"
\e(B
\e,_p
\e(B
\e,_^
\e(B")
241 This language environment is a generic one for the Latin-8 (ISO-8859-14)
242 character set which supports the Celtic languages, including those not
243 covered by other ISO-8859 character sets:
244 Welsh, Manx Gaelic and Irish Gaelic (old orthography)."))
247 ;; Latin-9 (ISO-8859-15)
;; Define `iso-latin-9' as a charset-type coding system over the
;; `iso-8859-15' charset, with MIME charset name `iso-8859-15', and make
;; `iso-8859-15', `latin-9' and `latin-0' aliases for it.
249 (define-coding-system 'iso-latin-9
250 "ISO 2022 based 8-bit encoding for Latin-9 (MIME:ISO-8859-15)."
251 :coding-type 'charset
254 :charset-list '(iso-8859-15)
255 :mime-charset 'iso-8859-15)
257 (define-coding-system-alias 'iso-8859-15 'iso-latin-9)
258 (define-coding-system-alias 'latin-9 'iso-latin-9)
259 (define-coding-system-alias 'latin-0 'iso-latin-9)
;; Register the "Latin-9" language environment: charset iso-8859-15,
;; coding system and priority iso-latin-9, default input method
;; "latin-9-prefix", a short sample text, and the user-facing
;; description of how Latin-9 differs from Latin-1.
261 (set-language-info-alist
262 "Latin-9" '((charset iso-8859-15)
263 (coding-system iso-latin-9)
264 (coding-priority iso-latin-9)
265 (nonascii-translation . iso-8859-15)
266 (unibyte-display . iso-latin-9)
267 (input-method . "latin-9-prefix")
269 . "AVE.
\e,B)9.>
\e,b<=
\e,_/
\e(B
\e,b$
\e(B")
271 This language environment is a generic one for the Latin-9 (ISO-8859-15)
272 character set which supports the same languages as Latin-1 with the
273 addition of the Euro sign and some additional French and Finnish letters.
274 Latin-9 is sometimes nicknamed `Latin-0'."))
;; Define `iso-latin-7' over the `iso-8859-13' charset and alias it as
;; `iso-8859-13' and `latin-7'.
;; NOTE(review): the visible lines repeat the `iso-latin-7' definition
;; and both aliases that already appear in the Latin-7 section earlier
;; in this file -- presumably redundant; confirm before removing.
277 (define-coding-system 'iso-latin-7
278 "ISO 2022 based 8-bit encoding for Latin-7 (MIME:ISO-8859-13)."
279 :coding-type 'charset
282 :charset-list '(iso-8859-13)
283 :mime-charset 'iso-8859-13)
285 (define-coding-system-alias 'iso-8859-13 'iso-latin-7)
286 (define-coding-system-alias 'latin-7 'iso-latin-7)
;; Windows code pages.  Each form below defines a charset-type coding
;; system over the charset of the same name, uses that name as its MIME
;; charset, and is followed by a `cpNNNN' alias for it.
288 (define-coding-system 'windows-1250
289 "windows-1250 (Central European) encoding (MIME: WINDOWS-1250)"
290 :coding-type 'charset
292 :charset-list '(windows-1250)
293 :mime-charset 'windows-1250)
294 (define-coding-system-alias 'cp1250 'windows-1250)
296 (define-coding-system 'windows-1252
297 "windows-1252 (Western European) encoding (MIME: WINDOWS-1252)"
298 :coding-type 'charset
300 :charset-list '(windows-1252)
301 :mime-charset 'windows-1252)
302 (define-coding-system-alias 'cp1252 'windows-1252)
304 (define-coding-system 'windows-1254
305 "windows-1254 (Turkish) encoding (MIME: WINDOWS-1254)"
306 :coding-type 'charset
308 :charset-list '(windows-1254)
309 :mime-charset 'windows-1254)
310 (define-coding-system-alias 'cp1254 'windows-1254)
312 (define-coding-system 'windows-1257
313 "windows-1257 (Baltic) encoding (MIME: WINDOWS-1257)"
314 :coding-type 'charset
316 :charset-list '(windows-1257)
317 :mime-charset 'windows-1257)
318 (define-coding-system-alias 'cp1257 'windows-1257)
;; DOS code pages.  Each form below defines a charset-type coding system
;; over the charset of the same name and uses that name as its MIME
;; charset.  Every one except `cp858' is followed by an `ibmNNN' alias.
320 (define-coding-system 'cp850
321 "DOS codepage 850 (Western European)"
322 :coding-type 'charset
324 :charset-list '(cp850)
325 :mime-charset 'cp850)
326 (define-coding-system-alias 'ibm850 'cp850)
328 (define-coding-system 'cp852
329 "DOS codepage 852 (Slavic)"
330 :coding-type 'charset
332 :charset-list '(cp852)
333 :mime-charset 'cp852)
334 (define-coding-system-alias 'ibm852 'cp852)
336 (define-coding-system 'cp857
337 "DOS codepage 857 (Turkish)"
338 :coding-type 'charset
340 :charset-list '(cp857)
341 :mime-charset 'cp857)
342 (define-coding-system-alias 'ibm857 'cp857)
;; NOTE(review): no alias follows `cp858' in this view, unlike the other
;; code pages here -- confirm whether that is intentional.
344 (define-coding-system 'cp858
345 "Codepage 858 (Multilingual Latin I + Euro)"
346 :coding-type 'charset
348 :charset-list '(cp858)
349 :mime-charset 'cp858)
351 (define-coding-system 'cp860
352 "DOS codepage 860 (Portuguese)"
353 :coding-type 'charset
355 :charset-list '(cp860)
356 :mime-charset 'cp860)
357 (define-coding-system-alias 'ibm860 'cp860)
359 (define-coding-system 'cp861
360 "DOS codepage 861 (Icelandic)"
361 :coding-type 'charset
363 :charset-list '(cp861)
364 :mime-charset 'cp861)
365 (define-coding-system-alias 'ibm861 'cp861)
367 (define-coding-system 'cp863
368 "DOS codepage 863 (French Canadian)"
369 :coding-type 'charset
371 :charset-list '(cp863)
372 :mime-charset 'cp863)
373 (define-coding-system-alias 'ibm863 'cp863)
375 (define-coding-system 'cp865
376 "DOS codepage 865 (Norwegian/Danish)"
377 :coding-type 'charset
379 :charset-list '(cp865)
380 :mime-charset 'cp865)
381 (define-coding-system-alias 'ibm865 'cp865)
;; NOTE(review): no docstring line is visible for `cp437' in this view,
;; unlike its siblings -- confirm against the complete file.
383 (define-coding-system 'cp437
385 :coding-type 'charset
387 :charset-list '(cp437)
388 :mime-charset 'cp437)
389 (define-coding-system-alias 'ibm437 'cp437)
;; Register the "German" language environment.  It uses the same
;; Latin-1 coding systems (iso-latin-1 first, then iso-latin-9), selects
;; the tutorial file "TUTORIAL.de", and makes "german-postfix" the
;; default input method, as its documentation text states.
391 (set-language-info-alist
392 "German" '((tutorial . "TUTORIAL.de")
394 (coding-system iso-latin-1 iso-latin-9)
395 (coding-priority iso-latin-1)
396 (nonascii-translation . iso-8859-1)
397 (input-method . "german-postfix")
398 (unibyte-display . iso-latin-1)
400 German (Deutsch Nord) Guten Tag
401 German (Deutsch S
\e,A|
\e(Bd) Gr
\e,A|_
\e(B Gott")
403 This language environment is almost the same as Latin-1,
404 but sets the default input method to \"german-postfix\".
405 Additionally, it selects the German tutorial."))
;; Register the "French" language environment.  It uses the same
;; Latin-1 coding systems (iso-latin-1 first, then iso-latin-9) and the
;; "latin-1-prefix" input method, and selects the tutorial file
;; "TUTORIAL.fr".
408 (set-language-info-alist
409 "French" '((tutorial . "TUTORIAL.fr")
411 (coding-system iso-latin-1 iso-latin-9)
412 (coding-priority iso-latin-1)
413 (nonascii-translation . iso-8859-1)
414 (unibyte-display . iso-latin-1)
415 (input-method . "latin-1-prefix")
416 (sample-text . "French (Fran
\e,Ag
\e(Bais) Bonjour, Salut")
418 This language environment is almost the same as Latin-1,
419 but it selects the French tutorial."))
;; Register the "Slovenian" language environment on the iso-8859-2
;; charset: coding systems iso-8859-2 and windows-1250 (iso-8859-2 has
;; priority), default input method "latin-2-postfix", tutorial file
;; "TUTORIAL.sl", and a sample text.
;; NOTE(review): the end of this form is not visible in this view.
422 (set-language-info-alist
423 "Slovenian" '((charset iso-8859-2)
424 (coding-system . (iso-8859-2 windows-1250))
425 (coding-priority . (iso-8859-2))
426 (nonascii-translation . iso-8859-2)
427 (input-method . "latin-2-postfix")
428 (unibyte-display . iso-8859-2)
429 (tutorial . "TUTORIAL.sl")
430 (sample-text . "
\e,B.
\e(Belimo vam uspe
\e,B9
\e(Ben dan!")
;; Register the "Spanish" language environment.  It uses the same
;; Latin-1 coding systems (iso-latin-1 first, then iso-latin-9), selects
;; the tutorial file "TUTORIAL.es", and makes "spanish-postfix" the
;; default input method, as its documentation text states.
434 (set-language-info-alist
435 "Spanish" '((tutorial . "TUTORIAL.es")
437 (coding-system iso-latin-1 iso-latin-9)
438 (coding-priority iso-latin-1)
439 (nonascii-translation . iso-8859-1)
440 (input-method . "spanish-postfix")
441 (unibyte-display . iso-latin-1)
442 (sample-text . "Spanish (Espa
\e,Aq
\e(Bol)
\e,A!
\e(BHola!")
444 This language environment is almost the same as Latin-1,
445 but it sets the default input method to \"spanish-postfix\",
446 and it selects the Spanish tutorial."))
;; Register the "Dutch" language environment.  It uses the same Latin-1
;; coding systems (iso-latin-1 first, then iso-latin-9), selects the
;; tutorial file "TUTORIAL.nl", and makes "dutch" the default input
;; method.
449 (set-language-info-alist
450 "Dutch" '((tutorial . "TUTORIAL.nl")
452 (coding-system iso-latin-1 iso-latin-9)
453 (coding-priority iso-latin-1)
454 (nonascii-translation . iso-8859-1)
455 (unibyte-display . iso-latin-1)
456 (input-method . "dutch")
457 (sample-text . "Er is een aantal manieren waarop je dit kan doen")
459 This language environment is almost the same as Latin-1,
460 but it selects the Dutch tutorial and input method."))
463 ;; For Turkish, the character set ISO-8859-9 (Latin-5) is used. However,
464 ;; before the introduction of ISO-8859-9 in 1988, ISO-8859-3 (Latin-3)
465 ;; was used for Turkish. Those who use Latin-3 for Turkish should use
466 ;; the "Latin-3" language environment.
;; Register the "Turkish" language environment on the iso-8859-9
;; charset: coding systems iso-latin-5, windows-1254 and iso-latin-3
;; (iso-latin-5 has priority) and default input method
;; "turkish-postfix".  The `set-case-syntax-pair' and `set-case-syntax'
;; calls below modify `standard-case-table' for the letters I and i and
;; their Turkish counterparts; the documentation string describes this
;; as applying Turkish case rules.
;; NOTE(review): the alist entries that wrap those calls are not
;; visible in this view -- presumably one group runs when the
;; environment is selected and the other when it is left; confirm.
468 (set-language-info-alist
469 "Turkish" '((charset iso-8859-9)
470 (coding-system iso-latin-5 windows-1254 iso-latin-3)
471 (coding-priority iso-latin-5)
472 (nonascii-translation . iso-8859-9)
473 (unibyte-display . iso-latin-5)
474 (input-method . "turkish-postfix")
475 (sample-text . "Turkish (T
\e,A|
\e(Brk
\e,Ag
\e(Be) Merhaba")
478 (set-case-syntax-pair ?I ?
\e,C9
\e(B (standard-case-table))
479 (set-case-syntax-pair ?
\e,C)
\e(B ?i (standard-case-table))))
482 (set-case-syntax-pair ?I ?i (standard-case-table))
483 (set-case-syntax ?
\e,C9
\e(B "w" (standard-case-table))
484 (set-case-syntax ?
\e,C)
\e(B "w" (standard-case-table))))
485 (documentation . "Support for Turkish.
486 Differs from the Latin-5 environment in using the `turkish-postfix' input
487 method and applying Turkish case rules for the characters i, I,
\e,C9
\e(B,
\e,C)
\e(B.")))
489 ;; Polish ISO 8859-2 environment.
490 ;; Maintainer: Wlodek Bzyl <matwb@univ.gda.pl>
491 ;; Keywords: multilingual, Polish
;; Register the "Polish" language environment on the iso-8859-2
;; charset: coding systems iso-8859-2 and windows-1250 (iso-8859-2 has
;; priority), default input method "polish-slash", tutorial file
;; "TUTORIAL.pl", and a sample sentence.
;; NOTE(review): the end of this form is not visible in this view.
493 (set-language-info-alist
494 "Polish" '((charset . (iso-8859-2))
495 (coding-system . (iso-8859-2 windows-1250))
496 (coding-priority . (iso-8859-2))
497 (nonascii-translation . iso-8859-2)
498 (input-method . "polish-slash")
499 (unibyte-display . iso-8859-2)
500 (tutorial . "TUTORIAL.pl")
501 (sample-text . "P
\e,As
\e(Bjd
\e,B<
\e(B, ki
\e,Bq
\e(B-
\e,B?
\e(Be t
\e,Bj
\e(B chmurno
\e,B6f
\e(B w g
\e,B31
\e(Bb flaszy")
;; Register the "Welsh" language environment.  Both the coding-system
;; and coding-priority lists put utf-8 first and latin-8 second; the
;; default input method is "welsh" and non-ASCII translation uses
;; iso-8859-14.
505 (set-language-info-alist
506 "Welsh" `((coding-system utf-8 latin-8) ; the input method is Unicode-based
507 (coding-priority utf-8 latin-8)
508 (nonascii-translation . iso-8859-14)
509 (input-method . "welsh")
510 (documentation . "Support for Welsh, using Unicode."))
;; Register the "Latin-7" language environment with coding system and
;; priority latin-7 and non-ASCII translation iso-8859-13.
;; NOTE(review): "Latin-7" is also registered earlier in this file with
;; a fuller alist (including charset and unibyte-display entries);
;; presumably this later call replaces that one -- confirm which of the
;; two is intended.
513 (set-language-info-alist
514 "Latin-7" `((coding-system latin-7)
515 (coding-priority latin-7)
516 (nonascii-translation . iso-8859-13)
517 ;; Fixme: input-method
518 (documentation . "Support for Latin-7, e.g. Latvian, Lithuanian."))
;; Register the "Lithuanian" language environment: coding systems
;; latin-7 and windows-1257 (latin-7 has priority), non-ASCII
;; translation iso-8859-13, default input method "lithuanian-keyboard".
521 (set-language-info-alist
522 "Lithuanian" `((coding-system latin-7 windows-1257)
523 (coding-priority latin-7)
524 (nonascii-translation . iso-8859-13)
525 (input-method . "lithuanian-keyboard")
526 (documentation . "Support for Lithuanian."))
;; Register the "Latvian" language environment: coding systems latin-7
;; and windows-1257 (latin-7 has priority), non-ASCII translation
;; iso-8859-13, default input method "latvian-keyboard".
529 (set-language-info-alist
530 "Latvian" `((coding-system latin-7 windows-1257)
531 (coding-priority latin-7)
532 (nonascii-translation . iso-8859-13)
533 (input-method . "latvian-keyboard")
534 (documentation . "Support for Latvian."))
;; Define `mac-roman' as a charset-type coding system over the
;; `mac-roman' charset.  Note that its MIME charset name is `macintosh',
;; not the coding system's own name.
538 (define-coding-system 'mac-roman
539 "Mac Roman Encoding (MIME:MACINTOSH)."
540 :coding-type 'charset
542 :charset-list '(mac-roman)
543 :mime-charset 'macintosh)
;; Define `next' as a charset-type coding system over the `next'
;; charset.
;; NOTE(review): only part of this form is visible in this view (no
;; docstring and no closing line) -- check it against the complete file.
545 (define-coding-system 'next
547 :coding-type 'charset
549 :charset-list '(next)
;; Define `hp-roman8' as a charset-type coding system over the
;; `hp-roman8' charset, with MIME charset name `hp-roman8', and make
;; `roman8' an alias for it.  The docstring spells the vendor name
;; "Hewlett-Packard".
552 (define-coding-system 'hp-roman8
553 "Hewlett-Packard roman-8 encoding (MIME:ROMAN-8)"
554 :coding-type 'charset
556 :charset-list '(hp-roman8)
557 :mime-charset 'hp-roman8)
558 (define-coding-system-alias 'roman8 'hp-roman8)
;; Define `adobe-standard-encoding' as a charset-type coding system over
;; the charset of the same name, which is also used as its MIME charset
;; name.
560 (define-coding-system 'adobe-standard-encoding
561 "Adobe `standard' encoding for PostScript"
562 :coding-type 'charset
564 :charset-list '(adobe-standard-encoding)
565 :mime-charset 'adobe-standard-encoding)
568 ;; For automatic composing of diacritics and combining marks.
;; Each entry visible below has the form (FROM TO (X . Y)), where FROM
;; and TO are character codes.  For a character code, the loop body
;; stores the entry's last element as that character's
;; `diacritic-composition' char-code property and points its slot in
;; `composition-function-table' at `diacritic-composition-function'.
;; NOTE(review): the (X . Y) pairs (tc, bc, Bc) are presumably
;; composition reference points -- confirm.  The `(= (length range) 3)'
;; test suggests entries without a TO element also exist; those entries
;; and the loop that advances FROM are not visible in this view.
569 (dolist (range '( ;; combining diacritical marks
570 (#x0300 #x0314 (tc . bc))
572 (#x0316 #x0319 (bc . tc))
574 (#x031B #x0320 (bc . tc))
577 (#x0323 #x0333 (bc . tc))
578 (#x0334 #x0338 (Bc . Bc))
579 (#x0339 #x033C (bc . tc))
580 (#x033D #x033F (tc . bc))
583 (#x0342 #x0344 (tc . bc))
586 (#x0347 #x0349 (bc . tc))
587 (#x034A #x034C (tc . bc))
588 (#x034D #x034E (bc . tc))
589 ;; combining diacritical marks for symbols
590 (#x20D0 #x20D1 (tc . bc))
591 (#x20D2 #x20D3 (Bc . Bc))
592 (#x20D4 #x20D7 (tc . bc))
593 (#x20D8 #x20DA (Bc . Bc))
594 (#x20DB #x20DC (tc . bc))
595 (#x20DD #x20E0 (Bc . Bc))
597 (#x20E2 #x20E3 (Bc . Bc))))
598 (let* ((from (car range))
599 (to (if (= (length range) 3)
602 (composition (car (last range))))
604 (put-char-code-property from 'diacritic-composition composition)
605 (aset composition-function-table from 'diacritic-composition-function)
606 (setq from (1+ from)))))
;; Regexp used by the diacritic composition commands below: one
;; character that is not in category `^', followed by one or more
;; characters that are in category `^'.
;; NOTE(review): category `^' is presumably the combining-diacritic
;; category -- confirm against the category table.
608 (defconst diacritic-composition-pattern "\\C^\\c^+")
;; Narrows to BEG..END, moves to the start of the narrowed region, and
;; calls `compose-region' on every match of
;; `diacritic-composition-pattern' found by searching forward.
;; NOTE(review): the forms between the docstring and `narrow-to-region'
;; are not visible in this view; presumably they restore the narrowing
;; and point afterwards -- confirm.
610 (defun diacritic-compose-region (beg end)
611 "Compose diacritic characters in the region.
612 When called from a program, expects two arguments,
613 positions (integers or markers) specifying the region."
616 (narrow-to-region beg end)
617 (goto-char (point-min))
618 (while (re-search-forward diacritic-composition-pattern nil t)
619 (compose-region (match-beginning 0) (match-end 0)))))
;; Repeatedly matches `diacritic-composition-pattern' in STRING starting
;; at `idx', calls `compose-string' on each match, and then continues
;; from the end of that match.
;; NOTE(review): the binding of `idx' and the final return form are not
;; visible in this view.
621 (defun diacritic-compose-string (string)
622 "Compose diacritic characters in STRING and return the resulting string."
624 (while (setq idx (string-match diacritic-composition-pattern string idx))
625 (compose-string string idx (match-end 0))
626 (setq idx (match-end 0))))
;; Convenience wrapper: applies `diacritic-compose-region' to the span
;; from `point-min' to `point-max'.
629 (defun diacritic-compose-buffer ()
630 "Compose diacritic characters in the current buffer."
632 (diacritic-compose-region (point-min) (point-max)))
;; Applies `diacritic-compose-region' to the LEN characters that start
;; at point.
;; NOTE(review): judging by the name this is meant to be used as a
;; coding system's post-read-conversion function; the rest of the body
;; (including the return value) is not visible in this view.
634 (defun diacritic-post-read-conversion (len)
635 (diacritic-compose-region (point) (+ (point) len))
;; Two parallel branches are visible.  One reads characters from STRING
;; with `aref' and composes with `compose-string'; the other reads from
;; the buffer with `char-after' and composes with `compose-region'.
;; Each requires the character at POS to be in category `l', then
;; collects characters that have a `diacritic-composition' property
;; together with that property's value, and passes the reversed list as
;; the composition components.
;; NOTE(review): many lines of this function (the branch selection, the
;; loop headers, and the return forms) are not visible in this view.
638 (defun diacritic-composition-function (pos &optional string)
639 "Compose diacritic text around POS.
640 Optional 2nd argument STRING, if non-nil, is a string containing text
643 The return value is the end position of composed characters,
644 or nil if no characters are composed."
;; NOTE(review): `(aref string pos)' is evaluated here, before the
;; `(>= pos 0)' guard below gets a chance to reject a negative POS.
647 (let ((ch (aref string pos))
;; NOTE(review): `ch' is listed a second time in this binding list (the
;; buffer branch below does not repeat it); presumably the later,
;; valueless binding leaves `ch' nil inside the body -- confirm and
;; drop the duplicate if so.
648 start end components ch composition)
649 (when (and (>= pos 0)
650 ;; Previous character is latin.
651 (aref (char-category-set ch) ?l)
659 (setq ch (aref string pos)
661 (get-char-code-property ch 'diacritic-composition)))
662 (setq components (cons ch (cons composition components))
664 (compose-string string start pos (nreverse components))
666 (let ((ch (char-after pos))
667 start end components composition)
668 (when (and (>= pos (point-min))
669 (aref (char-category-set ch) ?l)
677 (setq ch (char-after pos)
679 (get-char-code-property ch 'diacritic-composition)))
680 (setq components (cons ch (cons composition components))
682 (compose-region start pos (nreverse components))
687 ;;; european.el ends here