1 /* Header for coding system handler.
2 Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001, 2002
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H13PRO009
8 This file is part of GNU Emacs.
10 GNU Emacs is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
15 GNU Emacs is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GNU Emacs; see the file COPYING. If not, write to
22 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 Boston, MA 02111-1307, USA. */
25 #ifndef EMACS_CODING_H
26 #define EMACS_CODING_H
28 /* Index to arguments of Fdefine_coding_system_internal. */
30 enum define_coding_system_arg_index
34 coding_arg_coding_type
,
35 coding_arg_charset_list
,
36 coding_arg_ascii_compatible_p
,
37 coding_arg_decode_translation_table
,
38 coding_arg_encode_translation_table
,
39 coding_arg_post_read_conversion
,
40 coding_arg_pre_write_conversion
,
41 coding_arg_default_char
,
47 enum define_coding_iso2022_arg_index
49 coding_arg_iso2022_initial
= coding_arg_max
,
50 coding_arg_iso2022_reg_usage
,
51 coding_arg_iso2022_request
,
52 coding_arg_iso2022_flags
,
53 coding_arg_iso2022_max
56 enum define_coding_utf16_arg_index
58 coding_arg_utf16_bom
= coding_arg_max
,
59 coding_arg_utf16_endian
,
63 enum define_coding_ccl_arg_index
65 coding_arg_ccl_decoder
= coding_arg_max
,
66 coding_arg_ccl_encoder
,
67 coding_arg_ccl_valids
,
71 extern Lisp_Object Vcoding_system_hash_table
;
73 /* Enumeration of coding system type. */
75 enum coding_system_type
81 coding_type_emacs_mule
,
85 coding_type_undecided
,
90 /* Enumeration of end-of-line format type. */
94 eol_lf
, /* Line-feed only, same as Emacs' internal
96 eol_crlf
, /* Sequence of carriage-return and
98 eol_cr
, /* Carriage-return only. */
99 eol_any
, /* Accept any of above. Produce line-feed
101 eol_undecided
, /* This value is used to denote that the
102 eol-type is not yet decided. */
106 /* Enumeration of index to an attribute vector of a coding system. */
108 enum coding_attr_index
110 coding_attr_base_name
,
111 coding_attr_docstring
,
112 coding_attr_mnemonic
,
114 coding_attr_charset_list
,
115 coding_attr_ascii_compat
,
116 coding_attr_decode_tbl
,
117 coding_attr_encode_tbl
,
118 coding_attr_post_read
,
119 coding_attr_pre_write
,
120 coding_attr_default_char
,
123 coding_attr_category
,
124 coding_attr_safe_charsets
,
126 /* The following are extra attributes for each type. */
127 coding_attr_charset_valids
,
129 coding_attr_ccl_decoder
,
130 coding_attr_ccl_encoder
,
131 coding_attr_ccl_valids
,
133 coding_attr_iso_initial
,
134 coding_attr_iso_usage
,
135 coding_attr_iso_request
,
136 coding_attr_iso_flags
,
138 coding_attr_utf_16_bom
,
139 coding_attr_utf_16_endian
,
141 coding_attr_emacs_mule_full
,
143 coding_attr_last_index
/* Accessors for the slots of a coding-system attribute vector.
   ATTRS is the vector; every macro expands to AREF applied to ATTRS
   and the coding_attr_* index that matches the macro's name, so the
   result is whatever AREF yields for that slot.  */
#define CODING_ATTR_BASE_NAME(attrs) \
  AREF ((attrs), coding_attr_base_name)
#define CODING_ATTR_TYPE(attrs) \
  AREF ((attrs), coding_attr_type)
#define CODING_ATTR_CHARSET_LIST(attrs) \
  AREF ((attrs), coding_attr_charset_list)
#define CODING_ATTR_MNEMONIC(attrs) \
  AREF ((attrs), coding_attr_mnemonic)
#define CODING_ATTR_DOCSTRING(attrs) \
  AREF ((attrs), coding_attr_docstring)
#define CODING_ATTR_ASCII_COMPAT(attrs) \
  AREF ((attrs), coding_attr_ascii_compat)
#define CODING_ATTR_DECODE_TBL(attrs) \
  AREF ((attrs), coding_attr_decode_tbl)
#define CODING_ATTR_ENCODE_TBL(attrs) \
  AREF ((attrs), coding_attr_encode_tbl)
#define CODING_ATTR_POST_READ(attrs) \
  AREF ((attrs), coding_attr_post_read)
#define CODING_ATTR_PRE_WRITE(attrs) \
  AREF ((attrs), coding_attr_pre_write)
#define CODING_ATTR_DEFAULT_CHAR(attrs) \
  AREF ((attrs), coding_attr_default_char)
#define CODING_ATTR_DIRECTION(attrs) \
  AREF ((attrs), coding_attr_direction)
#define CODING_ATTR_FLUSHING(attrs) \
  AREF ((attrs), coding_attr_flushing)
#define CODING_ATTR_PLIST(attrs) \
  AREF ((attrs), coding_attr_plist)
#define CODING_ATTR_CATEGORY(attrs) \
  AREF ((attrs), coding_attr_category)
#define CODING_ATTR_SAFE_CHARSETS(attrs) \
  AREF ((attrs), coding_attr_safe_charsets)
/* Accessors keyed by ID, an index into Vcoding_system_hash_table.
   HASH_VALUE for ID is indexed with AREF: element 0 is exposed as the
   attributes, element 1 as the aliases and element 2 as the eol-type.
   CODING_ID_NAME yields the hash key stored at the same index.  */
#define CODING_ID_ATTRS(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), (id)), \
         0))

#define CODING_ID_ALIASES(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), (id)), \
         1))

#define CODING_ID_EOL_TYPE(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), (id)), \
         2))

#define CODING_ID_NAME(id) \
  (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), (id)))
/* Look up CODING_SYSTEM_SYMBOL in Vcoding_system_hash_table with
   Fgethash; the result is Qnil when there is no entry.  */
#define CODING_SYSTEM_SPEC(coding_system_symbol) \
  (Fgethash ((coding_system_symbol), Vcoding_system_hash_table, Qnil))

/* Result of hash_lookup for CODING_SYSTEM_SYMBOL in
   Vcoding_system_hash_table (NULL is passed as the last argument).  */
#define CODING_SYSTEM_ID(coding_system_symbol) \
  (hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \
                (coding_system_symbol), NULL))

/* Non-zero if CODING_SYSTEM_SPEC finds a non-nil entry for
   CODING_SYSTEM_SYMBOL.  */
#define CODING_SYSTEM_P(coding_system_symbol) \
  (! NILP (CODING_SYSTEM_SPEC (coding_system_symbol)))
187 #define CHECK_CODING_SYSTEM(x) \
189 if (!CODING_SYSTEM_P (x)) \
190 x = wrong_type_argument (Qcoding_system_p, (x)); \
194 #define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \
196 spec = CODING_SYSTEM_SPEC (x); \
198 x = wrong_type_argument (Qcoding_system_p, (x)); \
202 #define CHECK_CODING_SYSTEM_GET_ID(x, id) \
205 id = CODING_SYSTEM_ID (x); \
207 x = wrong_type_argument (Qcoding_system_p, (x)); \
211 /*** GENERAL section ***/
213 /* Enumeration of result code of code conversion. */
214 enum coding_result_code
216 CODING_RESULT_SUCCESS
,
217 CODING_RESULT_INSUFFICIENT_SRC
,
218 CODING_RESULT_INSUFFICIENT_DST
,
219 CODING_RESULT_INCONSISTENT_EOL
,
220 CODING_RESULT_INSUFFICIENT_CMP
,
221 CODING_RESULT_INTERRUPT
,
222 CODING_RESULT_INSUFFICIENT_MEM
226 /* Macros used for the member `mode' of the struct coding_system. */
/* Each flag below is a distinct single bit (0x01 .. 0x20), so any
   combination can be stored in `mode' with bitwise OR and tested with
   bitwise AND.  */
228 /* If set, recover the original CR or LF of the already decoded text
229 when the decoding routine encounters an inconsistent eol format. */
230 #define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01
232 /* If set, the decoding/encoding routines treat the current data as
233 the last block of the whole text to be converted, and do the
234 appropriate finishing job. */
235 #define CODING_MODE_LAST_BLOCK 0x02
237 /* If set, it means that the current source text is in a buffer which
238 enables selective display. */
/* CODING_REQUIRE_ENCODING tests this bit, so encoding is treated as
   required whenever it is set.  */
239 #define CODING_MODE_SELECTIVE_DISPLAY 0x04
241 /* This flag is used by the decoding/encoding routines on the fly. If
242 set, it means that right-to-left text is being processed. */
243 #define CODING_MODE_DIRECTION 0x08
/* NOTE(review): this flag has no description here.  Judging by its
   name it presumably tells the conversion routines that the
   destination must not be moved or reallocated -- confirm against
   the code that reads `mode'.  */
245 #define CODING_MODE_FIXED_DESTINATION 0x10
/* NOTE(review): this flag has no description here.  Presumably it
   requests an encoding that only produces safely representable
   output -- confirm against the code that reads `mode'.  */
247 #define CODING_MODE_SAFE_ENCODING 0x20
249 /* Structure of the field `spec.iso_2022' in the structure
253 /* Bit-wise-or of CODING_ISO_FLAG_XXX. */
256 /* The current graphic register invoked to each graphic plane. */
257 int current_invocation
[2];
259 /* The current charset designated to each graphic register. The
260 value -1 means that no charset is designated, -2 means that
261 there was an invalid designation previously. */
262 int current_designation
[4];
264 /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
265 by single-shift while encoding. */
268 /* Set to 1 temporarily only when processing at beginning of line. */
281 enum utf_16_endian_type
289 enum utf_16_bom_type bom
;
290 enum utf_16_endian_type endian
;
294 struct coding_detection_info
296 /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs. */
297 /* Which categories are already checked. */
299 /* Which categories are strongly found. */
301 /* Which categories are rejected. */
308 /* ID number of the coding system. This is an index to
309 Vcoding_system_hash_table. This value is set by
310 setup_coding_system. At the early stage of building time, this
311 value is -1 in the array coding_categories to indicate that no
312 coding-system of that category is yet defined. */
315 /* Flag bits of the coding system. The meaning of each bit is common
316 to all types of coding systems. */
319 /* Mode bits of the coding system. See the comments of the macros
323 /* Detailed information specific to each type of coding system. */
326 struct iso_2022_spec iso_2022
;
327 struct ccl_spec
*ccl
; /* Defined in ccl.h. */
328 struct utf_16_spec utf_16
;
329 int emacs_mule_full_support
;
335 /* The following two members specify how binary 8-bit code 128..255
336 are represented in source and destination text respectively. 1
337 means they are represented by 2-byte sequence, 0 means they are
338 represented by 1-byte as is (see the comment in character.h). */
339 unsigned src_multibyte
: 1;
340 unsigned dst_multibyte
: 1;
342 /* How many leading bytes we can skip for decoding. This is set to
343 -1 in setup_coding_system, and updated by detect_coding. So,
344 when this is equal to the byte length of the text being
345 converted, we can skip the actual conversion process. */
348 /* The following members are set by encoding/decoding routine. */
349 EMACS_INT produced
, produced_char
, consumed
, consumed_char
;
351 /* Number of error source data found in a decoding routine. */
354 /* Store the positions of error source data. */
355 EMACS_INT
*error_positions
;
357 /* Finish status of code conversion. */
358 enum coding_result_code result
;
360 /* The following members are all Lisp symbols. We don't have to
361 protect them from GC because the current garbage collection
362 doesn't relocate Lisp symbols. But, when it is changed, we must
363 find a way to protect them. */
365 EMACS_INT src_pos
, src_pos_byte
, src_chars
, src_bytes
;
366 Lisp_Object src_object
;
367 unsigned char *source
;
369 EMACS_INT dst_pos
, dst_pos_byte
, dst_bytes
;
370 Lisp_Object dst_object
;
371 unsigned char *destination
;
375 /* If an element is non-negative, it is a character code.
377 If it is in the range -128..-1, it is an 8-bit character code
380 If it is less than -128, it specifies the start of an annotation
381 chunk. The length of the chunk is -128 minus the value of the
382 element. The following elements are OFFSET, ANNOTATION-TYPE, and
383 a sequence of actual data for the annotation. OFFSET is a
384 character position offset from dst_pos or src_pos,
385 ANNOTATION-TYPE specifies the meaning of the annotation and how to
386 handle the following data. */
388 int charbuf_size
, charbuf_used
;
390 /* Set to 1 if charbuf contains an annotation. */
393 unsigned char carryover
[64];
398 int (*detector
) P_ ((struct coding_system
*,
399 struct coding_detection_info
*));
400 void (*decoder
) P_ ((struct coding_system
*));
401 int (*encoder
) P_ ((struct coding_system
*));
404 /* Meanings of bits in the member `common_flags' of the structure
   coding_system.  The lowest 8 bits (CODING_ANNOTATION_MASK) are
   reserved for various kinds of annotations (currently two of them
   are used).  The values from 0x0100 upward are independent
   single-bit flags.  */
/* NOTE(review): three CODING_ANNOTATE_* masks are defined below, and
   CODING_ANNOTATE_CHARSET_MASK is 0x0003, i.e. the bitwise OR of
   CODING_ANNOTATE_COMPOSITION_MASK (0x0001) and
   CODING_ANNOTATE_DIRECTION_MASK (0x0002) rather than a bit of its
   own.  Setting it therefore also satisfies a bitwise-AND test for
   either of the other two masks -- confirm that this overlap is
   intended before relying on the masks being distinguishable.  */
407 #define CODING_ANNOTATION_MASK 0x00FF
408 #define CODING_ANNOTATE_COMPOSITION_MASK 0x0001
409 #define CODING_ANNOTATE_DIRECTION_MASK 0x0002
410 #define CODING_ANNOTATE_CHARSET_MASK 0x0003
411 #define CODING_FOR_UNIBYTE_MASK 0x0100
412 #define CODING_REQUIRE_FLUSHING_MASK 0x0200
413 #define CODING_REQUIRE_DECODING_MASK 0x0400
414 #define CODING_REQUIRE_ENCODING_MASK 0x0800
415 #define CODING_REQUIRE_DETECTION_MASK 0x1000
416 #define CODING_RESET_AT_BOL_MASK 0x2000
/* Predicates on a coding context.  CODING is a pointer to a
   structure with the members `common_flags', `mode', `src_multibyte'
   and `dst_multibyte'.  Every predicate yields exactly 1 or 0, as
   documented, so the result stays correct even when it is stored in
   a narrow integer or a 1-bit field (a raw mask value such as 0x0100
   would be truncated to 0 there).  CODING is expanded more than once
   by some of the macros, so it must not have side effects.  */

/* Return 1 if any annotation bit (CODING_ANNOTATION_MASK) is set in
   the common flags of the coding context CODING, else 0.  */
#define CODING_REQUIRE_ANNOTATION(coding) \
  (((coding)->common_flags & CODING_ANNOTATION_MASK) != 0)

/* Return 1 if the coding context CODING prefers decoding into
   unibyte, else 0.  */
#define CODING_FOR_UNIBYTE(coding) \
  (((coding)->common_flags & CODING_FOR_UNIBYTE_MASK) != 0)

/* Return 1 if the coding context CODING requires specific code to be
   attached at the tail of converted text, else 0.  */
#define CODING_REQUIRE_FLUSHING(coding) \
  (((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK) != 0)

/* Return 1 if the coding context CODING requires code conversion on
   decoding, else 0.  This holds when the destination is multibyte or
   when CODING_REQUIRE_DECODING_MASK is set.  */
#define CODING_REQUIRE_DECODING(coding) \
  ((coding)->dst_multibyte \
   || ((coding)->common_flags & CODING_REQUIRE_DECODING_MASK) != 0)

/* Return 1 if the coding context CODING requires code conversion on
   encoding, else 0.  This holds when the source is multibyte, when
   CODING_REQUIRE_ENCODING_MASK is set, or when the mode has
   CODING_MODE_SELECTIVE_DISPLAY set.  */
#define CODING_REQUIRE_ENCODING(coding) \
  ((coding)->src_multibyte \
   || ((coding)->common_flags & CODING_REQUIRE_ENCODING_MASK) != 0 \
   || ((coding)->mode & CODING_MODE_SELECTIVE_DISPLAY) != 0)

/* Return 1 if the coding context CODING requires some kind of code
   detection, else 0.  */
#define CODING_REQUIRE_DETECTION(coding) \
  (((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK) != 0)

/* Return 1 if the coding context CODING requires code conversion on
   decoding or some kind of code detection, else 0.  */
#define CODING_MAY_REQUIRE_DECODING(coding) \
  (CODING_REQUIRE_DECODING (coding) \
   || CODING_REQUIRE_DETECTION (coding))
458 /* Macros to decode or encode a character of JISX0208 in SJIS. S1 and
459 S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
460 system. C1 and C2 are the 1st and 2nd position codes of Emacs'
463 #define SJIS_TO_JIS(code) \
465 int s1, s2, j1, j2; \
467 s1 = (code) >> 8, s2 = (code) & 0xFF; \
470 (j1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0), \
473 (j1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \
474 j2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F)); \
475 (code) = (j1 << 8) | j2; \
479 #define JIS_TO_SJIS(code) \
481 int s1, s2, j1, j2; \
483 j1 = (code) >> 8, j2 = (code) & 0xFF; \
485 (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x71 : 0xB1), \
486 s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F)); \
488 (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x70 : 0xB0), \
490 (code) = (s1 << 8) | s2; \
494 /* Encode the file name NAME using the specified coding system
495 for file names, if any. */
496 #define ENCODE_FILE(name) \
497 (! NILP (Vfile_name_coding_system) \
498 && XFASTINT (Vfile_name_coding_system) != 0 \
499 ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \
500 : (! NILP (Vdefault_file_name_coding_system) \
501 && XFASTINT (Vdefault_file_name_coding_system) != 0 \
502 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
506 /* Decode the file name NAME using the specified coding system
507 for file names, if any. */
508 #define DECODE_FILE(name) \
509 (! NILP (Vfile_name_coding_system) \
510 && XFASTINT (Vfile_name_coding_system) != 0 \
511 ? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \
512 : (! NILP (Vdefault_file_name_coding_system) \
513 && XFASTINT (Vdefault_file_name_coding_system) != 0 \
514 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \
519 /* Encode the string STR using the specified coding system
520 for w32 system functions, if any. */
521 #define ENCODE_SYSTEM(str) \
522 (! NILP (Vlocale_coding_system) \
523 && XFASTINT (Vlocale_coding_system) != 0 \
524 ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \
527 /* Decode the string STR using the specified coding system
528 for w32 system functions, if any. */
529 #define DECODE_SYSTEM(name) \
530 (! NILP (Vlocale_coding_system) \
531 && XFASTINT (Vlocale_coding_system) != 0 \
532 ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \
535 #else /* WINDOWSNT */
537 #define ENCODE_SYSTEM(str) string_make_unibyte(str)
538 #define DECODE_SYSTEM(name) name
540 #endif /* !WINDOWSNT */
542 /* Extern declarations. */
543 extern Lisp_Object make_conversion_work_buffer
P_ ((int, int));
544 extern int decoding_buffer_size
P_ ((struct coding_system
*, int));
545 extern int encoding_buffer_size
P_ ((struct coding_system
*, int));
546 extern void setup_coding_system
P_ ((Lisp_Object
, struct coding_system
*));
547 extern void detect_coding
P_ ((struct coding_system
*));
548 extern Lisp_Object code_convert_region
P_ ((Lisp_Object
, Lisp_Object
,
549 Lisp_Object
, Lisp_Object
,
551 extern Lisp_Object code_convert_string
P_ ((Lisp_Object
, Lisp_Object
,
552 Lisp_Object
, int, int, int));
553 extern Lisp_Object code_convert_string_norecord
P_ ((Lisp_Object
, Lisp_Object
,
555 extern Lisp_Object raw_text_coding_system
P_ ((Lisp_Object
));
556 extern Lisp_Object coding_inherit_eol_type
P_ ((Lisp_Object
, Lisp_Object
));
558 extern int decode_coding_gap
P_ ((struct coding_system
*,
559 EMACS_INT
, EMACS_INT
));
560 extern int encode_coding_gap
P_ ((struct coding_system
*,
561 EMACS_INT
, EMACS_INT
));
562 extern void decode_coding_object
P_ ((struct coding_system
*,
563 Lisp_Object
, EMACS_INT
, EMACS_INT
,
564 EMACS_INT
, EMACS_INT
, Lisp_Object
));
565 extern void encode_coding_object
P_ ((struct coding_system
*,
566 Lisp_Object
, EMACS_INT
, EMACS_INT
,
567 EMACS_INT
, EMACS_INT
, Lisp_Object
));
/* Convenience wrappers that run decode_coding_object and
   encode_coding_object on the current buffer.  The result of
   Fcurrent_buffer () is passed both as the second and as the last
   argument (presumably the source and destination objects -- see
   the prototypes).  FROM and TO are character positions; each is
   also converted with CHAR_TO_BYTE, so both are expanded twice and
   must not have side effects.  */
#define decode_coding_region(coding, from, to) \
  decode_coding_object ((coding), Fcurrent_buffer (), \
                        (from), CHAR_TO_BYTE (from), \
                        (to), CHAR_TO_BYTE (to), \
                        Fcurrent_buffer ())

#define encode_coding_region(coding, from, to) \
  encode_coding_object ((coding), Fcurrent_buffer (), \
                        (from), CHAR_TO_BYTE (from), \
                        (to), CHAR_TO_BYTE (to), \
                        Fcurrent_buffer ())
/* Convenience wrappers that run decode_coding_object and
   encode_coding_object on the Lisp string STRING, from position 0 up
   to XSTRING (STRING)->size characters / STRING_BYTES bytes, with Qt
   as the last argument.  STRING is expanded three times, so it must
   not have side effects.  NOCOPY is accepted but does not appear in
   either expansion.  */
#define decode_coding_string(coding, string, nocopy) \
  decode_coding_object ((coding), (string), 0, 0, \
                        XSTRING (string)->size, \
                        STRING_BYTES (XSTRING (string)), \
                        Qt)

/* A comma expression: after the call, its value is the `dst_object'
   member of CODING.  */
#define encode_coding_string(coding, string, nocopy) \
  (encode_coding_object ((coding), (string), 0, 0, \
                         XSTRING (string)->size, \
                         STRING_BYTES (XSTRING (string)), \
                         Qt), \
   (coding)->dst_object)
591 #define decode_coding_c_string(coding, src, bytes, dst_object) \
593 (coding)->source = (src); \
594 (coding)->src_chars = (coding)->src_bytes = (bytes); \
595 decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \
600 extern Lisp_Object preferred_coding_system
P_ (());
603 extern Lisp_Object Qcoding_system
, Qeol_type
, Qcoding_category_index
;
604 extern Lisp_Object Qcoding_system_p
;
605 extern Lisp_Object Qraw_text
, Qemacs_mule
, Qno_conversion
, Qundecided
;
606 extern Lisp_Object Qiso_2022
;
607 extern Lisp_Object Qbuffer_file_coding_system
;
609 extern Lisp_Object Qunix
, Qdos
, Qmac
;
611 extern Lisp_Object Qtranslation_table
;
612 extern Lisp_Object Qtranslation_table_id
;
614 /* Mnemonic strings to indicate each type of end-of-line. */
615 extern Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
616 /* Mnemonic string to indicate type of end-of-line is not yet decided. */
617 extern Lisp_Object eol_mnemonic_undecided
;
620 extern Lisp_Object Qfile_coding_system
;
621 extern Lisp_Object Qcall_process
, Qcall_process_region
, Qprocess_argument
;
622 extern Lisp_Object Qstart_process
, Qopen_network_stream
;
623 extern Lisp_Object Qwrite_region
;
625 extern char *emacs_strerror
P_ ((int));
627 /* Coding-system for reading files and receiving data from process. */
628 extern Lisp_Object Vcoding_system_for_read
;
629 /* Coding-system for writing files and sending data to process. */
630 extern Lisp_Object Vcoding_system_for_write
;
631 /* Coding-system actually used in the latest I/O. */
632 extern Lisp_Object Vlast_coding_system_used
;
633 /* Coding-system to use with system messages (e.g. strerror). */
634 extern Lisp_Object Vlocale_coding_system
;
636 /* If non-zero, process buffer inherits the coding system used to decode
637 the subprocess output. */
638 extern int inherit_process_coding_system
;
640 /* Coding-system to be used for encoding terminal output. This
641 structure contains information of a coding-system specified by the
642 function `set-terminal-coding-system'. */
643 extern struct coding_system terminal_coding
;
645 /* Coding system to be used to encode text for terminal display when
646 terminal coding system is nil. */
647 extern struct coding_system safe_terminal_coding
;
649 /* Coding-system of what is sent from terminal keyboard. This
650 structure contains information of a coding-system specified by the
651 function `set-keyboard-coding-system'. */
652 extern struct coding_system keyboard_coding
;
654 /* Default coding systems used for process I/O. */
655 extern Lisp_Object Vdefault_process_coding_system
;
657 /* Function to call to force a user to select a proper coding system. */
659 extern Lisp_Object Vselect_safe_coding_system_function
;
661 /* Coding system for file names, or nil if none. */
662 extern Lisp_Object Vfile_name_coding_system
;
664 /* Coding system for file names used only when
665 Vfile_name_coding_system is nil. */
666 extern Lisp_Object Vdefault_file_name_coding_system
;
670 /* Error signaled when there's a problem with detecting coding system */
671 extern Lisp_Object Qcoding_system_error
;
673 extern char emacs_mule_bytes
[256];
674 extern int emacs_mule_string_char
P_ ((unsigned char *));
676 #endif /* EMACS_CODING_H */