]> code.delx.au - gnu-emacs/blob - src/charset.c
Merge changes from emacs-23 branch
[gnu-emacs] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
12
13 This file is part of GNU Emacs.
14
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <ctype.h>
34 #include <sys/types.h>
35 #include <setjmp.h>
36 #include "lisp.h"
37 #include "character.h"
38 #include "charset.h"
39 #include "coding.h"
40 #include "disptab.h"
41 #include "buffer.h"
42
43 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
44
45 A coded character set ("charset" hereafter) is a meaningful
46 collection (i.e. language, culture, functionality, etc.) of
47 characters. Emacs handles multiple charsets at once. In Emacs Lisp
48 code, a charset is represented by a symbol. In C code, a charset is
49 represented by its ID number or by a pointer to a struct charset.
50
51 The actual information about each charset is stored in two places.
52 Lispy information is stored in the hash table Vcharset_hash_table as
53 a vector (charset attributes). The other information is stored in
54 charset_table as a struct charset.
55
56 */
57
58 /* List of all charsets. This variable is used only from Emacs
59 Lisp. */
60 Lisp_Object Vcharset_list;
61
62 /* Hash table that contains attributes of each charset. Keys are
63 charset symbols, and values are vectors of charset attributes. */
64 Lisp_Object Vcharset_hash_table;
65
66 /* Table of struct charset. */
67 struct charset *charset_table;
68
69 static int charset_table_size;
70 static int charset_table_used;
71
72 Lisp_Object Qcharsetp;
73
74 /* Special charset symbols. */
75 Lisp_Object Qascii;
76 Lisp_Object Qeight_bit;
77 Lisp_Object Qiso_8859_1;
78 Lisp_Object Qunicode;
79 Lisp_Object Qemacs;
80
81 /* The corresponding charsets. */
82 int charset_ascii;
83 int charset_eight_bit;
84 int charset_iso_8859_1;
85 int charset_unicode;
86 int charset_emacs;
87
88 /* The other special charsets. */
89 int charset_jisx0201_roman;
90 int charset_jisx0208_1978;
91 int charset_jisx0208;
92 int charset_ksc5601;
93
94 /* Value of charset attribute `charset-iso-plane'. */
95 Lisp_Object Qgl, Qgr;
96
97 /* Charset of unibyte characters. */
98 int charset_unibyte;
99
100 /* List of charsets ordered by the priority. */
101 Lisp_Object Vcharset_ordered_list;
102
103 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
104 charsets. */
105 Lisp_Object Vcharset_non_preferred_head;
106
107 /* Incremented everytime we change Vcharset_ordered_list. This is
108 unsigned short so that it fits in Lisp_Int and never matches
109 -1. */
110 unsigned short charset_ordered_list_tick;
111
112 /* List of iso-2022 charsets. */
113 Lisp_Object Viso_2022_charset_list;
114
115 /* List of emacs-mule charsets. */
116 Lisp_Object Vemacs_mule_charset_list;
117
118 struct charset *emacs_mule_charset[256];
119
120 /* Mapping table from ISO2022's charset (specified by DIMENSION,
121 CHARS, and FINAL-CHAR) to Emacs' charset. */
122 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
123
124 Lisp_Object Vcharset_map_path;
125
126 /* If nonzero, don't load charset maps. */
127 int inhibit_load_charset_map;
128
129 Lisp_Object Vcurrent_iso639_language;
130
131 #define CODE_POINT_TO_INDEX(charset, code) \
132 ((charset)->code_linear_p \
133 ? (code) - (charset)->min_code \
134 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
135 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
136 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
137 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
138 ? (((((code) >> 24) - (charset)->code_space[12]) \
139 * (charset)->code_space[11]) \
140 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
141 * (charset)->code_space[7]) \
142 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
143 * (charset)->code_space[3]) \
144 + (((code) & 0xFF) - (charset)->code_space[0]) \
145 - ((charset)->char_index_offset)) \
146 : -1)
147
148
149 /* Convert the character index IDX to code-point CODE for CHARSET.
150 It is assumed that IDX is in a valid range. */
151
152 #define INDEX_TO_CODE_POINT(charset, idx) \
153 ((charset)->code_linear_p \
154 ? (idx) + (charset)->min_code \
155 : (idx += (charset)->char_index_offset, \
156 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
157 | (((charset)->code_space[4] \
158 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
159 << 8) \
160 | (((charset)->code_space[8] \
161 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
162 << 16) \
163 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
164 << 24))))
165
166 /* Structure to hold mapping tables for a charset. Used by temacs
167 invoked for dumping. */
168
169 static struct
170 {
171 /* The current charset for which the following tables are setup. */
172 struct charset *current;
173
174 /* 1 iff the following table is used for encoder. */
175 short for_encoder;
176
177 /* When the following table is used for encoding, mininum and
178 maxinum character of the current charset. */
179 int min_char, max_char;
180
181 /* A Unicode character correspoinding to the code indice 0 (i.e. the
182 minimum code-point) of the current charset, or -1 if the code
183 indice 0 is not a Unicode character. This is checked when
184 table.encoder[CHAR] is zero. */
185 int zero_index_char;
186
187 union {
188 /* Table mapping code-indices (not code-points) of the current
189 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
190 doesn't belong to the current charset. */
191 int decoder[0x10000];
192 /* Table mapping Unicode characters to code-indices of the current
193 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
194 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
195 (0x20000..0x2FFFF). Note that there is no charset map that
196 uses both SMP and SIP. */
197 unsigned short encoder[0x20000];
198 } table;
199 } *temp_charset_work;
200
201 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
202 do { \
203 if ((CODE) == 0) \
204 temp_charset_work->zero_index_char = (C); \
205 else if ((C) < 0x20000) \
206 temp_charset_work->table.encoder[(C)] = (CODE); \
207 else \
208 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
209 } while (0)
210
211 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
212 ((C) == temp_charset_work->zero_index_char ? 0 \
213 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
214 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
215 : temp_charset_work->table.encoder[(C) - 0x10000] \
216 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
217
218 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
219 (temp_charset_work->table.decoder[(CODE)] = (C))
220
221 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
222 (temp_charset_work->table.decoder[(CODE)])
223 \f
224
225 /* Set to 1 to warn that a charset map is loaded and thus a buffer
226 text and a string data may be relocated. */
227 int charset_map_loaded;
228
229 struct charset_map_entries
230 {
231 struct {
232 unsigned from, to;
233 int c;
234 } entry[0x10000];
235 struct charset_map_entries *next;
236 };
237
238 /* Load the mapping information of CHARSET from ENTRIES for
239 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
240 encoding (CONTROL_FLAG == 2).
241
242 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
243 and CHARSET->fast_map.
244
245 If CONTROL_FLAG is 1, setup the following tables according to
246 CHARSET->method and inhibit_load_charset_map.
247
248 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
249 ----------------------+--------------------+---------------------------
250 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
251 ----------------------+--------------------+---------------------------
252 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
253
254 If CONTROL_FLAG is 2, setup the following tables.
255
256 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
259 ----------------------+--------------------+--------------------------
260 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
261 */
262
263 static void
264 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
265 {
266 Lisp_Object vec, table;
267 unsigned max_code = CHARSET_MAX_CODE (charset);
268 int ascii_compatible_p = charset->ascii_compatible_p;
269 int min_char, max_char, nonascii_min_char;
270 int i;
271 unsigned char *fast_map = charset->fast_map;
272
273 if (n_entries <= 0)
274 return;
275
276 if (control_flag)
277 {
278 if (! inhibit_load_charset_map)
279 {
280 if (control_flag == 1)
281 {
282 if (charset->method == CHARSET_METHOD_MAP)
283 {
284 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
285
286 vec = CHARSET_DECODER (charset)
287 = Fmake_vector (make_number (n), make_number (-1));
288 }
289 else
290 {
291 char_table_set_range (Vchar_unify_table,
292 charset->min_char, charset->max_char,
293 Qnil);
294 }
295 }
296 else
297 {
298 table = Fmake_char_table (Qnil, Qnil);
299 if (charset->method == CHARSET_METHOD_MAP)
300 CHARSET_ENCODER (charset) = table;
301 else
302 CHARSET_DEUNIFIER (charset) = table;
303 }
304 }
305 else
306 {
307 if (! temp_charset_work)
308 temp_charset_work = malloc (sizeof (*temp_charset_work));
309 if (control_flag == 1)
310 {
311 memset (temp_charset_work->table.decoder, -1,
312 sizeof (int) * 0x10000);
313 }
314 else
315 {
316 memset (temp_charset_work->table.encoder, 0,
317 sizeof (unsigned short) * 0x20000);
318 temp_charset_work->zero_index_char = -1;
319 }
320 temp_charset_work->current = charset;
321 temp_charset_work->for_encoder = (control_flag == 2);
322 control_flag += 2;
323 }
324 charset_map_loaded = 1;
325 }
326
327 min_char = max_char = entries->entry[0].c;
328 nonascii_min_char = MAX_CHAR;
329 for (i = 0; i < n_entries; i++)
330 {
331 unsigned from, to;
332 int from_index, to_index;
333 int from_c, to_c;
334 int idx = i % 0x10000;
335
336 if (i > 0 && idx == 0)
337 entries = entries->next;
338 from = entries->entry[idx].from;
339 to = entries->entry[idx].to;
340 from_c = entries->entry[idx].c;
341 from_index = CODE_POINT_TO_INDEX (charset, from);
342 if (from == to)
343 {
344 to_index = from_index;
345 to_c = from_c;
346 }
347 else
348 {
349 to_index = CODE_POINT_TO_INDEX (charset, to);
350 to_c = from_c + (to_index - from_index);
351 }
352 if (from_index < 0 || to_index < 0)
353 continue;
354
355 if (to_c > max_char)
356 max_char = to_c;
357 else if (from_c < min_char)
358 min_char = from_c;
359
360 if (control_flag == 1)
361 {
362 if (charset->method == CHARSET_METHOD_MAP)
363 for (; from_index <= to_index; from_index++, from_c++)
364 ASET (vec, from_index, make_number (from_c));
365 else
366 for (; from_index <= to_index; from_index++, from_c++)
367 CHAR_TABLE_SET (Vchar_unify_table,
368 CHARSET_CODE_OFFSET (charset) + from_index,
369 make_number (from_c));
370 }
371 else if (control_flag == 2)
372 {
373 if (charset->method == CHARSET_METHOD_MAP
374 && CHARSET_COMPACT_CODES_P (charset))
375 for (; from_index <= to_index; from_index++, from_c++)
376 {
377 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
378
379 if (NILP (CHAR_TABLE_REF (table, from_c)))
380 CHAR_TABLE_SET (table, from_c, make_number (code));
381 }
382 else
383 for (; from_index <= to_index; from_index++, from_c++)
384 {
385 if (NILP (CHAR_TABLE_REF (table, from_c)))
386 CHAR_TABLE_SET (table, from_c, make_number (from_index));
387 }
388 }
389 else if (control_flag == 3)
390 for (; from_index <= to_index; from_index++, from_c++)
391 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
392 else if (control_flag == 4)
393 for (; from_index <= to_index; from_index++, from_c++)
394 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
395 else /* control_flag == 0 */
396 {
397 if (ascii_compatible_p)
398 {
399 if (! ASCII_BYTE_P (from_c))
400 {
401 if (from_c < nonascii_min_char)
402 nonascii_min_char = from_c;
403 }
404 else if (! ASCII_BYTE_P (to_c))
405 {
406 nonascii_min_char = 0x80;
407 }
408 }
409
410 for (; from_c <= to_c; from_c++)
411 CHARSET_FAST_MAP_SET (from_c, fast_map);
412 }
413 }
414
415 if (control_flag == 0)
416 {
417 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
418 ? nonascii_min_char : min_char);
419 CHARSET_MAX_CHAR (charset) = max_char;
420 }
421 else if (control_flag == 4)
422 {
423 temp_charset_work->min_char = min_char;
424 temp_charset_work->max_char = max_char;
425 }
426 }
427
428
429 /* Read a hexadecimal number (preceded by "0x") from the file FP while
430 paying attention to comment charcter '#'. */
431
432 static INLINE unsigned
433 read_hex (FILE *fp, int *eof)
434 {
435 int c;
436 unsigned n;
437
438 while ((c = getc (fp)) != EOF)
439 {
440 if (c == '#')
441 {
442 while ((c = getc (fp)) != EOF && c != '\n');
443 }
444 else if (c == '0')
445 {
446 if ((c = getc (fp)) == EOF || c == 'x')
447 break;
448 }
449 }
450 if (c == EOF)
451 {
452 *eof = 1;
453 return 0;
454 }
455 *eof = 0;
456 n = 0;
457 if (c == 'x')
458 while ((c = getc (fp)) != EOF && isxdigit (c))
459 n = ((n << 4)
460 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
461 else
462 while ((c = getc (fp)) != EOF && isdigit (c))
463 n = (n * 10) + c - '0';
464 if (c != EOF)
465 ungetc (c, fp);
466 return n;
467 }
468
469 /* Return a mapping vector for CHARSET loaded from MAPFILE.
470 Each line of MAPFILE has this form
471 0xAAAA 0xCCCC
472 where 0xAAAA is a code-point and 0xCCCC is the corresponding
473 character code, or this form
474 0xAAAA-0xBBBB 0xCCCC
475 where 0xAAAA and 0xBBBB are code-points specifying a range, and
476 0xCCCC is the first character code of the range.
477
478 The returned vector has this form:
479 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
480 where CODE1 is a code-point or a cons of code-points specifying a
481 range.
482
483 Note that this function uses `openp' to open MAPFILE but ignores
484 `file-name-handler-alist' to avoid running any Lisp code. */
485
486 static void
487 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int control_flag)
488 {
489 unsigned min_code = CHARSET_MIN_CODE (charset);
490 unsigned max_code = CHARSET_MAX_CODE (charset);
491 int fd;
492 FILE *fp;
493 int eof;
494 Lisp_Object suffixes;
495 struct charset_map_entries *head, *entries;
496 int n_entries, count;
497 USE_SAFE_ALLOCA;
498
499 suffixes = Fcons (build_string (".map"),
500 Fcons (build_string (".TXT"), Qnil));
501
502 count = SPECPDL_INDEX ();
503 specbind (Qfile_name_handler_alist, Qnil);
504 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
505 unbind_to (count, Qnil);
506 if (fd < 0
507 || ! (fp = fdopen (fd, "r")))
508 error ("Failure in loading charset map: %S", SDATA (mapfile));
509
510 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
511 large (larger than MAX_ALLOCA). */
512 SAFE_ALLOCA (head, struct charset_map_entries *,
513 sizeof (struct charset_map_entries));
514 entries = head;
515 memset (entries, 0, sizeof (struct charset_map_entries));
516
517 n_entries = 0;
518 eof = 0;
519 while (1)
520 {
521 unsigned from, to;
522 int c;
523 int idx;
524
525 from = read_hex (fp, &eof);
526 if (eof)
527 break;
528 if (getc (fp) == '-')
529 to = read_hex (fp, &eof);
530 else
531 to = from;
532 c = (int) read_hex (fp, &eof);
533
534 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
535 continue;
536
537 if (n_entries > 0 && (n_entries % 0x10000) == 0)
538 {
539 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
540 sizeof (struct charset_map_entries));
541 entries = entries->next;
542 memset (entries, 0, sizeof (struct charset_map_entries));
543 }
544 idx = n_entries % 0x10000;
545 entries->entry[idx].from = from;
546 entries->entry[idx].to = to;
547 entries->entry[idx].c = c;
548 n_entries++;
549 }
550 fclose (fp);
551
552 load_charset_map (charset, head, n_entries, control_flag);
553 SAFE_FREE ();
554 }
555
556 static void
557 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
558 {
559 unsigned min_code = CHARSET_MIN_CODE (charset);
560 unsigned max_code = CHARSET_MAX_CODE (charset);
561 struct charset_map_entries *head, *entries;
562 int n_entries;
563 int len = ASIZE (vec);
564 int i;
565 USE_SAFE_ALLOCA;
566
567 if (len % 2 == 1)
568 {
569 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
570 return;
571 }
572
573 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
574 large (larger than MAX_ALLOCA). */
575 SAFE_ALLOCA (head, struct charset_map_entries *,
576 sizeof (struct charset_map_entries));
577 entries = head;
578 memset (entries, 0, sizeof (struct charset_map_entries));
579
580 n_entries = 0;
581 for (i = 0; i < len; i += 2)
582 {
583 Lisp_Object val, val2;
584 unsigned from, to;
585 int c;
586 int idx;
587
588 val = AREF (vec, i);
589 if (CONSP (val))
590 {
591 val2 = XCDR (val);
592 val = XCAR (val);
593 CHECK_NATNUM (val);
594 CHECK_NATNUM (val2);
595 from = XFASTINT (val);
596 to = XFASTINT (val2);
597 }
598 else
599 {
600 CHECK_NATNUM (val);
601 from = to = XFASTINT (val);
602 }
603 val = AREF (vec, i + 1);
604 CHECK_NATNUM (val);
605 c = XFASTINT (val);
606
607 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
608 continue;
609
610 if (n_entries > 0 && (n_entries % 0x10000) == 0)
611 {
612 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
613 sizeof (struct charset_map_entries));
614 entries = entries->next;
615 memset (entries, 0, sizeof (struct charset_map_entries));
616 }
617 idx = n_entries % 0x10000;
618 entries->entry[idx].from = from;
619 entries->entry[idx].to = to;
620 entries->entry[idx].c = c;
621 n_entries++;
622 }
623
624 load_charset_map (charset, head, n_entries, control_flag);
625 SAFE_FREE ();
626 }
627
628
629 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
630 map it is (see the comment of load_charset_map for the detail). */
631
632 static void
633 load_charset (struct charset *charset, int control_flag)
634 {
635 Lisp_Object map;
636
637 if (inhibit_load_charset_map
638 && temp_charset_work
639 && charset == temp_charset_work->current
640 && ((control_flag == 2) == temp_charset_work->for_encoder))
641 return;
642
643 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
644 map = CHARSET_MAP (charset);
645 else if (CHARSET_UNIFIED_P (charset))
646 map = CHARSET_UNIFY_MAP (charset);
647 if (STRINGP (map))
648 load_charset_map_from_file (charset, map, control_flag);
649 else
650 load_charset_map_from_vector (charset, map, control_flag);
651 }
652
653
654 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
655 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
656 (Lisp_Object object)
657 {
658 return (CHARSETP (object) ? Qt : Qnil);
659 }
660
661
662 void map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
663 Lisp_Object function, Lisp_Object arg,
664 unsigned from, unsigned to);
665
666 void
667 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object arg, unsigned int from, unsigned int to)
668 {
669 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
670 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
671 Lisp_Object range;
672 int c, stop;
673 struct gcpro gcpro1;
674
675 range = Fcons (Qnil, Qnil);
676 GCPRO1 (range);
677
678 c = temp_charset_work->min_char;
679 stop = (temp_charset_work->max_char < 0x20000
680 ? temp_charset_work->max_char : 0xFFFF);
681
682 while (1)
683 {
684 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
685
686 if (index >= from_idx && index <= to_idx)
687 {
688 if (NILP (XCAR (range)))
689 XSETCAR (range, make_number (c));
690 }
691 else if (! NILP (XCAR (range)))
692 {
693 XSETCDR (range, make_number (c - 1));
694 if (c_function)
695 (*c_function) (arg, range);
696 else
697 call2 (function, range, arg);
698 XSETCAR (range, Qnil);
699 }
700 if (c == stop)
701 {
702 if (c == temp_charset_work->max_char)
703 {
704 if (! NILP (XCAR (range)))
705 {
706 XSETCDR (range, make_number (c));
707 if (c_function)
708 (*c_function) (arg, range);
709 else
710 call2 (function, range, arg);
711 }
712 break;
713 }
714 c = 0x1FFFF;
715 stop = temp_charset_work->max_char;
716 }
717 c++;
718 }
719 UNGCPRO;
720 }
721
722 void
723 map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
724 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
725 {
726 Lisp_Object range;
727 int partial;
728
729 partial = (from > CHARSET_MIN_CODE (charset)
730 || to < CHARSET_MAX_CODE (charset));
731
732 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
733 {
734 int from_idx = CODE_POINT_TO_INDEX (charset, from);
735 int to_idx = CODE_POINT_TO_INDEX (charset, to);
736 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
737 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
738
739 if (CHARSET_UNIFIED_P (charset))
740 {
741 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
742 load_charset (charset, 2);
743 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
744 map_char_table_for_charset (c_function, function,
745 CHARSET_DEUNIFIER (charset), arg,
746 partial ? charset : NULL, from, to);
747 else
748 map_charset_for_dump (c_function, function, arg, from, to);
749 }
750
751 range = Fcons (make_number (from_c), make_number (to_c));
752 if (NILP (function))
753 (*c_function) (arg, range);
754 else
755 call2 (function, range, arg);
756 }
757 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
758 {
759 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
760 load_charset (charset, 2);
761 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
762 map_char_table_for_charset (c_function, function,
763 CHARSET_ENCODER (charset), arg,
764 partial ? charset : NULL, from, to);
765 else
766 map_charset_for_dump (c_function, function, arg, from, to);
767 }
768 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
769 {
770 Lisp_Object subset_info;
771 int offset;
772
773 subset_info = CHARSET_SUBSET (charset);
774 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
775 offset = XINT (AREF (subset_info, 3));
776 from -= offset;
777 if (from < XFASTINT (AREF (subset_info, 1)))
778 from = XFASTINT (AREF (subset_info, 1));
779 to -= offset;
780 if (to > XFASTINT (AREF (subset_info, 2)))
781 to = XFASTINT (AREF (subset_info, 2));
782 map_charset_chars (c_function, function, arg, charset, from, to);
783 }
784 else /* i.e. CHARSET_METHOD_SUPERSET */
785 {
786 Lisp_Object parents;
787
788 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
789 parents = XCDR (parents))
790 {
791 int offset;
792 unsigned this_from, this_to;
793
794 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
795 offset = XINT (XCDR (XCAR (parents)));
796 this_from = from > offset ? from - offset : 0;
797 this_to = to > offset ? to - offset : 0;
798 if (this_from < CHARSET_MIN_CODE (charset))
799 this_from = CHARSET_MIN_CODE (charset);
800 if (this_to > CHARSET_MAX_CODE (charset))
801 this_to = CHARSET_MAX_CODE (charset);
802 map_charset_chars (c_function, function, arg, charset,
803 this_from, this_to);
804 }
805 }
806 }
807
808 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
809 doc: /* Call FUNCTION for all characters in CHARSET.
810 FUNCTION is called with an argument RANGE and the optional 3rd
811 argument ARG.
812
813 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
814 characters contained in CHARSET.
815
816 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
817 range of code points (in CHARSET) of target characters. */)
818 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
819 {
820 struct charset *cs;
821 unsigned from, to;
822
823 CHECK_CHARSET_GET_CHARSET (charset, cs);
824 if (NILP (from_code))
825 from = CHARSET_MIN_CODE (cs);
826 else
827 {
828 CHECK_NATNUM (from_code);
829 from = XINT (from_code);
830 if (from < CHARSET_MIN_CODE (cs))
831 from = CHARSET_MIN_CODE (cs);
832 }
833 if (NILP (to_code))
834 to = CHARSET_MAX_CODE (cs);
835 else
836 {
837 CHECK_NATNUM (to_code);
838 to = XINT (to_code);
839 if (to > CHARSET_MAX_CODE (cs))
840 to = CHARSET_MAX_CODE (cs);
841 }
842 map_charset_chars (NULL, function, arg, cs, from, to);
843 return Qnil;
844 }
845
846
847 /* Define a charset according to the arguments. The Nth argument is
848 the Nth attribute of the charset (the last attribute `charset-id'
849 is not included). See the docstring of `define-charset' for the
850 detail. */
851
852 DEFUN ("define-charset-internal", Fdefine_charset_internal,
853 Sdefine_charset_internal, charset_arg_max, MANY, 0,
854 doc: /* For internal use only.
855 usage: (define-charset-internal ...) */)
856 (int nargs, Lisp_Object *args)
857 {
858 /* Charset attr vector. */
859 Lisp_Object attrs;
860 Lisp_Object val;
861 unsigned hash_code;
862 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
863 int i, j;
864 struct charset charset;
865 int id;
866 int dimension;
867 int new_definition_p;
868 int nchars;
869
870 if (nargs != charset_arg_max)
871 return Fsignal (Qwrong_number_of_arguments,
872 Fcons (intern ("define-charset-internal"),
873 make_number (nargs)));
874
875 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
876
877 CHECK_SYMBOL (args[charset_arg_name]);
878 ASET (attrs, charset_name, args[charset_arg_name]);
879
880 val = args[charset_arg_code_space];
881 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
882 {
883 int min_byte, max_byte;
884
885 min_byte = XINT (Faref (val, make_number (i * 2)));
886 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
887 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
888 error ("Invalid :code-space value");
889 charset.code_space[i * 4] = min_byte;
890 charset.code_space[i * 4 + 1] = max_byte;
891 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
892 nchars *= charset.code_space[i * 4 + 2];
893 charset.code_space[i * 4 + 3] = nchars;
894 if (max_byte > 0)
895 dimension = i + 1;
896 }
897
898 val = args[charset_arg_dimension];
899 if (NILP (val))
900 charset.dimension = dimension;
901 else
902 {
903 CHECK_NATNUM (val);
904 charset.dimension = XINT (val);
905 if (charset.dimension < 1 || charset.dimension > 4)
906 args_out_of_range_3 (val, make_number (1), make_number (4));
907 }
908
909 charset.code_linear_p
910 = (charset.dimension == 1
911 || (charset.code_space[2] == 256
912 && (charset.dimension == 2
913 || (charset.code_space[6] == 256
914 && (charset.dimension == 3
915 || charset.code_space[10] == 256)))));
916
917 if (! charset.code_linear_p)
918 {
919 charset.code_space_mask = (unsigned char *) xmalloc (256);
920 memset (charset.code_space_mask, 0, 256);
921 for (i = 0; i < 4; i++)
922 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
923 j++)
924 charset.code_space_mask[j] |= (1 << i);
925 }
926
927 charset.iso_chars_96 = charset.code_space[2] == 96;
928
929 charset.min_code = (charset.code_space[0]
930 | (charset.code_space[4] << 8)
931 | (charset.code_space[8] << 16)
932 | (charset.code_space[12] << 24));
933 charset.max_code = (charset.code_space[1]
934 | (charset.code_space[5] << 8)
935 | (charset.code_space[9] << 16)
936 | (charset.code_space[13] << 24));
937 charset.char_index_offset = 0;
938
939 val = args[charset_arg_min_code];
940 if (! NILP (val))
941 {
942 unsigned code;
943
944 if (INTEGERP (val))
945 code = XINT (val);
946 else
947 {
948 CHECK_CONS (val);
949 CHECK_NUMBER_CAR (val);
950 CHECK_NUMBER_CDR (val);
951 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
952 }
953 if (code < charset.min_code
954 || code > charset.max_code)
955 args_out_of_range_3 (make_number (charset.min_code),
956 make_number (charset.max_code), val);
957 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
958 charset.min_code = code;
959 }
960
961 val = args[charset_arg_max_code];
962 if (! NILP (val))
963 {
964 unsigned code;
965
966 if (INTEGERP (val))
967 code = XINT (val);
968 else
969 {
970 CHECK_CONS (val);
971 CHECK_NUMBER_CAR (val);
972 CHECK_NUMBER_CDR (val);
973 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
974 }
975 if (code < charset.min_code
976 || code > charset.max_code)
977 args_out_of_range_3 (make_number (charset.min_code),
978 make_number (charset.max_code), val);
979 charset.max_code = code;
980 }
981
982 charset.compact_codes_p = charset.max_code < 0x10000;
983
984 val = args[charset_arg_invalid_code];
985 if (NILP (val))
986 {
987 if (charset.min_code > 0)
988 charset.invalid_code = 0;
989 else
990 {
991 XSETINT (val, charset.max_code + 1);
992 if (XINT (val) == charset.max_code + 1)
993 charset.invalid_code = charset.max_code + 1;
994 else
995 error ("Attribute :invalid-code must be specified");
996 }
997 }
998 else
999 {
1000 CHECK_NATNUM (val);
1001 charset.invalid_code = XFASTINT (val);
1002 }
1003
1004 val = args[charset_arg_iso_final];
1005 if (NILP (val))
1006 charset.iso_final = -1;
1007 else
1008 {
1009 CHECK_NUMBER (val);
1010 if (XINT (val) < '0' || XINT (val) > 127)
1011 error ("Invalid iso-final-char: %d", XINT (val));
1012 charset.iso_final = XINT (val);
1013 }
1014
1015 val = args[charset_arg_iso_revision];
1016 if (NILP (val))
1017 charset.iso_revision = -1;
1018 else
1019 {
1020 CHECK_NUMBER (val);
1021 if (XINT (val) > 63)
1022 args_out_of_range (make_number (63), val);
1023 charset.iso_revision = XINT (val);
1024 }
1025
1026 val = args[charset_arg_emacs_mule_id];
1027 if (NILP (val))
1028 charset.emacs_mule_id = -1;
1029 else
1030 {
1031 CHECK_NATNUM (val);
1032 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1033 error ("Invalid emacs-mule-id: %d", XINT (val));
1034 charset.emacs_mule_id = XINT (val);
1035 }
1036
1037 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1038
1039 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1040
1041 charset.unified_p = 0;
1042
1043 memset (charset.fast_map, 0, sizeof (charset.fast_map));
1044
1045 if (! NILP (args[charset_arg_code_offset]))
1046 {
1047 val = args[charset_arg_code_offset];
1048 CHECK_NUMBER (val);
1049
1050 charset.method = CHARSET_METHOD_OFFSET;
1051 charset.code_offset = XINT (val);
1052
1053 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1054 charset.min_char = i + charset.code_offset;
1055 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1056 charset.max_char = i + charset.code_offset;
1057 if (charset.max_char > MAX_CHAR)
1058 error ("Unsupported max char: %d", charset.max_char);
1059
1060 i = (charset.min_char >> 7) << 7;
1061 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1062 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1063 i = (i >> 12) << 12;
1064 for (; i <= charset.max_char; i += 0x1000)
1065 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1066 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1067 charset.ascii_compatible_p = 1;
1068 }
1069 else if (! NILP (args[charset_arg_map]))
1070 {
1071 val = args[charset_arg_map];
1072 ASET (attrs, charset_map, val);
1073 charset.method = CHARSET_METHOD_MAP;
1074 }
1075 else if (! NILP (args[charset_arg_subset]))
1076 {
1077 Lisp_Object parent;
1078 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1079 struct charset *parent_charset;
1080
1081 val = args[charset_arg_subset];
1082 parent = Fcar (val);
1083 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1084 parent_min_code = Fnth (make_number (1), val);
1085 CHECK_NATNUM (parent_min_code);
1086 parent_max_code = Fnth (make_number (2), val);
1087 CHECK_NATNUM (parent_max_code);
1088 parent_code_offset = Fnth (make_number (3), val);
1089 CHECK_NUMBER (parent_code_offset);
1090 val = Fmake_vector (make_number (4), Qnil);
1091 ASET (val, 0, make_number (parent_charset->id));
1092 ASET (val, 1, parent_min_code);
1093 ASET (val, 2, parent_max_code);
1094 ASET (val, 3, parent_code_offset);
1095 ASET (attrs, charset_subset, val);
1096
1097 charset.method = CHARSET_METHOD_SUBSET;
1098 /* Here, we just copy the parent's fast_map. It's not accurate,
1099 but at least it works for quickly detecting which character
1100 DOESN'T belong to this charset. */
1101 for (i = 0; i < 190; i++)
1102 charset.fast_map[i] = parent_charset->fast_map[i];
1103
1104 /* We also copy these for parents. */
1105 charset.min_char = parent_charset->min_char;
1106 charset.max_char = parent_charset->max_char;
1107 }
1108 else if (! NILP (args[charset_arg_superset]))
1109 {
1110 val = args[charset_arg_superset];
1111 charset.method = CHARSET_METHOD_SUPERSET;
1112 val = Fcopy_sequence (val);
1113 ASET (attrs, charset_superset, val);
1114
1115 charset.min_char = MAX_CHAR;
1116 charset.max_char = 0;
1117 for (; ! NILP (val); val = Fcdr (val))
1118 {
1119 Lisp_Object elt, car_part, cdr_part;
1120 int this_id, offset;
1121 struct charset *this_charset;
1122
1123 elt = Fcar (val);
1124 if (CONSP (elt))
1125 {
1126 car_part = XCAR (elt);
1127 cdr_part = XCDR (elt);
1128 CHECK_CHARSET_GET_ID (car_part, this_id);
1129 CHECK_NUMBER (cdr_part);
1130 offset = XINT (cdr_part);
1131 }
1132 else
1133 {
1134 CHECK_CHARSET_GET_ID (elt, this_id);
1135 offset = 0;
1136 }
1137 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1138
1139 this_charset = CHARSET_FROM_ID (this_id);
1140 if (charset.min_char > this_charset->min_char)
1141 charset.min_char = this_charset->min_char;
1142 if (charset.max_char < this_charset->max_char)
1143 charset.max_char = this_charset->max_char;
1144 for (i = 0; i < 190; i++)
1145 charset.fast_map[i] |= this_charset->fast_map[i];
1146 }
1147 }
1148 else
1149 error ("None of :code-offset, :map, :parents are specified");
1150
1151 val = args[charset_arg_unify_map];
1152 if (! NILP (val) && !STRINGP (val))
1153 CHECK_VECTOR (val);
1154 ASET (attrs, charset_unify_map, val);
1155
1156 CHECK_LIST (args[charset_arg_plist]);
1157 ASET (attrs, charset_plist, args[charset_arg_plist]);
1158
1159 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1160 &hash_code);
1161 if (charset.hash_index >= 0)
1162 {
1163 new_definition_p = 0;
1164 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1165 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1166 }
1167 else
1168 {
1169 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1170 hash_code);
1171 if (charset_table_used == charset_table_size)
1172 {
1173 struct charset *new_table
1174 = (struct charset *) xmalloc (sizeof (struct charset)
1175 * (charset_table_size + 16));
1176 memcpy (new_table, charset_table,
1177 sizeof (struct charset) * charset_table_size);
1178 charset_table_size += 16;
1179 charset_table = new_table;
1180 }
1181 id = charset_table_used++;
1182 new_definition_p = 1;
1183 }
1184
1185 ASET (attrs, charset_id, make_number (id));
1186 charset.id = id;
1187 charset_table[id] = charset;
1188
1189 if (charset.method == CHARSET_METHOD_MAP)
1190 {
1191 load_charset (&charset, 0);
1192 charset_table[id] = charset;
1193 }
1194
1195 if (charset.iso_final >= 0)
1196 {
1197 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1198 charset.iso_final) = id;
1199 if (new_definition_p)
1200 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1201 Fcons (make_number (id), Qnil));
1202 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1203 charset_jisx0201_roman = id;
1204 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1205 charset_jisx0208_1978 = id;
1206 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1207 charset_jisx0208 = id;
1208 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1209 charset_ksc5601 = id;
1210 }
1211
1212 if (charset.emacs_mule_id >= 0)
1213 {
1214 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1215 if (charset.emacs_mule_id < 0xA0)
1216 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1217 else
1218 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1219 if (new_definition_p)
1220 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1221 Fcons (make_number (id), Qnil));
1222 }
1223
1224 if (new_definition_p)
1225 {
1226 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1227 if (charset.supplementary_p)
1228 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1229 Fcons (make_number (id), Qnil));
1230 else
1231 {
1232 Lisp_Object tail;
1233
1234 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1235 {
1236 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1237
1238 if (cs->supplementary_p)
1239 break;
1240 }
1241 if (EQ (tail, Vcharset_ordered_list))
1242 Vcharset_ordered_list = Fcons (make_number (id),
1243 Vcharset_ordered_list);
1244 else if (NILP (tail))
1245 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1246 Fcons (make_number (id), Qnil));
1247 else
1248 {
1249 val = Fcons (XCAR (tail), XCDR (tail));
1250 XSETCDR (tail, val);
1251 XSETCAR (tail, make_number (id));
1252 }
1253 }
1254 charset_ordered_list_tick++;
1255 }
1256
1257 return Qnil;
1258 }
1259
1260
1261 /* Same as Fdefine_charset_internal but arguments are more convenient
1262 to call from C (typically in syms_of_charset). This can define a
1263 charset of `offset' method only. Return the ID of the new
1264 charset. */
1265
1266 static int
1267 define_charset_internal (Lisp_Object name,
1268 int dimension,
1269 unsigned char *code_space,
1270 unsigned min_code, unsigned max_code,
1271 int iso_final, int iso_revision, int emacs_mule_id,
1272 int ascii_compatible, int supplementary,
1273 int code_offset)
1274 {
1275 Lisp_Object args[charset_arg_max];
1276 Lisp_Object plist[14];
1277 Lisp_Object val;
1278 int i;
1279
1280 args[charset_arg_name] = name;
1281 args[charset_arg_dimension] = make_number (dimension);
1282 val = Fmake_vector (make_number (8), make_number (0));
1283 for (i = 0; i < 8; i++)
1284 ASET (val, i, make_number (code_space[i]));
1285 args[charset_arg_code_space] = val;
1286 args[charset_arg_min_code] = make_number (min_code);
1287 args[charset_arg_max_code] = make_number (max_code);
1288 args[charset_arg_iso_final]
1289 = (iso_final < 0 ? Qnil : make_number (iso_final));
1290 args[charset_arg_iso_revision] = make_number (iso_revision);
1291 args[charset_arg_emacs_mule_id]
1292 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1293 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1294 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1295 args[charset_arg_invalid_code] = Qnil;
1296 args[charset_arg_code_offset] = make_number (code_offset);
1297 args[charset_arg_map] = Qnil;
1298 args[charset_arg_subset] = Qnil;
1299 args[charset_arg_superset] = Qnil;
1300 args[charset_arg_unify_map] = Qnil;
1301
1302 plist[0] = intern_c_string (":name");
1303 plist[1] = args[charset_arg_name];
1304 plist[2] = intern_c_string (":dimension");
1305 plist[3] = args[charset_arg_dimension];
1306 plist[4] = intern_c_string (":code-space");
1307 plist[5] = args[charset_arg_code_space];
1308 plist[6] = intern_c_string (":iso-final-char");
1309 plist[7] = args[charset_arg_iso_final];
1310 plist[8] = intern_c_string (":emacs-mule-id");
1311 plist[9] = args[charset_arg_emacs_mule_id];
1312 plist[10] = intern_c_string (":ascii-compatible-p");
1313 plist[11] = args[charset_arg_ascii_compatible_p];
1314 plist[12] = intern_c_string (":code-offset");
1315 plist[13] = args[charset_arg_code_offset];
1316
1317 args[charset_arg_plist] = Flist (14, plist);
1318 Fdefine_charset_internal (charset_arg_max, args);
1319
1320 return XINT (CHARSET_SYMBOL_ID (name));
1321 }
1322
1323
1324 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1325 Sdefine_charset_alias, 2, 2, 0,
1326 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1327 (Lisp_Object alias, Lisp_Object charset)
1328 {
1329 Lisp_Object attr;
1330
1331 CHECK_CHARSET_GET_ATTR (charset, attr);
1332 Fputhash (alias, attr, Vcharset_hash_table);
1333 Vcharset_list = Fcons (alias, Vcharset_list);
1334 return Qnil;
1335 }
1336
1337
1338 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1339 doc: /* Return the property list of CHARSET. */)
1340 (Lisp_Object charset)
1341 {
1342 Lisp_Object attrs;
1343
1344 CHECK_CHARSET_GET_ATTR (charset, attrs);
1345 return CHARSET_ATTR_PLIST (attrs);
1346 }
1347
1348
1349 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1350 doc: /* Set CHARSET's property list to PLIST. */)
1351 (Lisp_Object charset, Lisp_Object plist)
1352 {
1353 Lisp_Object attrs;
1354
1355 CHECK_CHARSET_GET_ATTR (charset, attrs);
1356 CHARSET_ATTR_PLIST (attrs) = plist;
1357 return plist;
1358 }
1359
1360
1361 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1362 doc: /* Unify characters of CHARSET with Unicode.
1363 This means reading the relevant file and installing the table defined
1364 by CHARSET's `:unify-map' property.
1365
1366 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1367 the same meaning as the `:unify-map' attribute in the function
1368 `define-charset' (which see).
1369
1370 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1371 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
1372 {
1373 int id;
1374 struct charset *cs;
1375
1376 CHECK_CHARSET_GET_ID (charset, id);
1377 cs = CHARSET_FROM_ID (id);
1378 if (NILP (deunify)
1379 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1380 : ! CHARSET_UNIFIED_P (cs))
1381 return Qnil;
1382
1383 CHARSET_UNIFIED_P (cs) = 0;
1384 if (NILP (deunify))
1385 {
1386 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1387 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1388 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1389 if (NILP (unify_map))
1390 unify_map = CHARSET_UNIFY_MAP (cs);
1391 else
1392 {
1393 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1394 signal_error ("Bad unify-map", unify_map);
1395 CHARSET_UNIFY_MAP (cs) = unify_map;
1396 }
1397 if (NILP (Vchar_unify_table))
1398 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1399 char_table_set_range (Vchar_unify_table,
1400 cs->min_char, cs->max_char, charset);
1401 CHARSET_UNIFIED_P (cs) = 1;
1402 }
1403 else if (CHAR_TABLE_P (Vchar_unify_table))
1404 {
1405 int min_code = CHARSET_MIN_CODE (cs);
1406 int max_code = CHARSET_MAX_CODE (cs);
1407 int min_char = DECODE_CHAR (cs, min_code);
1408 int max_char = DECODE_CHAR (cs, max_code);
1409
1410 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1411 }
1412
1413 return Qnil;
1414 }
1415
1416 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1417 Sget_unused_iso_final_char, 2, 2, 0,
1418 doc: /*
1419 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1420 DIMENSION is the number of bytes to represent a character: 1 or 2.
1421 CHARS is the number of characters in a dimension: 94 or 96.
1422
1423 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1424 If there's no unused final char for the specified kind of charset,
1425 return nil. */)
1426 (Lisp_Object dimension, Lisp_Object chars)
1427 {
1428 int final_char;
1429
1430 CHECK_NUMBER (dimension);
1431 CHECK_NUMBER (chars);
1432 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1433 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1434 if (XINT (chars) != 94 && XINT (chars) != 96)
1435 args_out_of_range_3 (chars, make_number (94), make_number (96));
1436 for (final_char = '0'; final_char <= '?'; final_char++)
1437 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1438 break;
1439 return (final_char <= '?' ? make_number (final_char) : Qnil);
1440 }
1441
1442 static void
1443 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
1444 {
1445 CHECK_NATNUM (dimension);
1446 CHECK_NATNUM (chars);
1447 CHECK_NATNUM (final_char);
1448
1449 if (XINT (dimension) > 3)
1450 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1451 if (XINT (chars) != 94 && XINT (chars) != 96)
1452 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1453 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1454 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1455 }
1456
1457
1458 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1459 4, 4, 0,
1460 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1461
1462 On decoding by an ISO-2022 base coding system, when a charset
1463 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1464 if CHARSET is designated instead. */)
1465 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
1466 {
1467 int id;
1468 int chars_flag;
1469
1470 CHECK_CHARSET_GET_ID (charset, id);
1471 check_iso_charset_parameter (dimension, chars, final_char);
1472 chars_flag = XINT (chars) == 96;
1473 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1474 return Qnil;
1475 }
1476
1477
1478 /* Return information about charsets in the text at PTR of NBYTES
1479 bytes, which are NCHARS characters. The value is:
1480
1481 0: Each character is represented by one byte. This is always
1482 true for a unibyte string. For a multibyte string, true if
1483 it contains only ASCII characters.
1484
1485 1: No charsets other than ascii, control-1, and latin-1 are
1486 found.
1487
1488 2: Otherwise.
1489 */
1490
1491 int
1492 string_xstring_p (Lisp_Object string)
1493 {
1494 const unsigned char *p = SDATA (string);
1495 const unsigned char *endp = p + SBYTES (string);
1496
1497 if (SCHARS (string) == SBYTES (string))
1498 return 0;
1499
1500 while (p < endp)
1501 {
1502 int c = STRING_CHAR_ADVANCE (p);
1503
1504 if (c >= 0x100)
1505 return 2;
1506 }
1507 return 1;
1508 }
1509
1510
1511 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1512
1513 CHARSETS is a vector. If Nth element is non-nil, it means the
1514 charset whose id is N is already found.
1515
1516 It may lookup a translation table TABLE if supplied. */
1517
1518 static void
1519 find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
1520 {
1521 const unsigned char *pend = ptr + nbytes;
1522
1523 if (nchars == nbytes)
1524 {
1525 if (multibyte)
1526 ASET (charsets, charset_ascii, Qt);
1527 else
1528 while (ptr < pend)
1529 {
1530 int c = *ptr++;
1531
1532 if (!NILP (table))
1533 c = translate_char (table, c);
1534 if (ASCII_BYTE_P (c))
1535 ASET (charsets, charset_ascii, Qt);
1536 else
1537 ASET (charsets, charset_eight_bit, Qt);
1538 }
1539 }
1540 else
1541 {
1542 while (ptr < pend)
1543 {
1544 int c = STRING_CHAR_ADVANCE (ptr);
1545 struct charset *charset;
1546
1547 if (!NILP (table))
1548 c = translate_char (table, c);
1549 charset = CHAR_CHARSET (c);
1550 ASET (charsets, CHARSET_ID (charset), Qt);
1551 }
1552 }
1553 }
1554
1555 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1556 2, 3, 0,
1557 doc: /* Return a list of charsets in the region between BEG and END.
1558 BEG and END are buffer positions.
1559 Optional arg TABLE if non-nil is a translation table to look up.
1560
1561 If the current buffer is unibyte, the returned list may contain
1562 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1563 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
1564 {
1565 Lisp_Object charsets;
1566 EMACS_INT from, from_byte, to, stop, stop_byte;
1567 int i;
1568 Lisp_Object val;
1569 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1570
1571 validate_region (&beg, &end);
1572 from = XFASTINT (beg);
1573 stop = to = XFASTINT (end);
1574
1575 if (from < GPT && GPT < to)
1576 {
1577 stop = GPT;
1578 stop_byte = GPT_BYTE;
1579 }
1580 else
1581 stop_byte = CHAR_TO_BYTE (stop);
1582
1583 from_byte = CHAR_TO_BYTE (from);
1584
1585 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1586 while (1)
1587 {
1588 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1589 stop_byte - from_byte, charsets, table,
1590 multibyte);
1591 if (stop < to)
1592 {
1593 from = stop, from_byte = stop_byte;
1594 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1595 }
1596 else
1597 break;
1598 }
1599
1600 val = Qnil;
1601 for (i = charset_table_used - 1; i >= 0; i--)
1602 if (!NILP (AREF (charsets, i)))
1603 val = Fcons (CHARSET_NAME (charset_table + i), val);
1604 return val;
1605 }
1606
1607 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1608 1, 2, 0,
1609 doc: /* Return a list of charsets in STR.
1610 Optional arg TABLE if non-nil is a translation table to look up.
1611
1612 If STR is unibyte, the returned list may contain
1613 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1614 (Lisp_Object str, Lisp_Object table)
1615 {
1616 Lisp_Object charsets;
1617 int i;
1618 Lisp_Object val;
1619
1620 CHECK_STRING (str);
1621
1622 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1623 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1624 charsets, table,
1625 STRING_MULTIBYTE (str));
1626 val = Qnil;
1627 for (i = charset_table_used - 1; i >= 0; i--)
1628 if (!NILP (AREF (charsets, i)))
1629 val = Fcons (CHARSET_NAME (charset_table + i), val);
1630 return val;
1631 }
1632
1633 \f
1634
1635 /* Return a unified character code for C (>= 0x110000). VAL is a
1636 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1637 charset symbol. */
1638 int
1639 maybe_unify_char (int c, Lisp_Object val)
1640 {
1641 struct charset *charset;
1642
1643 if (INTEGERP (val))
1644 return XINT (val);
1645 if (NILP (val))
1646 return c;
1647
1648 CHECK_CHARSET_GET_CHARSET (val, charset);
1649 load_charset (charset, 1);
1650 if (! inhibit_load_charset_map)
1651 {
1652 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1653 if (! NILP (val))
1654 c = XINT (val);
1655 }
1656 else
1657 {
1658 int code_index = c - CHARSET_CODE_OFFSET (charset);
1659 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1660
1661 if (unified > 0)
1662 c = unified;
1663 }
1664 return c;
1665 }
1666
1667
1668 /* Return a character correponding to the code-point CODE of
1669 CHARSET. */
1670
1671 int
1672 decode_char (struct charset *charset, unsigned int code)
1673 {
1674 int c, char_index;
1675 enum charset_method method = CHARSET_METHOD (charset);
1676
1677 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1678 return -1;
1679
1680 if (method == CHARSET_METHOD_SUBSET)
1681 {
1682 Lisp_Object subset_info;
1683
1684 subset_info = CHARSET_SUBSET (charset);
1685 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1686 code -= XINT (AREF (subset_info, 3));
1687 if (code < XFASTINT (AREF (subset_info, 1))
1688 || code > XFASTINT (AREF (subset_info, 2)))
1689 c = -1;
1690 else
1691 c = DECODE_CHAR (charset, code);
1692 }
1693 else if (method == CHARSET_METHOD_SUPERSET)
1694 {
1695 Lisp_Object parents;
1696
1697 parents = CHARSET_SUPERSET (charset);
1698 c = -1;
1699 for (; CONSP (parents); parents = XCDR (parents))
1700 {
1701 int id = XINT (XCAR (XCAR (parents)));
1702 int code_offset = XINT (XCDR (XCAR (parents)));
1703 unsigned this_code = code - code_offset;
1704
1705 charset = CHARSET_FROM_ID (id);
1706 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1707 break;
1708 }
1709 }
1710 else
1711 {
1712 char_index = CODE_POINT_TO_INDEX (charset, code);
1713 if (char_index < 0)
1714 return -1;
1715
1716 if (method == CHARSET_METHOD_MAP)
1717 {
1718 Lisp_Object decoder;
1719
1720 decoder = CHARSET_DECODER (charset);
1721 if (! VECTORP (decoder))
1722 {
1723 load_charset (charset, 1);
1724 decoder = CHARSET_DECODER (charset);
1725 }
1726 if (VECTORP (decoder))
1727 c = XINT (AREF (decoder, char_index));
1728 else
1729 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1730 }
1731 else /* method == CHARSET_METHOD_OFFSET */
1732 {
1733 c = char_index + CHARSET_CODE_OFFSET (charset);
1734 if (CHARSET_UNIFIED_P (charset)
1735 && c > MAX_UNICODE_CHAR)
1736 MAYBE_UNIFY_CHAR (c);
1737 }
1738 }
1739
1740 return c;
1741 }
1742
1743 /* Variable used temporarily by the macro ENCODE_CHAR. */
1744 Lisp_Object charset_work;
1745
1746 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1747 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1748 use CHARSET's strict_max_char instead of max_char. */
1749
1750 unsigned
1751 encode_char (struct charset *charset, int c)
1752 {
1753 unsigned code;
1754 enum charset_method method = CHARSET_METHOD (charset);
1755
1756 if (CHARSET_UNIFIED_P (charset))
1757 {
1758 Lisp_Object deunifier;
1759 int code_index = -1;
1760
1761 deunifier = CHARSET_DEUNIFIER (charset);
1762 if (! CHAR_TABLE_P (deunifier))
1763 {
1764 load_charset (charset, 2);
1765 deunifier = CHARSET_DEUNIFIER (charset);
1766 }
1767 if (CHAR_TABLE_P (deunifier))
1768 {
1769 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1770
1771 if (INTEGERP (deunified))
1772 code_index = XINT (deunified);
1773 }
1774 else
1775 {
1776 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1777 }
1778 if (code_index >= 0)
1779 c = CHARSET_CODE_OFFSET (charset) + code_index;
1780 }
1781
1782 if (method == CHARSET_METHOD_SUBSET)
1783 {
1784 Lisp_Object subset_info;
1785 struct charset *this_charset;
1786
1787 subset_info = CHARSET_SUBSET (charset);
1788 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1789 code = ENCODE_CHAR (this_charset, c);
1790 if (code == CHARSET_INVALID_CODE (this_charset)
1791 || code < XFASTINT (AREF (subset_info, 1))
1792 || code > XFASTINT (AREF (subset_info, 2)))
1793 return CHARSET_INVALID_CODE (charset);
1794 code += XINT (AREF (subset_info, 3));
1795 return code;
1796 }
1797
1798 if (method == CHARSET_METHOD_SUPERSET)
1799 {
1800 Lisp_Object parents;
1801
1802 parents = CHARSET_SUPERSET (charset);
1803 for (; CONSP (parents); parents = XCDR (parents))
1804 {
1805 int id = XINT (XCAR (XCAR (parents)));
1806 int code_offset = XINT (XCDR (XCAR (parents)));
1807 struct charset *this_charset = CHARSET_FROM_ID (id);
1808
1809 code = ENCODE_CHAR (this_charset, c);
1810 if (code != CHARSET_INVALID_CODE (this_charset))
1811 return code + code_offset;
1812 }
1813 return CHARSET_INVALID_CODE (charset);
1814 }
1815
1816 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1817 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1818 return CHARSET_INVALID_CODE (charset);
1819
1820 if (method == CHARSET_METHOD_MAP)
1821 {
1822 Lisp_Object encoder;
1823 Lisp_Object val;
1824
1825 encoder = CHARSET_ENCODER (charset);
1826 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1827 {
1828 load_charset (charset, 2);
1829 encoder = CHARSET_ENCODER (charset);
1830 }
1831 if (CHAR_TABLE_P (encoder))
1832 {
1833 val = CHAR_TABLE_REF (encoder, c);
1834 if (NILP (val))
1835 return CHARSET_INVALID_CODE (charset);
1836 code = XINT (val);
1837 if (! CHARSET_COMPACT_CODES_P (charset))
1838 code = INDEX_TO_CODE_POINT (charset, code);
1839 }
1840 else
1841 {
1842 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1843 code = INDEX_TO_CODE_POINT (charset, code);
1844 }
1845 }
1846 else /* method == CHARSET_METHOD_OFFSET */
1847 {
1848 int code_index = c - CHARSET_CODE_OFFSET (charset);
1849
1850 code = INDEX_TO_CODE_POINT (charset, code_index);
1851 }
1852
1853 return code;
1854 }
1855
1856
1857 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1858 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1859 Return nil if CODE-POINT is not valid in CHARSET.
1860
1861 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1862
1863 Optional argument RESTRICTION specifies a way to map the pair of CCS
1864 and CODE-POINT to a character. Currently not supported and just ignored. */)
1865 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
1866 {
1867 int c, id;
1868 unsigned code;
1869 struct charset *charsetp;
1870
1871 CHECK_CHARSET_GET_ID (charset, id);
1872 if (CONSP (code_point))
1873 {
1874 CHECK_NATNUM_CAR (code_point);
1875 CHECK_NATNUM_CDR (code_point);
1876 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1877 }
1878 else
1879 {
1880 CHECK_NATNUM (code_point);
1881 code = XINT (code_point);
1882 }
1883 charsetp = CHARSET_FROM_ID (id);
1884 c = DECODE_CHAR (charsetp, code);
1885 return (c >= 0 ? make_number (c) : Qnil);
1886 }
1887
1888
1889 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1890 doc: /* Encode the character CH into a code-point of CHARSET.
1891 Return nil if CHARSET doesn't include CH.
1892
1893 Optional argument RESTRICTION specifies a way to map CH to a
1894 code-point in CCS. Currently not supported and just ignored. */)
1895 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
1896 {
1897 int id;
1898 unsigned code;
1899 struct charset *charsetp;
1900
1901 CHECK_CHARSET_GET_ID (charset, id);
1902 CHECK_NATNUM (ch);
1903 charsetp = CHARSET_FROM_ID (id);
1904 code = ENCODE_CHAR (charsetp, XINT (ch));
1905 if (code == CHARSET_INVALID_CODE (charsetp))
1906 return Qnil;
1907 if (code > 0x7FFFFFF)
1908 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1909 return make_number (code);
1910 }
1911
1912
1913 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1914 doc:
1915 /* Return a character of CHARSET whose position codes are CODEn.
1916
1917 CODE1 through CODE4 are optional, but if you don't supply sufficient
1918 position codes, it is assumed that the minimum code in each dimension
1919 is specified. */)
1920 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
1921 {
1922 int id, dimension;
1923 struct charset *charsetp;
1924 unsigned code;
1925 int c;
1926
1927 CHECK_CHARSET_GET_ID (charset, id);
1928 charsetp = CHARSET_FROM_ID (id);
1929
1930 dimension = CHARSET_DIMENSION (charsetp);
1931 if (NILP (code1))
1932 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1933 ? 0 : CHARSET_MIN_CODE (charsetp));
1934 else
1935 {
1936 CHECK_NATNUM (code1);
1937 if (XFASTINT (code1) >= 0x100)
1938 args_out_of_range (make_number (0xFF), code1);
1939 code = XFASTINT (code1);
1940
1941 if (dimension > 1)
1942 {
1943 code <<= 8;
1944 if (NILP (code2))
1945 code |= charsetp->code_space[(dimension - 2) * 4];
1946 else
1947 {
1948 CHECK_NATNUM (code2);
1949 if (XFASTINT (code2) >= 0x100)
1950 args_out_of_range (make_number (0xFF), code2);
1951 code |= XFASTINT (code2);
1952 }
1953
1954 if (dimension > 2)
1955 {
1956 code <<= 8;
1957 if (NILP (code3))
1958 code |= charsetp->code_space[(dimension - 3) * 4];
1959 else
1960 {
1961 CHECK_NATNUM (code3);
1962 if (XFASTINT (code3) >= 0x100)
1963 args_out_of_range (make_number (0xFF), code3);
1964 code |= XFASTINT (code3);
1965 }
1966
1967 if (dimension > 3)
1968 {
1969 code <<= 8;
1970 if (NILP (code4))
1971 code |= charsetp->code_space[0];
1972 else
1973 {
1974 CHECK_NATNUM (code4);
1975 if (XFASTINT (code4) >= 0x100)
1976 args_out_of_range (make_number (0xFF), code4);
1977 code |= XFASTINT (code4);
1978 }
1979 }
1980 }
1981 }
1982 }
1983
1984 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1985 code &= 0x7F7F7F7F;
1986 c = DECODE_CHAR (charsetp, code);
1987 if (c < 0)
1988 error ("Invalid code(s)");
1989 return make_number (c);
1990 }
1991
1992
1993 /* Return the first charset in CHARSET_LIST that contains C.
1994 CHARSET_LIST is a list of charset IDs. If it is nil, use
1995 Vcharset_ordered_list. */
1996
1997 struct charset *
1998 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
1999 {
2000 int maybe_null = 0;
2001
2002 if (NILP (charset_list))
2003 charset_list = Vcharset_ordered_list;
2004 else
2005 maybe_null = 1;
2006
2007 while (CONSP (charset_list))
2008 {
2009 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2010 unsigned code = ENCODE_CHAR (charset, c);
2011
2012 if (code != CHARSET_INVALID_CODE (charset))
2013 {
2014 if (code_return)
2015 *code_return = code;
2016 return charset;
2017 }
2018 charset_list = XCDR (charset_list);
2019 if (! maybe_null
2020 && c <= MAX_UNICODE_CHAR
2021 && EQ (charset_list, Vcharset_non_preferred_head))
2022 return CHARSET_FROM_ID (charset_unicode);
2023 }
2024 return (maybe_null ? NULL
2025 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2026 : CHARSET_FROM_ID (charset_eight_bit));
2027 }
2028
2029
2030 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2031 doc:
2032 /*Return list of charset and one to four position-codes of CH.
2033 The charset is decided by the current priority order of charsets.
2034 A position-code is a byte value of each dimension of the code-point of
2035 CH in the charset. */)
2036 (Lisp_Object ch)
2037 {
2038 struct charset *charset;
2039 int c, dimension;
2040 unsigned code;
2041 Lisp_Object val;
2042
2043 CHECK_CHARACTER (ch);
2044 c = XFASTINT (ch);
2045 charset = CHAR_CHARSET (c);
2046 if (! charset)
2047 abort ();
2048 code = ENCODE_CHAR (charset, c);
2049 if (code == CHARSET_INVALID_CODE (charset))
2050 abort ();
2051 dimension = CHARSET_DIMENSION (charset);
2052 for (val = Qnil; dimension > 0; dimension--)
2053 {
2054 val = Fcons (make_number (code & 0xFF), val);
2055 code >>= 8;
2056 }
2057 return Fcons (CHARSET_NAME (charset), val);
2058 }
2059
2060
2061 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2062 doc: /* Return the charset of highest priority that contains CH.
2063 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2064 from which to find the charset. It may also be a coding system. In
2065 that case, find the charset from what supported by that coding system. */)
2066 (Lisp_Object ch, Lisp_Object restriction)
2067 {
2068 struct charset *charset;
2069
2070 CHECK_CHARACTER (ch);
2071 if (NILP (restriction))
2072 charset = CHAR_CHARSET (XINT (ch));
2073 else
2074 {
2075 if (CONSP (restriction))
2076 {
2077 int c = XFASTINT (ch);
2078
2079 for (; CONSP (restriction); restriction = XCDR (restriction))
2080 {
2081 struct charset *charset;
2082
2083 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), charset);
2084 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
2085 return XCAR (restriction);
2086 }
2087 return Qnil;
2088 }
2089 restriction = coding_system_charset_list (restriction);
2090 charset = char_charset (XINT (ch), restriction, NULL);
2091 if (! charset)
2092 return Qnil;
2093 }
2094 return (CHARSET_NAME (charset));
2095 }
2096
2097
2098 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2099 doc: /*
2100 Return charset of a character in the current buffer at position POS.
2101 If POS is nil, it defauls to the current point.
2102 If POS is out of range, the value is nil. */)
2103 (Lisp_Object pos)
2104 {
2105 Lisp_Object ch;
2106 struct charset *charset;
2107
2108 ch = Fchar_after (pos);
2109 if (! INTEGERP (ch))
2110 return ch;
2111 charset = CHAR_CHARSET (XINT (ch));
2112 return (CHARSET_NAME (charset));
2113 }
2114
2115
2116 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2117 doc: /*
2118 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2119
2120 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2121 by their DIMENSION, CHARS, and FINAL-CHAR,
2122 whereas Emacs distinguishes them by charset symbol.
2123 See the documentation of the function `charset-info' for the meanings of
2124 DIMENSION, CHARS, and FINAL-CHAR. */)
2125 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
2126 {
2127 int id;
2128 int chars_flag;
2129
2130 check_iso_charset_parameter (dimension, chars, final_char);
2131 chars_flag = XFASTINT (chars) == 96;
2132 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2133 XFASTINT (final_char));
2134 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2135 }
2136
2137
2138 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2139 0, 0, 0,
2140 doc: /*
2141 Internal use only.
2142 Clear temporary charset mapping tables.
2143 It should be called only from temacs invoked for dumping. */)
2144 (void)
2145 {
2146 if (temp_charset_work)
2147 {
2148 free (temp_charset_work);
2149 temp_charset_work = NULL;
2150 }
2151
2152 if (CHAR_TABLE_P (Vchar_unify_table))
2153 Foptimize_char_table (Vchar_unify_table, Qnil);
2154
2155 return Qnil;
2156 }
2157
2158 DEFUN ("charset-priority-list", Fcharset_priority_list,
2159 Scharset_priority_list, 0, 1, 0,
2160 doc: /* Return the list of charsets ordered by priority.
2161 HIGHESTP non-nil means just return the highest priority one. */)
2162 (Lisp_Object highestp)
2163 {
2164 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2165
2166 if (!NILP (highestp))
2167 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2168
2169 while (!NILP (list))
2170 {
2171 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2172 list = XCDR (list);
2173 }
2174 return Fnreverse (val);
2175 }
2176
2177 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2178 1, MANY, 0,
2179 doc: /* Assign higher priority to the charsets given as arguments.
2180 usage: (set-charset-priority &rest charsets) */)
2181 (int nargs, Lisp_Object *args)
2182 {
2183 Lisp_Object new_head, old_list, arglist[2];
2184 Lisp_Object list_2022, list_emacs_mule;
2185 int i, id;
2186
2187 old_list = Fcopy_sequence (Vcharset_ordered_list);
2188 new_head = Qnil;
2189 for (i = 0; i < nargs; i++)
2190 {
2191 CHECK_CHARSET_GET_ID (args[i], id);
2192 if (! NILP (Fmemq (make_number (id), old_list)))
2193 {
2194 old_list = Fdelq (make_number (id), old_list);
2195 new_head = Fcons (make_number (id), new_head);
2196 }
2197 }
2198 arglist[0] = Fnreverse (new_head);
2199 arglist[1] = Vcharset_non_preferred_head = old_list;
2200 Vcharset_ordered_list = Fnconc (2, arglist);
2201 charset_ordered_list_tick++;
2202
2203 charset_unibyte = -1;
2204 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2205 CONSP (old_list); old_list = XCDR (old_list))
2206 {
2207 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2208 list_2022 = Fcons (XCAR (old_list), list_2022);
2209 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2210 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2211 if (charset_unibyte < 0)
2212 {
2213 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2214
2215 if (CHARSET_DIMENSION (charset) == 1
2216 && CHARSET_ASCII_COMPATIBLE_P (charset)
2217 && CHARSET_MAX_CHAR (charset) >= 0x80)
2218 charset_unibyte = CHARSET_ID (charset);
2219 }
2220 }
2221 Viso_2022_charset_list = Fnreverse (list_2022);
2222 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2223 if (charset_unibyte < 0)
2224 charset_unibyte = charset_iso_8859_1;
2225
2226 return Qnil;
2227 }
2228
2229 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2230 0, 1, 0,
2231 doc: /* Internal use only.
2232 Return charset identification number of CHARSET. */)
2233 (Lisp_Object charset)
2234 {
2235 int id;
2236
2237 CHECK_CHARSET_GET_ID (charset, id);
2238 return make_number (id);
2239 }
2240
2241 struct charset_sort_data
2242 {
2243 Lisp_Object charset;
2244 int id;
2245 int priority;
2246 };
2247
2248 static int
2249 charset_compare (const void *d1, const void *d2)
2250 {
2251 const struct charset_sort_data *data1 = d1, *data2 = d2;
2252 return (data1->priority - data2->priority);
2253 }
2254
2255 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2256 doc: /* Sort charset list CHARSETS by a priority of each charset.
2257 Return the sorted list. CHARSETS is modified by side effects.
2258 See also `charset-priority-list' and `set-charset-priority'. */)
2259 (Lisp_Object charsets)
2260 {
2261 Lisp_Object len = Flength (charsets);
2262 int n = XFASTINT (len), i, j, done;
2263 Lisp_Object tail, elt, attrs;
2264 struct charset_sort_data *sort_data;
2265 int id, min_id, max_id;
2266 USE_SAFE_ALLOCA;
2267
2268 if (n == 0)
2269 return Qnil;
2270 SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
2271 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2272 {
2273 elt = XCAR (tail);
2274 CHECK_CHARSET_GET_ATTR (elt, attrs);
2275 sort_data[i].charset = elt;
2276 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
2277 if (i == 0)
2278 min_id = max_id = id;
2279 else if (id < min_id)
2280 min_id = id;
2281 else if (id > max_id)
2282 max_id = id;
2283 }
2284 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2285 done < n && CONSP (tail); tail = XCDR (tail), i++)
2286 {
2287 elt = XCAR (tail);
2288 id = XFASTINT (elt);
2289 if (id >= min_id && id <= max_id)
2290 for (j = 0; j < n; j++)
2291 if (sort_data[j].id == id)
2292 {
2293 sort_data[j].priority = i;
2294 done++;
2295 }
2296 }
2297 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2298 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2299 XSETCAR (tail, sort_data[i].charset);
2300 SAFE_FREE ();
2301 return charsets;
2302 }
2303
2304 \f
2305 void
2306 init_charset (void)
2307 {
2308 Lisp_Object tempdir;
2309 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2310 if (access ((char *) SDATA (tempdir), 0) < 0)
2311 {
2312 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2313 Emacs will not function correctly without the character map files.\n\
2314 Please check your installation!\n",
2315 tempdir);
2316 /* TODO should this be a fatal error? (Bug#909) */
2317 }
2318
2319 Vcharset_map_path = Fcons (tempdir, Qnil);
2320 }
2321
2322
2323 void
2324 init_charset_once (void)
2325 {
2326 int i, j, k;
2327
2328 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2329 for (j = 0; j < ISO_MAX_CHARS; j++)
2330 for (k = 0; k < ISO_MAX_FINAL; k++)
2331 iso_charset_table[i][j][k] = -1;
2332
2333 for (i = 0; i < 256; i++)
2334 emacs_mule_charset[i] = NULL;
2335
2336 charset_jisx0201_roman = -1;
2337 charset_jisx0208_1978 = -1;
2338 charset_jisx0208 = -1;
2339 charset_ksc5601 = -1;
2340 }
2341
2342 #ifdef emacs
2343
2344 void
2345 syms_of_charset (void)
2346 {
2347 DEFSYM (Qcharsetp, "charsetp");
2348
2349 DEFSYM (Qascii, "ascii");
2350 DEFSYM (Qunicode, "unicode");
2351 DEFSYM (Qemacs, "emacs");
2352 DEFSYM (Qeight_bit, "eight-bit");
2353 DEFSYM (Qiso_8859_1, "iso-8859-1");
2354
2355 DEFSYM (Qgl, "gl");
2356 DEFSYM (Qgr, "gr");
2357
2358 staticpro (&Vcharset_ordered_list);
2359 Vcharset_ordered_list = Qnil;
2360
2361 staticpro (&Viso_2022_charset_list);
2362 Viso_2022_charset_list = Qnil;
2363
2364 staticpro (&Vemacs_mule_charset_list);
2365 Vemacs_mule_charset_list = Qnil;
2366
2367 /* Don't staticpro them here. It's done in syms_of_fns. */
2368 QCtest = intern (":test");
2369 Qeq = intern ("eq");
2370
2371 staticpro (&Vcharset_hash_table);
2372 {
2373 Lisp_Object args[2];
2374 args[0] = QCtest;
2375 args[1] = Qeq;
2376 Vcharset_hash_table = Fmake_hash_table (2, args);
2377 }
2378
2379 charset_table_size = 128;
2380 charset_table = ((struct charset *)
2381 xmalloc (sizeof (struct charset) * charset_table_size));
2382 charset_table_used = 0;
2383
2384 defsubr (&Scharsetp);
2385 defsubr (&Smap_charset_chars);
2386 defsubr (&Sdefine_charset_internal);
2387 defsubr (&Sdefine_charset_alias);
2388 defsubr (&Scharset_plist);
2389 defsubr (&Sset_charset_plist);
2390 defsubr (&Sunify_charset);
2391 defsubr (&Sget_unused_iso_final_char);
2392 defsubr (&Sdeclare_equiv_charset);
2393 defsubr (&Sfind_charset_region);
2394 defsubr (&Sfind_charset_string);
2395 defsubr (&Sdecode_char);
2396 defsubr (&Sencode_char);
2397 defsubr (&Ssplit_char);
2398 defsubr (&Smake_char);
2399 defsubr (&Schar_charset);
2400 defsubr (&Scharset_after);
2401 defsubr (&Siso_charset);
2402 defsubr (&Sclear_charset_maps);
2403 defsubr (&Scharset_priority_list);
2404 defsubr (&Sset_charset_priority);
2405 defsubr (&Scharset_id_internal);
2406 defsubr (&Ssort_charsets);
2407
2408 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2409 doc: /* *List of directories to search for charset map files. */);
2410 Vcharset_map_path = Qnil;
2411
2412 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2413 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2414 inhibit_load_charset_map = 0;
2415
2416 DEFVAR_LISP ("charset-list", &Vcharset_list,
2417 doc: /* List of all charsets ever defined. */);
2418 Vcharset_list = Qnil;
2419
2420 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2421 doc: /* ISO639 language mnemonic symbol for the current language environment.
2422 If the current language environment is for multiple languages (e.g. "Latin-1"),
2423 the value may be a list of mnemonics. */);
2424 Vcurrent_iso639_language = Qnil;
2425
2426 charset_ascii
2427 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2428 0, 127, 'B', -1, 0, 1, 0, 0);
2429 charset_iso_8859_1
2430 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2431 0, 255, -1, -1, -1, 1, 0, 0);
2432 charset_unicode
2433 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2434 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2435 charset_emacs
2436 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2437 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2438 charset_eight_bit
2439 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2440 128, 255, -1, 0, -1, 0, 1,
2441 MAX_5_BYTE_CHAR + 1);
2442 charset_unibyte = charset_iso_8859_1;
2443 }
2444
2445 #endif /* emacs */
2446
2447 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2448 (do not change this comment) */