]> code.delx.au - gnu-emacs/blob - src/bidi.c
Doc fixes for fclist and grep
[gnu-emacs] / src / bidi.c
1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
2 Copyright (C) 2000-2001, 2004-2005, 2009-2016 Free Software
3 Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or (at
10 your option) any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
19
20 /* Written by Eli Zaretskii <eliz@gnu.org>.
21
22 A sequential implementation of the Unicode Bidirectional algorithm,
23 (UBA) as per UAX#9, a part of the Unicode Standard.
24
25 Unlike the Reference Implementation and most other implementations,
26 this one is designed to be called once for every character in the
27 buffer or string. That way, we can leave intact the design of the
28 Emacs display engine, whereby an iterator object is used to
29 traverse buffer or string text character by character, and generate
30 the necessary data for displaying each character in 'struct glyph'
31 objects. (See xdisp.c for the details of that iteration.) The
32 functions on this file replace the original linear iteration in the
33 logical order of the text with a non-linear iteration in the visual
34 order, i.e. in the order characters should be shown on display.
35
36 The main entry point is bidi_move_to_visually_next. Each time it
37 is called, it finds the next character in the visual order, and
38 returns its information in a special structure. The caller is then
39 expected to process this character for display or any other
40 purposes, and call bidi_move_to_visually_next for the next
41 character. See the comments in bidi_move_to_visually_next for more
42 details about its algorithm that finds the next visual-order
43 character by resolving their levels on the fly.
44
45 Two other entry points are bidi_paragraph_init and
46 bidi_mirror_char. The first determines the base direction of a
47 paragraph, while the second returns the mirrored version of its
48 argument character.
49
50 A few auxiliary entry points are used to initialize the bidi
51 iterator for iterating an object (buffer or string), push and pop
52 the bidi iterator state, and save and restore the state of the bidi
53 cache.
54
55 If you want to understand the code, you will have to read it
56 together with the relevant portions of UAX#9. The comments include
57 references to UAX#9 rules, for that very reason.
58
59 A note about references to UAX#9 rules: if the reference says
60 something like "X9/Retaining", it means that you need to refer to
61 rule X9 and to its modifications described in the "Implementation
62 Notes" section of UAX#9, under "Retaining Format Codes".
63
64 Here's the overview of the design of the reordering engine
65 implemented by this file.
66
67 Basic implementation structure
68 ------------------------------
69
70 The sequential processing steps described by UAX#9 are implemented
71 as recursive levels of processing, all of which examine the next
72 character in the logical order. This hierarchy of processing looks
73 as follows, from the innermost (deepest) to the outermost level,
74 omitting some subroutines used by each level:
75
76 bidi_fetch_char -- fetch next character
77 bidi_resolve_explicit -- resolve explicit levels and directions
78 bidi_resolve_weak -- resolve weak types
79 bidi_resolve_brackets -- resolve "paired brackets" neutral types
80 bidi_resolve_neutral -- resolve neutral types
81 bidi_level_of_next_char -- resolve implicit levels
82
83 Each level calls the level below it, and works on the result
84 returned by the lower level, including all of its sub-levels.
85
86 Unlike all the levels below it, bidi_level_of_next_char can return
87 the information about either the next or previous character in the
88 logical order, depending on the current direction of scanning the
89 buffer or string. For the next character, it calls all the levels
90 below it; for the previous character, it uses the cache, described
91 below.
92
93 Thus, the result of calling bidi_level_of_next_char is the resolved
94 level of the next or the previous character in the logical order.
95 Based on this information, the function bidi_move_to_visually_next
96 finds the next character in the visual order and updates the
97 direction in which the buffer is scanned, either forward or
98 backward, to find the next character to be displayed. (Text is
99 scanned backwards when it needs to be reversed for display, i.e. if
100 the visual order is the inverse of the logical order.) This
101 implements the last, reordering steps of the UBA, by successively
102 calling bidi_level_of_next_char until the character of the required
103 embedding level is found; the scan direction is dynamically updated
104 as a side effect. See the commentary before the 'while' loop in
105 bidi_move_to_visually_next, for the details.
106
107 Fetching characters
108 -------------------
109
110 In a nutshell, fetching the next character boils down to calling
111 STRING_CHAR_AND_LENGTH, passing it the address of a buffer or
112 string position. See bidi_fetch_char. However, if the next
113 character is "covered" by a display property of some kind,
114 bidi_fetch_char returns the u+FFFC "object replacement character"
115 that represents the entire run of text covered by the display
116 property. (The ch_len and nchars members of 'struct bidi_it'
117 reflect the length in bytes and characters of that text.) This is
118 so we reorder text on both sides of the display property as
119 appropriate for an image or embedded string. Similarly, text
120 covered by a display spec of the form '(space ...)', is replaced
121 with the u+2029 paragraph separator character, so such display
122 specs produce the same effect as a TAB under UBA. Both these
123 special characters are not actually displayed -- the display
124 property is displayed instead -- but just used to compute the
125 embedding level of the surrounding text so as to produce the
126 required effect.
127
128 Bidi iterator states
129 --------------------
130
131 The UBA is highly context dependent in some of its parts,
132 i.e. results of processing a character can generally depend on
133 characters very far away. The UAX#9 description of the UBA
134 prescribes a stateful processing of each character, whereby the
135 results of this processing depend on various state variables, such
136 as the current embedding level, level stack, and directional
137 override status. In addition, the UAX#9 description includes many
138 passages like this (from rule W2 in this case):
139
140 Search backward from each instance of a European number until the
141 first strong type (R, L, AL, or sos) is found. If an AL is found,
142 change the type of the European number to Arabic number.
143
144 To support this, we use a bidi iterator object, 'struct bidi_it',
145 which is a sub-structure of 'struct it' used by xdisp.c (see
146 dispextern.h for the definition of both of these structures). The
147 bidi iterator holds the entire state of the iteration required by
148 the UBA, and is updated as the text is traversed. In particular,
149 the embedding level of the current character being resolved is
150 recorded in the iterator state. To avoid costly searches backward
151 in support of rules like W2 above, the necessary character types
152 are also recorded in the iterator state as they are found during
153 the forward scan, and then used when such rules need to be applied.
154 (Forward scans cannot be avoided in this way; they need to be
155 performed at least once, and the results recorded in the iterator
156 state, to be reused until the forward scan oversteps the recorded
157 position.)
158
159 In this manner, the iterator state acts as a mini-cache of
160 contextual information required for resolving the level of the
161 current character by various UBA rules.
162
163 Caching of bidi iterator states
164 -------------------------------
165
166 As described above, the reordering engine uses the information
167 recorded in the bidi iterator state in order to resolve the
168 embedding level of the current character. When the reordering
169 engine needs to process the next character in the logical order, it
170 fetches it and applies to it all the UBA levels, updating the
171 iterator state as it goes. But when the buffer or string is
172 scanned backwards, i.e. in the reverse order of buffer/string
173 positions, the scanned characters were already processed during the
174 preceding forward scan (see bidi_find_other_level_edge). To avoid
175 costly re-processing of characters that were already processed
176 during the forward scan, the iterator states computed while
177 scanning forward are cached.
178
179 The cache is just a linear array of 'struct bidi_it' objects, which
180 is dynamically allocated and reallocated as needed, since the size
181 of the cache depends on the text being processed. We only need the
182 cache while processing embedded levels higher than the base
183 paragraph embedding level, because these higher levels require
184 changes in scan direction. Therefore, as soon as we are back to
185 the base embedding level, we can free the cache; see the calls to
186 bidi_cache_reset and bidi_cache_shrink, for the conditions to do
187 this.
188
189 The cache maintains the index of the next unused cache slot -- this
190 is where the next iterator state will be cached. The function
191 bidi_cache_iterator_state saves an instance of the state in the
192 cache and increments the unused slot index. The companion function
193 bidi_cache_find looks up a cached state that corresponds to a given
194 buffer/string position. All of the cached states must correspond
195 1:1 to the buffer or string region whose processing they reflect;
196 bidi.c will abort if it finds cache slots that violate this 1:1
197 correspondence.
198
199 When the parent iterator 'struct it' is pushed (see push_it in
200 xdisp.c) to pause the current iteration and start iterating over a
201 different object (e.g., a 'display' string that covers some buffer
202 text), the bidi iterator cache needs to be "pushed" as well, so
203 that a new empty cache could be used while iterating over the new
204 object. Later, when the new object is exhausted, and xdisp.c calls
205 pop_it, we need to "pop" the bidi cache as well and return to the
206 original cache. See bidi_push_it and bidi_pop_it for how this is
207 done.
208
209 Some functions of the display engine save copies of 'struct it' in
210 local variables, and restore them later. For examples, see
211 pos_visible_p and move_it_in_display_line_to in xdisp.c, and
212 window_scroll_pixel_based in window.c. When this happens, we need
213 to save and restore the bidi cache as well, because conceptually
214 the cache is part of the 'struct it' state, and needs to be in
215 perfect sync with the portion of the buffer/string that is being
216 processed. This saving and restoring of the cache state is handled
217 by bidi_shelve_cache and bidi_unshelve_cache, and the helper macros
218 SAVE_IT and RESTORE_IT defined on xdisp.c.
219
220 Note that, because reordering is implemented below the level in
221 xdisp.c that breaks glyphs into screen lines, we are violating
222 paragraph 3.4 of UAX#9. which mandates that line breaking shall be
223 done before reordering each screen line separately. However,
224 following UAX#9 to the letter in this matter goes against the basic
225 design of the Emacs display engine, and so we choose here this
226 minor deviation from the UBA letter in preference to redesign of
227 the display engine. The effect of this is only seen in continued
228 lines that are broken into screen lines in the middle of a run
229 whose direction is opposite to the paragraph's base direction.
230
231 Important design and implementation note: when the code needs to
232 scan far ahead, be sure to avoid such scans as much as possible
233 when the buffer/string doesn't contain any RTL characters. Users
234 of left-to-right scripts will never forgive you if you introduce
235 some slow-down due to bidi in situations that don't involve any
236 bidirectional text. See the large comment near the beginning of
237 bidi_resolve_neutral, for one situation where such shortcut was
238 necessary. */
239
240 #include <config.h>
241 #include <stdio.h>
242
243 #include "lisp.h"
244 #include "character.h"
245 #include "buffer.h"
246 #include "dispextern.h"
247 #include "region-cache.h"
248
249 static bool bidi_initialized = 0;
250
251 static Lisp_Object bidi_type_table, bidi_mirror_table, bidi_brackets_table;
252
253 #define BIDI_EOB (-1)
254
255 /* Data type for describing the bidirectional character categories. */
256 typedef enum {
257 UNKNOWN_BC,
258 NEUTRAL,
259 WEAK,
260 STRONG,
261 EXPLICIT_FORMATTING
262 } bidi_category_t;
263
264 static Lisp_Object paragraph_start_re, paragraph_separate_re;
265
266 \f
267 /***********************************************************************
268 Utilities
269 ***********************************************************************/
270
271 /* Return the bidi type of a character CH, subject to the current
272 directional OVERRIDE. */
273 static bidi_type_t
274 bidi_get_type (int ch, bidi_dir_t override)
275 {
276 bidi_type_t default_type;
277
278 if (ch == BIDI_EOB)
279 return NEUTRAL_B;
280 if (ch < 0 || ch > MAX_CHAR)
281 emacs_abort ();
282
283 default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
284 /* Every valid character code, even those that are unassigned by the
285 UCD, have some bidi-class property, according to
286 DerivedBidiClass.txt file. Therefore, if we ever get UNKNOWN_BT
287 (= zero) code from CHAR_TABLE_REF, that's a bug. */
288 if (default_type == UNKNOWN_BT)
289 emacs_abort ();
290
291 switch (default_type)
292 {
293 case WEAK_BN:
294 case NEUTRAL_B:
295 case LRE:
296 case LRO:
297 case RLE:
298 case RLO:
299 case PDF:
300 case LRI:
301 case RLI:
302 case FSI:
303 case PDI:
304 return default_type;
305 default:
306 if (override == L2R)
307 return STRONG_L;
308 else if (override == R2L)
309 return STRONG_R;
310 else
311 return default_type;
312 }
313 }
314
315 static void
316 bidi_check_type (bidi_type_t type)
317 {
318 eassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
319 }
320
321 /* Given a bidi TYPE of a character, return its category. */
322 static bidi_category_t
323 bidi_get_category (bidi_type_t type)
324 {
325 switch (type)
326 {
327 case UNKNOWN_BT:
328 return UNKNOWN_BC;
329 case STRONG_L:
330 case STRONG_R:
331 case STRONG_AL:
332 return STRONG;
333 case WEAK_EN:
334 case WEAK_ES:
335 case WEAK_ET:
336 case WEAK_AN:
337 case WEAK_CS:
338 case WEAK_NSM:
339 case WEAK_BN:
340 return WEAK;
341 case NEUTRAL_B:
342 case NEUTRAL_S:
343 case NEUTRAL_WS:
344 case NEUTRAL_ON:
345 return NEUTRAL;
346 case LRE:
347 case LRO:
348 case RLE:
349 case RLO:
350 case PDF:
351 case LRI:
352 case RLI:
353 case FSI:
354 case PDI:
355 return EXPLICIT_FORMATTING;
356 default:
357 emacs_abort ();
358 }
359 }
360
361 static bool
362 bidi_isolate_fmt_char (bidi_type_t ch_type)
363 {
364 return (ch_type == LRI || ch_type == RLI || ch_type == PDI || ch_type == FSI);
365 }
366
367 /* Return the mirrored character of C, if it has one. If C has no
368 mirrored counterpart, return C.
369 Note: The conditions in UAX#9 clause L4 regarding the surrounding
370 context must be tested by the caller. */
371 int
372 bidi_mirror_char (int c)
373 {
374 Lisp_Object val;
375
376 if (c == BIDI_EOB)
377 return c;
378 if (c < 0 || c > MAX_CHAR)
379 emacs_abort ();
380
381 val = CHAR_TABLE_REF (bidi_mirror_table, c);
382 if (INTEGERP (val))
383 {
384 int v;
385
386 /* When debugging, check before assigning to V, so that the check
387 isn't broken by undefined behavior due to int overflow. */
388 eassert (CHAR_VALID_P (XINT (val)));
389
390 v = XINT (val);
391
392 /* Minimal test we must do in optimized builds, to prevent weird
393 crashes further down the road. */
394 if (v < 0 || v > MAX_CHAR)
395 emacs_abort ();
396
397 return v;
398 }
399
400 return c;
401 }
402
403 /* Return the Bidi_Paired_Bracket_Type property of the character C. */
404 static bidi_bracket_type_t
405 bidi_paired_bracket_type (int c)
406 {
407 if (c == BIDI_EOB)
408 return BIDI_BRACKET_NONE;
409 if (c < 0 || c > MAX_CHAR)
410 emacs_abort ();
411
412 return (bidi_bracket_type_t) XINT (CHAR_TABLE_REF (bidi_brackets_table, c));
413 }
414
415 /* Determine the start-of-sequence (sos) directional type given the two
416 embedding levels on either side of the run boundary. Also, update
417 the saved info about previously seen characters, since that info is
418 generally valid for a single level run. */
419 static void
420 bidi_set_sos_type (struct bidi_it *bidi_it, int level_before, int level_after)
421 {
422 int higher_level = (level_before > level_after ? level_before : level_after);
423
424 /* FIXME: should the default sos direction be user selectable? */
425 bidi_it->sos = ((higher_level & 1) != 0 ? R2L : L2R); /* X10 */
426
427 bidi_it->prev.type = UNKNOWN_BT;
428 bidi_it->last_strong.type = bidi_it->last_strong.orig_type = UNKNOWN_BT;
429 bidi_it->prev_for_neutral.type = (bidi_it->sos == R2L ? STRONG_R : STRONG_L);
430 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
431 bidi_it->next_for_neutral.type
432 = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
433 }
434
435 #define ISOLATE_STATUS(BIDI_IT, IDX) ((BIDI_IT)->level_stack[IDX].flags & 1)
436 #define OVERRIDE(BIDI_IT, IDX) (((BIDI_IT)->level_stack[IDX].flags >> 1) & 3)
437
438 /* Push the current embedding level and override status; reset the
439 current level to LEVEL and the current override status to OVERRIDE. */
440 static void
441 bidi_push_embedding_level (struct bidi_it *bidi_it,
442 int level, bidi_dir_t override, bool isolate_status)
443 {
444 struct bidi_stack *st;
445 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
446
447 bidi_it->stack_idx++;
448 eassert (bidi_it->stack_idx < BIDI_MAXDEPTH+2+1);
449 st = &bidi_it->level_stack[bidi_it->stack_idx];
450 eassert (level <= (1 << 7));
451 st->level = level;
452 st->flags = (((override & 3) << 1) | (isolate_status != 0));
453 if (isolate_status)
454 {
455 st->last_strong_type = bidi_it->last_strong.type;
456 st->prev_for_neutral_type = bidi_it->prev_for_neutral.type;
457 st->next_for_neutral_type = bidi_it->next_for_neutral.type;
458 st->next_for_neutral_pos = bidi_it->next_for_neutral.charpos;
459 st->flags |= ((bidi_it->sos == L2R ? 0 : 1) << 3);
460 }
461 /* We've got a new isolating sequence, compute the directional type
462 of sos and initialize per-sequence variables (UAX#9, clause X10). */
463 bidi_set_sos_type (bidi_it, prev_level, level);
464 }
465
466 /* Pop from the stack the embedding level, the directional override
467 status, and optionally saved information for the isolating run
468 sequence. Return the new level. */
469 static int
470 bidi_pop_embedding_level (struct bidi_it *bidi_it)
471 {
472 int level;
473
474 /* UAX#9 says to ignore invalid PDFs (X7, last bullet)
475 and PDIs (X6a, 2nd bullet). */
476 if (bidi_it->stack_idx > 0)
477 {
478 bool isolate_status = ISOLATE_STATUS (bidi_it, bidi_it->stack_idx);
479 int old_level = bidi_it->level_stack[bidi_it->stack_idx].level;
480
481 struct bidi_stack st;
482
483 st = bidi_it->level_stack[bidi_it->stack_idx];
484 if (isolate_status)
485 {
486 bidi_dir_t sos = ((st.flags >> 3) & 1);
487 /* PREV is used in W1 for resolving WEAK_NSM. By the time
488 we get to an NSM, we must have gotten past at least one
489 character: the PDI that ends the isolate from which we
490 are popping here. So PREV will have been filled up by
491 the time we first use it. We initialize it here to
492 UNKNOWN_BT to be able to catch any blunders in this
493 logic. */
494 bidi_it->prev.orig_type = bidi_it->prev.type = UNKNOWN_BT;
495 bidi_it->last_strong.type = st.last_strong_type;
496 bidi_it->prev_for_neutral.type = st.prev_for_neutral_type;
497 bidi_it->next_for_neutral.type = st.next_for_neutral_type;
498 bidi_it->next_for_neutral.charpos = st.next_for_neutral_pos;
499 bidi_it->sos = (sos == 0 ? L2R : R2L);
500 }
501 else
502 bidi_set_sos_type (bidi_it, old_level,
503 bidi_it->level_stack[bidi_it->stack_idx - 1].level);
504
505 bidi_it->stack_idx--;
506 }
507 level = bidi_it->level_stack[bidi_it->stack_idx].level;
508 eassert (0 <= level && level <= BIDI_MAXDEPTH + 1);
509 return level;
510 }
511
512 /* Record in SAVED_INFO the information about the current character. */
513 static void
514 bidi_remember_char (struct bidi_saved_info *saved_info,
515 struct bidi_it *bidi_it, bool from_type)
516 {
517 saved_info->charpos = bidi_it->charpos;
518 if (from_type)
519 saved_info->type = bidi_it->type;
520 else
521 saved_info->type = bidi_it->type_after_wn;
522 bidi_check_type (saved_info->type);
523 saved_info->orig_type = bidi_it->orig_type;
524 bidi_check_type (saved_info->orig_type);
525 }
526
527 /* Copy the bidi iterator from FROM to TO. To save cycles, this only
528 copies the part of the level stack that is actually in use. */
529 static void
530 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
531 {
532 /* Copy everything from the start through the active part of
533 the level stack. */
534 memcpy (to, from,
535 (offsetof (struct bidi_it, level_stack) + sizeof from->level_stack[0]
536 + from->stack_idx * sizeof from->level_stack[0]));
537 }
538
539 \f
540 /***********************************************************************
541 Caching the bidi iterator states
542 ***********************************************************************/
543
544 /* We allocate and de-allocate the cache in chunks of this size (in
545 characters). 200 was chosen as an upper limit for reasonably-long
546 lines in a text file/buffer. */
547 #define BIDI_CACHE_CHUNK 200
548 /* Maximum size we allow the cache to become, per iterator stack slot,
549 in units of struct bidi_it size. If we allow unlimited growth, we
550 could run out of memory for pathologically long bracketed text or
551 very long text lines that need to be reordered. This is aggravated
552 when word-wrap is in effect, since then functions display_line and
553 move_it_in_display_line_to need to keep up to 4 copies of the
554 cache.
555
556 This limitation means there can be no more than that amount of
557 contiguous RTL text on any single physical line in a LTR paragraph,
558 and similarly with contiguous LTR + numeric text in a RTL
559 paragraph. (LTR text in a LTR paragraph and RTL text in a RTL
560 paragraph are not reordered, and so don't need the cache, and
561 cannot hit this limit.) More importantly, no single line can have
562 text longer than this inside paired brackets (because bracket pairs
563 resolution uses the cache). If the limit is exceeded, the fallback
564 code will produce visual order that will be incorrect if there are
565 RTL characters in the offending line of text. */
566 /* Do we need to allow customization of this limit? */
567 #define BIDI_CACHE_MAX_ELTS_PER_SLOT 50000
568 #if BIDI_CACHE_CHUNK >= BIDI_CACHE_MAX_ELTS_PER_SLOT
569 # error BIDI_CACHE_CHUNK must be less than BIDI_CACHE_MAX_ELTS_PER_SLOT
570 #endif
571 static ptrdiff_t bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
572 static struct bidi_it *bidi_cache;
573 static ptrdiff_t bidi_cache_size = 0;
574 enum { elsz = sizeof (struct bidi_it) };
575 static ptrdiff_t bidi_cache_idx; /* next unused cache slot */
576 static ptrdiff_t bidi_cache_last_idx; /* slot of last cache hit */
577 static ptrdiff_t bidi_cache_start = 0; /* start of cache for this
578 "stack" level */
579
580 /* 5-slot stack for saving the start of the previous level of the
581 cache. xdisp.c maintains a 5-slot stack for its iterator state,
582 and we need the same size of our stack. */
583 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
584 static int bidi_cache_sp;
585
586 /* Size of header used by bidi_shelve_cache. */
587 enum
588 {
589 bidi_shelve_header_size
590 = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
591 + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
592 + sizeof (bidi_cache_last_idx) + sizeof (bidi_cache_max_elts))
593 };
594
595 /* Effectively remove the cached states beyond the Nth state from the
596 part of the cache relevant to iteration of the current object
597 (buffer or string). */
598 static void
599 bidi_cache_reset_to (int n)
600 {
601 bidi_cache_idx = bidi_cache_start + n;
602 bidi_cache_last_idx = -1;
603 }
604
605 /* Reset the cache state to the empty state. We only reset the part
606 of the cache relevant to iteration of the current object. Previous
607 objects, which are pushed on the display iterator's stack, are left
608 intact. This is called when the cached information is no more
609 useful for the current iteration, e.g. when we were reseated to a
610 new position on the same object. */
611 static void
612 bidi_cache_reset (void)
613 {
614 bidi_cache_reset_to (0);
615 }
616
617 /* Shrink the cache to its minimal size. Called when we init the bidi
618 iterator for reordering a buffer or a string that does not come
619 from display properties, because that means all the previously
620 cached info is of no further use. */
621 static void
622 bidi_cache_shrink (void)
623 {
624 if (bidi_cache_size > BIDI_CACHE_CHUNK)
625 {
626 bidi_cache = xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
627 bidi_cache_size = BIDI_CACHE_CHUNK;
628 }
629 bidi_cache_reset ();
630 bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
631 }
632
633 static void
634 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
635 {
636 int current_scan_dir = bidi_it->scan_dir;
637
638 if (idx < bidi_cache_start || idx >= bidi_cache_idx)
639 emacs_abort ();
640
641 bidi_copy_it (bidi_it, &bidi_cache[idx]);
642 bidi_it->scan_dir = current_scan_dir;
643 bidi_cache_last_idx = idx;
644 }
645
646 /* Find a cached state with a given CHARPOS and resolved embedding
647 level less or equal to LEVEL. If LEVEL is -1, disregard the
648 resolved levels in cached states. DIR, if non-zero, means search
649 in that direction from the last cache hit.
650
651 Value is the index of the cached state, or -1 if not found. */
652 static ptrdiff_t
653 bidi_cache_search (ptrdiff_t charpos, int level, int dir)
654 {
655 ptrdiff_t i, i_start;
656
657 if (bidi_cache_idx > bidi_cache_start)
658 {
659 if (bidi_cache_last_idx == -1)
660 bidi_cache_last_idx = bidi_cache_idx - 1;
661 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
662 {
663 dir = -1;
664 i_start = bidi_cache_last_idx - 1;
665 }
666 else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
667 + bidi_cache[bidi_cache_last_idx].nchars - 1))
668 {
669 dir = 1;
670 i_start = bidi_cache_last_idx + 1;
671 }
672 else if (dir)
673 i_start = bidi_cache_last_idx;
674 else
675 {
676 dir = -1;
677 i_start = bidi_cache_idx - 1;
678 }
679
680 if (dir < 0)
681 {
682 /* Linear search for now; FIXME! */
683 for (i = i_start; i >= bidi_cache_start; i--)
684 if (bidi_cache[i].charpos <= charpos
685 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
686 && (level == -1 || bidi_cache[i].resolved_level <= level))
687 return i;
688 }
689 else
690 {
691 for (i = i_start; i < bidi_cache_idx; i++)
692 if (bidi_cache[i].charpos <= charpos
693 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
694 && (level == -1 || bidi_cache[i].resolved_level <= level))
695 return i;
696 }
697 }
698
699 return -1;
700 }
701
702 /* Find a cached state where the resolved level changes to a value
703 that is lower than LEVEL, and return its cache slot index. DIR is
704 the direction to search, starting with the last used cache slot.
705 If DIR is zero, we search backwards from the last occupied cache
706 slot. BEFORE means return the index of the slot that
707 is ``before'' the level change in the search direction. That is,
708 given the cached levels like this:
709
710 1122333442211
711 AB C
712
713 and assuming we are at the position cached at the slot marked with
714 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
715 index of slot B or A, depending whether BEFORE is, respectively,
716 true or false. */
717 static ptrdiff_t
718 bidi_cache_find_level_change (int level, int dir, bool before)
719 {
720 if (bidi_cache_idx)
721 {
722 ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
723 int incr = before ? 1 : 0;
724
725 if (i < 0) /* cache overflowed? */
726 i = 0;
727
728 if (!dir)
729 dir = -1;
730 else if (!incr)
731 i += dir;
732
733 if (dir < 0)
734 {
735 while (i >= bidi_cache_start + incr)
736 {
737 if (bidi_cache[i - incr].resolved_level >= 0
738 && bidi_cache[i - incr].resolved_level < level)
739 return i;
740 i--;
741 }
742 }
743 else
744 {
745 while (i < bidi_cache_idx - incr)
746 {
747 if (bidi_cache[i + incr].resolved_level >= 0
748 && bidi_cache[i + incr].resolved_level < level)
749 return i;
750 i++;
751 }
752 }
753 }
754
755 return -1;
756 }
757
758 static void
759 bidi_cache_ensure_space (ptrdiff_t idx)
760 {
761 /* Enlarge the cache as needed. */
762 if (idx >= bidi_cache_size)
763 {
764 ptrdiff_t chunk_size = BIDI_CACHE_CHUNK;
765
766 if (bidi_cache_size > bidi_cache_max_elts - chunk_size)
767 chunk_size = bidi_cache_max_elts - bidi_cache_size;
768
769 if (max (idx + 1,
770 bidi_cache_size + chunk_size) <= bidi_cache_max_elts)
771 {
772 /* The bidi cache cannot be larger than the largest Lisp
773 string or buffer. */
774 ptrdiff_t string_or_buffer_bound
775 = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
776
777 /* Also, it cannot be larger than what C can represent. */
778 ptrdiff_t c_bound
779 = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
780 ptrdiff_t max_elts = bidi_cache_max_elts;
781
782 max_elts = min (max_elts, min (string_or_buffer_bound, c_bound));
783
784 /* Force xpalloc not to over-allocate by passing it MAX_ELTS
785 as its 4th argument. */
786 bidi_cache = xpalloc (bidi_cache, &bidi_cache_size,
787 max (chunk_size, idx - bidi_cache_size + 1),
788 max_elts, elsz);
789 eassert (bidi_cache_size > idx);
790 }
791 }
792 }
793
794 static int
795 bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved,
796 bool update_only)
797 {
798 ptrdiff_t idx;
799
800 /* We should never cache on backward scans. */
801 if (bidi_it->scan_dir == -1)
802 emacs_abort ();
803 idx = bidi_cache_search (bidi_it->charpos, -1, 1);
804
805 if (idx < 0 && update_only)
806 return 0;
807
808 if (idx < 0)
809 {
810 idx = bidi_cache_idx;
811 bidi_cache_ensure_space (idx);
812 /* Character positions should correspond to cache positions 1:1.
813 If we are outside the range of cached positions, the cache is
814 useless and must be reset. */
815 if (bidi_cache_start < idx && idx < bidi_cache_size
816 && (bidi_it->charpos > (bidi_cache[idx - 1].charpos
817 + bidi_cache[idx - 1].nchars)
818 || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
819 {
820 bidi_cache_reset ();
821 idx = bidi_cache_start;
822 }
823 if (bidi_it->nchars <= 0)
824 emacs_abort ();
825 /* Don't cache if no available space in the cache. */
826 if (bidi_cache_size > idx)
827 {
828 bidi_copy_it (&bidi_cache[idx], bidi_it);
829 if (!resolved)
830 bidi_cache[idx].resolved_level = -1;
831 }
832 }
833 else
834 {
835 /* Copy only the members which could have changed, to avoid
836 costly copying of the entire struct. */
837 bidi_cache[idx].type = bidi_it->type;
838 bidi_check_type (bidi_it->type);
839 bidi_cache[idx].type_after_wn = bidi_it->type_after_wn;
840 bidi_check_type (bidi_it->type_after_wn);
841 if (resolved)
842 bidi_cache[idx].resolved_level = bidi_it->resolved_level;
843 else
844 bidi_cache[idx].resolved_level = -1;
845 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
846 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
847 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
848 bidi_cache[idx].disp_pos = bidi_it->disp_pos;
849 bidi_cache[idx].disp_prop = bidi_it->disp_prop;
850 bidi_cache[idx].bracket_pairing_pos = bidi_it->bracket_pairing_pos;
851 bidi_cache[idx].bracket_enclosed_type = bidi_it->bracket_enclosed_type;
852 }
853
854 if (bidi_cache_size > idx)
855 {
856 bidi_cache_last_idx = idx;
857 if (idx >= bidi_cache_idx)
858 bidi_cache_idx = idx + 1;
859 return 1;
860 }
861 else
862 {
863 /* The cache overflowed. */
864 bidi_cache_last_idx = -1;
865 return 0;
866 }
867 }
868
869 /* Look for a cached iterator state that corresponds to CHARPOS. If
870 found, copy the cached state into BIDI_IT and return the type of
871 the cached entry. If not found, return UNKNOWN_BT. RESOLVED_ONLY
872 zero means it is OK to return cached states that were not fully
873 resolved yet. This can happen if the state was cached before it
874 was resolved in bidi_resolve_neutral. */
875 static bidi_type_t
876 bidi_cache_find (ptrdiff_t charpos, bool resolved_only, struct bidi_it *bidi_it)
877 {
878 ptrdiff_t i = bidi_cache_search (charpos, -1, bidi_it->scan_dir);
879
880 if (i >= bidi_cache_start
881 && (!resolved_only
882 /* Callers that want only fully resolved states (and set
883 resolved_only = true) need to be sure that there's enough
884 info in the cached state to return the state as final,
885 and if not, they don't want the cached state. */
886 || bidi_cache[i].resolved_level >= 0))
887 {
888 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
889
890 bidi_copy_it (bidi_it, &bidi_cache[i]);
891 bidi_cache_last_idx = i;
892 /* Don't let scan direction from the cached state override
893 the current scan direction. */
894 bidi_it->scan_dir = current_scan_dir;
895 return bidi_it->type;
896 }
897
898 return UNKNOWN_BT;
899 }
900
901 static int
902 bidi_peek_at_next_level (struct bidi_it *bidi_it)
903 {
904 if (bidi_cache_idx == bidi_cache_start)
905 emacs_abort ();
906 /* If the cache overflowed, return the level of the last cached
907 character. */
908 if (bidi_cache_last_idx == -1
909 || (bidi_cache_last_idx >= bidi_cache_idx - 1 && bidi_it->scan_dir > 0))
910 return bidi_cache[bidi_cache_idx - 1].resolved_level;
911 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
912 }
913
914 \f
915 /***********************************************************************
916 Pushing and popping the bidi iterator state
917 ***********************************************************************/
918
919 /* Push the bidi iterator state in preparation for reordering a
920 different object, e.g. display string found at certain buffer
921 position. Pushing the bidi iterator boils down to saving its
922 entire state on the cache and starting a new cache "stacked" on top
923 of the current cache. */
924 void
925 bidi_push_it (struct bidi_it *bidi_it)
926 {
927 /* Give this stack slot its cache room. */
928 bidi_cache_max_elts += BIDI_CACHE_MAX_ELTS_PER_SLOT;
929 /* Save the current iterator state in its entirety after the last
930 used cache slot. */
931 bidi_cache_ensure_space (bidi_cache_idx);
932 bidi_cache[bidi_cache_idx++] = *bidi_it;
933
934 /* Push the current cache start onto the stack. */
935 eassert (bidi_cache_sp < IT_STACK_SIZE);
936 bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
937
938 /* Start a new level of cache, and make it empty. */
939 bidi_cache_start = bidi_cache_idx;
940 bidi_cache_last_idx = -1;
941 }
942
943 /* Restore the iterator state saved by bidi_push_it and return the
944 cache to the corresponding state. */
945 void
946 bidi_pop_it (struct bidi_it *bidi_it)
947 {
948 if (bidi_cache_start <= 0)
949 emacs_abort ();
950
951 /* Reset the next free cache slot index to what it was before the
952 call to bidi_push_it. */
953 bidi_cache_idx = bidi_cache_start - 1;
954
955 /* Restore the bidi iterator state saved in the cache. */
956 *bidi_it = bidi_cache[bidi_cache_idx];
957
958 /* Pop the previous cache start from the stack. */
959 if (bidi_cache_sp <= 0)
960 emacs_abort ();
961 bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
962
963 /* Invalidate the last-used cache slot data. */
964 bidi_cache_last_idx = -1;
965
966 bidi_cache_max_elts -= BIDI_CACHE_MAX_ELTS_PER_SLOT;
967 eassert (bidi_cache_max_elts > 0);
968 }
969
970 static ptrdiff_t bidi_cache_total_alloc;
971
972 /* Stash away a copy of the cache and its control variables. */
973 void *
974 bidi_shelve_cache (void)
975 {
976 unsigned char *databuf;
977 ptrdiff_t alloc;
978
979 /* Empty cache. */
980 if (bidi_cache_idx == 0)
981 return NULL;
982
983 alloc = (bidi_shelve_header_size
984 + bidi_cache_idx * sizeof (struct bidi_it));
985 databuf = xmalloc (alloc);
986 bidi_cache_total_alloc += alloc;
987
988 memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
989 memcpy (databuf + sizeof (bidi_cache_idx),
990 bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
991 memcpy (databuf + sizeof (bidi_cache_idx)
992 + bidi_cache_idx * sizeof (struct bidi_it),
993 bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
994 memcpy (databuf + sizeof (bidi_cache_idx)
995 + bidi_cache_idx * sizeof (struct bidi_it)
996 + sizeof (bidi_cache_start_stack),
997 &bidi_cache_sp, sizeof (bidi_cache_sp));
998 memcpy (databuf + sizeof (bidi_cache_idx)
999 + bidi_cache_idx * sizeof (struct bidi_it)
1000 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
1001 &bidi_cache_start, sizeof (bidi_cache_start));
1002 memcpy (databuf + sizeof (bidi_cache_idx)
1003 + bidi_cache_idx * sizeof (struct bidi_it)
1004 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
1005 + sizeof (bidi_cache_start),
1006 &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
1007 memcpy (databuf + sizeof (bidi_cache_idx)
1008 + bidi_cache_idx * sizeof (struct bidi_it)
1009 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
1010 + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx),
1011 &bidi_cache_max_elts, sizeof (bidi_cache_max_elts));
1012
1013 return databuf;
1014 }
1015
1016 /* Restore the cache state from a copy stashed away by
1017 bidi_shelve_cache, and free the buffer used to stash that copy.
1018 JUST_FREE means free the buffer, but don't restore the
1019 cache; used when the corresponding iterator is discarded instead of
1020 being restored. */
1021 void
1022 bidi_unshelve_cache (void *databuf, bool just_free)
1023 {
1024 unsigned char *p = databuf;
1025
1026 if (!p)
1027 {
1028 if (!just_free)
1029 {
1030 /* A NULL pointer means an empty cache. */
1031 bidi_cache_start = 0;
1032 bidi_cache_sp = 0;
1033 bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
1034 bidi_cache_reset ();
1035 }
1036 }
1037 else
1038 {
1039 if (just_free)
1040 {
1041 ptrdiff_t idx;
1042
1043 memcpy (&idx, p, sizeof (bidi_cache_idx));
1044 bidi_cache_total_alloc
1045 -= bidi_shelve_header_size + idx * sizeof (struct bidi_it);
1046 }
1047 else
1048 {
1049 memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
1050 bidi_cache_ensure_space (bidi_cache_idx);
1051 memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
1052 bidi_cache_idx * sizeof (struct bidi_it));
1053 memcpy (bidi_cache_start_stack,
1054 p + sizeof (bidi_cache_idx)
1055 + bidi_cache_idx * sizeof (struct bidi_it),
1056 sizeof (bidi_cache_start_stack));
1057 memcpy (&bidi_cache_sp,
1058 p + sizeof (bidi_cache_idx)
1059 + bidi_cache_idx * sizeof (struct bidi_it)
1060 + sizeof (bidi_cache_start_stack),
1061 sizeof (bidi_cache_sp));
1062 memcpy (&bidi_cache_start,
1063 p + sizeof (bidi_cache_idx)
1064 + bidi_cache_idx * sizeof (struct bidi_it)
1065 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
1066 sizeof (bidi_cache_start));
1067 memcpy (&bidi_cache_last_idx,
1068 p + sizeof (bidi_cache_idx)
1069 + bidi_cache_idx * sizeof (struct bidi_it)
1070 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
1071 + sizeof (bidi_cache_start),
1072 sizeof (bidi_cache_last_idx));
1073 memcpy (&bidi_cache_max_elts,
1074 p + sizeof (bidi_cache_idx)
1075 + bidi_cache_idx * sizeof (struct bidi_it)
1076 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
1077 + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx),
1078 sizeof (bidi_cache_max_elts));
1079 bidi_cache_total_alloc
1080 -= (bidi_shelve_header_size
1081 + bidi_cache_idx * sizeof (struct bidi_it));
1082 }
1083
1084 xfree (p);
1085 }
1086 }
1087
1088 \f
1089 /***********************************************************************
1090 Initialization
1091 ***********************************************************************/
1092 static void
1093 bidi_initialize (void)
1094 {
1095 bidi_type_table = uniprop_table (intern ("bidi-class"));
1096 if (NILP (bidi_type_table))
1097 emacs_abort ();
1098 staticpro (&bidi_type_table);
1099
1100 bidi_mirror_table = uniprop_table (intern ("mirroring"));
1101 if (NILP (bidi_mirror_table))
1102 emacs_abort ();
1103 staticpro (&bidi_mirror_table);
1104
1105 bidi_brackets_table = uniprop_table (intern ("bracket-type"));
1106 if (NILP (bidi_brackets_table))
1107 emacs_abort ();
1108 staticpro (&bidi_brackets_table);
1109
1110 DEFSYM (Qparagraph_start, "paragraph-start");
1111 paragraph_start_re = Fsymbol_value (Qparagraph_start);
1112 if (!STRINGP (paragraph_start_re))
1113 paragraph_start_re = build_string ("\f\\|[ \t]*$");
1114 staticpro (&paragraph_start_re);
1115 DEFSYM (Qparagraph_separate, "paragraph-separate");
1116 paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
1117 if (!STRINGP (paragraph_separate_re))
1118 paragraph_separate_re = build_string ("[ \t\f]*$");
1119 staticpro (&paragraph_separate_re);
1120
1121 bidi_cache_sp = 0;
1122 bidi_cache_total_alloc = 0;
1123 bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
1124
1125 bidi_initialized = 1;
1126 }
1127
1128 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
1129 end. */
1130 static void
1131 bidi_set_paragraph_end (struct bidi_it *bidi_it)
1132 {
1133 bidi_it->invalid_levels = 0;
1134 bidi_it->invalid_isolates = 0;
1135 bidi_it->stack_idx = 0;
1136 bidi_it->resolved_level = bidi_it->level_stack[0].level;
1137 }
1138
1139 /* Initialize the bidi iterator from buffer/string position CHARPOS. */
1140 void
1141 bidi_init_it (ptrdiff_t charpos, ptrdiff_t bytepos, bool frame_window_p,
1142 struct bidi_it *bidi_it)
1143 {
1144 if (! bidi_initialized)
1145 bidi_initialize ();
1146 if (charpos >= 0)
1147 bidi_it->charpos = charpos;
1148 if (bytepos >= 0)
1149 bidi_it->bytepos = bytepos;
1150 bidi_it->frame_window_p = frame_window_p;
1151 bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit */
1152 bidi_it->first_elt = 1;
1153 bidi_set_paragraph_end (bidi_it);
1154 bidi_it->new_paragraph = 1;
1155 bidi_it->separator_limit = -1;
1156 bidi_it->type = NEUTRAL_B;
1157 bidi_it->type_after_wn = NEUTRAL_B;
1158 bidi_it->orig_type = NEUTRAL_B;
1159 /* FIXME: Review this!!! */
1160 bidi_it->prev.type = bidi_it->prev.orig_type = UNKNOWN_BT;
1161 bidi_it->last_strong.type = bidi_it->last_strong.orig_type = UNKNOWN_BT;
1162 bidi_it->next_for_neutral.charpos = -1;
1163 bidi_it->next_for_neutral.type
1164 = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
1165 bidi_it->prev_for_neutral.charpos = -1;
1166 bidi_it->prev_for_neutral.type
1167 = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
1168 bidi_it->bracket_pairing_pos = -1;
1169 bidi_it->sos = L2R; /* FIXME: should it be user-selectable? */
1170 bidi_it->disp_pos = -1; /* invalid/unknown */
1171 bidi_it->disp_prop = 0;
1172 /* We can only shrink the cache if we are at the bottom level of its
1173 "stack". */
1174 if (bidi_cache_start == 0)
1175 bidi_cache_shrink ();
1176 else
1177 bidi_cache_reset ();
1178 }
1179
1180 /* Perform initializations for reordering a new line of bidi text. */
1181 static void
1182 bidi_line_init (struct bidi_it *bidi_it)
1183 {
1184 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
1185 bidi_it->stack_idx = 0;
1186 bidi_it->resolved_level = bidi_it->level_stack[0].level;
1187 bidi_it->level_stack[0].flags = 0; /* NEUTRAL_DIR, false per X1 */
1188 bidi_it->invalid_levels = 0;
1189 bidi_it->isolate_level = 0; /* X1 */
1190 bidi_it->invalid_isolates = 0; /* X1 */
1191 /* Setting this to zero will force its recomputation the first time
1192 we need it for W5. */
1193 bidi_it->next_en_pos = 0;
1194 bidi_it->next_en_type = UNKNOWN_BT;
1195 bidi_it->next_for_ws.charpos = -1;
1196 bidi_it->next_for_ws.type = UNKNOWN_BT;
1197 bidi_it->bracket_pairing_pos = -1;
1198 bidi_set_sos_type (bidi_it,
1199 (bidi_it->paragraph_dir == R2L ? 1 : 0),
1200 bidi_it->level_stack[0].level); /* X10 */
1201
1202 bidi_cache_reset ();
1203 }
1204
1205 \f
1206 /***********************************************************************
1207 Fetching characters
1208 ***********************************************************************/
1209
1210 /* Count bytes in string S between BEG/BEGBYTE and END. BEG and END
1211 are zero-based character positions in S, BEGBYTE is byte position
1212 corresponding to BEG. UNIBYTE means S is a unibyte string. */
1213 static ptrdiff_t
1214 bidi_count_bytes (const unsigned char *s, ptrdiff_t beg,
1215 ptrdiff_t begbyte, ptrdiff_t end, bool unibyte)
1216 {
1217 ptrdiff_t pos = beg;
1218 const unsigned char *p = s + begbyte, *start = p;
1219
1220 if (unibyte)
1221 p = s + end;
1222 else
1223 {
1224 if (!CHAR_HEAD_P (*p))
1225 emacs_abort ();
1226
1227 while (pos < end)
1228 {
1229 p += BYTES_BY_CHAR_HEAD (*p);
1230 pos++;
1231 }
1232 }
1233
1234 return p - start;
1235 }
1236
1237 /* Fetch and return the character at byte position BYTEPOS. If S is
1238 non-NULL, fetch the character from string S; otherwise fetch the
1239 character from the current buffer. UNIBYTE means S is a
1240 unibyte string. */
1241 static int
1242 bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, bool unibyte)
1243 {
1244 if (s)
1245 {
1246 s += bytepos;
1247 if (unibyte)
1248 return *s;
1249 }
1250 else
1251 s = BYTE_POS_ADDR (bytepos);
1252 return STRING_CHAR (s);
1253 }
1254
1255 /* Fetch and return the character at CHARPOS/BYTEPOS. If that
1256 character is covered by a display string, treat the entire run of
1257 covered characters as a single character, either u+2029 or u+FFFC,
1258 and return their combined length in CH_LEN and NCHARS. DISP_POS
1259 specifies the character position of the next display string, or -1
1260 if not yet computed. When the next character is at or beyond that
1261 position, the function updates DISP_POS with the position of the
1262 next display string. *DISP_PROP non-zero means that there's really
1263 a display string at DISP_POS, as opposed to when we searched till
1264 DISP_POS without finding one. If *DISP_PROP is 2, it means the
1265 display spec is of the form `(space ...)', which is replaced with
1266 u+2029 to handle it as a paragraph separator. STRING->s is the C
1267 string to iterate, or NULL if iterating over a buffer or a Lisp
1268 string; in the latter case, STRING->lstring is the Lisp string. */
1269 static int
1270 bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
1271 int *disp_prop, struct bidi_string_data *string,
1272 struct window *w,
1273 bool frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
1274 {
1275 int ch;
1276 ptrdiff_t endpos
1277 = (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
1278 struct text_pos pos;
1279 int len;
1280
1281 /* If we got past the last known position of display string, compute
1282 the position of the next one. That position could be at CHARPOS. */
1283 if (charpos < endpos && charpos > *disp_pos)
1284 {
1285 SET_TEXT_POS (pos, charpos, bytepos);
1286 *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
1287 disp_prop);
1288 }
1289
1290 /* Fetch the character at BYTEPOS. */
1291 if (charpos >= endpos)
1292 {
1293 ch = BIDI_EOB;
1294 *ch_len = 1;
1295 *nchars = 1;
1296 *disp_pos = endpos;
1297 *disp_prop = 0;
1298 }
1299 else if (charpos >= *disp_pos && *disp_prop)
1300 {
1301 ptrdiff_t disp_end_pos;
1302
1303 /* We don't expect to find ourselves in the middle of a display
1304 property. Hopefully, it will never be needed. */
1305 if (charpos > *disp_pos)
1306 emacs_abort ();
1307 /* Text covered by `display' properties and overlays with
1308 display properties or display strings is handled as a single
1309 character that represents the entire run of characters
1310 covered by the display property. */
1311 if (*disp_prop == 2)
1312 {
1313 /* `(space ...)' display specs are handled as paragraph
1314 separators for the purposes of the reordering; see UAX#9
1315 section 3 and clause HL1 in section 4.3 there. */
1316 ch = PARAGRAPH_SEPARATOR;
1317 }
1318 else
1319 {
1320 /* All other display specs are handled as the Unicode Object
1321 Replacement Character. */
1322 ch = OBJECT_REPLACEMENT_CHARACTER;
1323 }
1324 disp_end_pos = compute_display_string_end (*disp_pos, string);
1325 if (disp_end_pos < 0)
1326 {
1327 /* Somebody removed the display string from the buffer
1328 behind our back. Recover by processing this buffer
1329 position as if no display property were present there to
1330 begin with. */
1331 *disp_prop = 0;
1332 goto normal_char;
1333 }
1334 *nchars = disp_end_pos - *disp_pos;
1335 if (*nchars <= 0)
1336 emacs_abort ();
1337 if (string->s)
1338 *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
1339 disp_end_pos, string->unibyte);
1340 else if (STRINGP (string->lstring))
1341 *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
1342 bytepos, disp_end_pos, string->unibyte);
1343 else
1344 *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
1345 }
1346 else
1347 {
1348 normal_char:
1349 if (string->s)
1350 {
1351
1352 if (!string->unibyte)
1353 {
1354 ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
1355 *ch_len = len;
1356 }
1357 else
1358 {
1359 ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
1360 *ch_len = 1;
1361 }
1362 }
1363 else if (STRINGP (string->lstring))
1364 {
1365 if (!string->unibyte)
1366 {
1367 ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
1368 len);
1369 *ch_len = len;
1370 }
1371 else
1372 {
1373 ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
1374 *ch_len = 1;
1375 }
1376 }
1377 else
1378 {
1379 ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (bytepos), len);
1380 *ch_len = len;
1381 }
1382 *nchars = 1;
1383 }
1384
1385 /* If we just entered a run of characters covered by a display
1386 string, compute the position of the next display string. */
1387 if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
1388 && *disp_prop)
1389 {
1390 SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
1391 *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
1392 disp_prop);
1393 }
1394
1395 return ch;
1396 }
1397
1398 /* Like bidi_fetch_char, but ignore any text between an isolate
1399 initiator and its matching PDI or, if it has no matching PDI, the
1400 end of the paragraph. If isolates were skipped, CH_LEN and NCHARS
1401 are set to the number of bytes and characters between BYTEPOS/CHARPOS
1402 and the character that was fetched after skipping the isolates. */
1403 static int
1404 bidi_fetch_char_skip_isolates (ptrdiff_t charpos, ptrdiff_t bytepos,
1405 ptrdiff_t *disp_pos, int *disp_prop,
1406 struct bidi_string_data *string,
1407 struct window *w, bool frame_window_p,
1408 ptrdiff_t *ch_len, ptrdiff_t *nchars)
1409 {
1410 ptrdiff_t orig_charpos = charpos, orig_bytepos = bytepos;
1411 int ch = bidi_fetch_char (charpos, bytepos, disp_pos, disp_prop, string, w,
1412 frame_window_p, ch_len, nchars);
1413 bidi_type_t ch_type = bidi_get_type (ch, NEUTRAL_DIR);
1414 ptrdiff_t level = 0;
1415
1416 if (ch_type == LRI || ch_type == RLI || ch_type == FSI)
1417 {
1418 level++;
1419 while (level > 0 && ch_type != NEUTRAL_B)
1420 {
1421 charpos += *nchars;
1422 bytepos += *ch_len;
1423 ch = bidi_fetch_char (charpos, bytepos, disp_pos, disp_prop, string,
1424 w, frame_window_p, ch_len, nchars);
1425 ch_type = bidi_get_type (ch, NEUTRAL_DIR);
1426 /* A Note to P2 says to ignore max_depth limit. */
1427 if (ch_type == LRI || ch_type == RLI || ch_type == FSI)
1428 level++;
1429 else if (ch_type == PDI)
1430 level--;
1431 }
1432 }
1433
1434 /* Communicate to the caller how much did we skip, so it could get
1435 past the last character position we examined. */
1436 *nchars += charpos - orig_charpos;
1437 *ch_len += bytepos - orig_bytepos;
1438 return ch;
1439 }
1440
1441
1442 \f
1443 /***********************************************************************
1444 Determining paragraph direction
1445 ***********************************************************************/
1446
1447 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1448 Value is the non-negative length of the paragraph separator
1449 following the buffer position, -1 if position is at the beginning
1450 of a new paragraph, or -2 if position is neither at beginning nor
1451 at end of a paragraph. */
1452 static ptrdiff_t
1453 bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
1454 {
1455 Lisp_Object sep_re;
1456 Lisp_Object start_re;
1457 ptrdiff_t val;
1458
1459 sep_re = paragraph_separate_re;
1460 start_re = paragraph_start_re;
1461
1462 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1463 if (val < 0)
1464 {
1465 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1466 val = -1;
1467 else
1468 val = -2;
1469 }
1470
1471 return val;
1472 }
1473
1474 /* If the user has requested the long scans caching, make sure that
1475 BIDI cache is enabled. Otherwise, make sure it's disabled. */
1476
1477 static struct region_cache *
1478 bidi_paragraph_cache_on_off (void)
1479 {
1480 struct buffer *cache_buffer = current_buffer;
1481 bool indirect_p = false;
1482
1483 /* For indirect buffers, make sure to use the cache of their base
1484 buffer. */
1485 if (cache_buffer->base_buffer)
1486 {
1487 cache_buffer = cache_buffer->base_buffer;
1488 indirect_p = true;
1489 }
1490
1491 /* Don't turn on or off the cache in the base buffer, if the value
1492 of cache-long-scans of the base buffer is inconsistent with that.
1493 This is because doing so will just make the cache pure overhead,
1494 since if we turn it on via indirect buffer, it will be
1495 immediately turned off by its base buffer. */
1496 if (NILP (BVAR (current_buffer, cache_long_scans)))
1497 {
1498 if (!indirect_p
1499 || NILP (BVAR (cache_buffer, cache_long_scans)))
1500 {
1501 if (cache_buffer->bidi_paragraph_cache)
1502 {
1503 free_region_cache (cache_buffer->bidi_paragraph_cache);
1504 cache_buffer->bidi_paragraph_cache = 0;
1505 }
1506 }
1507 return NULL;
1508 }
1509 else
1510 {
1511 if (!indirect_p
1512 || !NILP (BVAR (cache_buffer, cache_long_scans)))
1513 {
1514 if (!cache_buffer->bidi_paragraph_cache)
1515 cache_buffer->bidi_paragraph_cache = new_region_cache ();
1516 }
1517 return cache_buffer->bidi_paragraph_cache;
1518 }
1519 }
1520
1521 /* On my 2005-vintage machine, searching back for paragraph start
1522 takes ~1 ms per line. And bidi_paragraph_init is called 4 times
1523 when user types C-p. The number below limits each call to
1524 bidi_paragraph_init to about 10 ms. */
1525 #define MAX_PARAGRAPH_SEARCH 7500
1526
1527 /* Find the beginning of this paragraph by looking back in the buffer.
1528 Value is the byte position of the paragraph's beginning, or
1529 BEGV_BYTE if paragraph_start_re is still not found after looking
1530 back MAX_PARAGRAPH_SEARCH lines in the buffer. */
1531 static ptrdiff_t
1532 bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
1533 {
1534 Lisp_Object re = paragraph_start_re;
1535 ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
1536 struct region_cache *bpc = bidi_paragraph_cache_on_off ();
1537 ptrdiff_t n = 0, oldpos = pos, next;
1538 struct buffer *cache_buffer = current_buffer;
1539
1540 if (cache_buffer->base_buffer)
1541 cache_buffer = cache_buffer->base_buffer;
1542
1543 while (pos_byte > BEGV_BYTE
1544 && n++ < MAX_PARAGRAPH_SEARCH
1545 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1546 {
1547 /* FIXME: What if the paragraph beginning is covered by a
1548 display string? And what if a display string covering some
1549 of the text over which we scan back includes
1550 paragraph_start_re? */
1551 DEC_BOTH (pos, pos_byte);
1552 if (bpc && region_cache_backward (cache_buffer, bpc, pos, &next))
1553 {
1554 pos = next, pos_byte = CHAR_TO_BYTE (pos);
1555 break;
1556 }
1557 else
1558 pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
1559 }
1560 if (n >= MAX_PARAGRAPH_SEARCH)
1561 pos = BEGV, pos_byte = BEGV_BYTE;
1562 if (bpc)
1563 know_region_cache (cache_buffer, bpc, pos, oldpos);
1564 /* Positions returned by the region cache are not limited to
1565 BEGV..ZV range, so we limit them here. */
1566 pos_byte = clip_to_bounds (BEGV_BYTE, pos_byte, ZV_BYTE);
1567 return pos_byte;
1568 }
1569
1570 /* On a 3.4 GHz machine, searching forward for a strong directional
1571 character in a long paragraph full of weaks or neutrals takes about
1572 1 ms for each 20K characters. The number below limits each call to
1573 bidi_paragraph_init to less than 10 ms even on slow machines. */
1574 #define MAX_STRONG_CHAR_SEARCH 100000
1575
1576 /* Starting from POS, find the first strong (L, R, or AL) character,
1577 while skipping over any characters between an isolate initiator and
1578 its matching PDI. STOP_AT_PDI non-zero means stop at the PDI that
1579 matches the isolate initiator at POS. Return the bidi type of the
1580 character where the search stopped. Give up if after examining
1581 MAX_STRONG_CHAR_SEARCH buffer or string positions no strong
1582 character was found. */
1583 static bidi_type_t
1584 find_first_strong_char (ptrdiff_t pos, ptrdiff_t bytepos, ptrdiff_t end,
1585 ptrdiff_t *disp_pos, int *disp_prop,
1586 struct bidi_string_data *string, struct window *w,
1587 bool string_p, bool frame_window_p,
1588 ptrdiff_t *ch_len, ptrdiff_t *nchars, bool stop_at_pdi)
1589 {
1590 ptrdiff_t pos1;
1591 bidi_type_t type;
1592 int ch;
1593
1594 if (stop_at_pdi)
1595 {
1596 /* If STOP_AT_PDI is non-zero, we must have been called with FSI
1597 at POS. Get past it. */
1598 #ifdef ENABLE_CHECKING
1599 ch = bidi_fetch_char (pos, bytepos, disp_pos, disp_prop, string, w,
1600 frame_window_p, ch_len, nchars);
1601 type = bidi_get_type (ch, NEUTRAL_DIR);
1602 eassert (type == FSI /* || type == LRI || type == RLI */);
1603 #endif
1604 pos += *nchars;
1605 bytepos += *ch_len;
1606 }
1607 ch = bidi_fetch_char_skip_isolates (pos, bytepos, disp_pos, disp_prop, string,
1608 w, frame_window_p, ch_len, nchars);
1609 type = bidi_get_type (ch, NEUTRAL_DIR);
1610
1611 pos1 = pos;
1612 for (pos += *nchars, bytepos += *ch_len;
1613 bidi_get_category (type) != STRONG
1614 /* If requested to stop at first PDI, stop there. */
1615 && !(stop_at_pdi && type == PDI)
1616 /* Stop when searched too far into an abnormally large
1617 paragraph full of weak or neutral characters. */
1618 && pos - pos1 < MAX_STRONG_CHAR_SEARCH;
1619 type = bidi_get_type (ch, NEUTRAL_DIR))
1620 {
1621 if (pos >= end)
1622 {
1623 /* Pretend there's a paragraph separator at end of
1624 buffer/string. */
1625 type = NEUTRAL_B;
1626 break;
1627 }
1628 if (!string_p
1629 && type == NEUTRAL_B
1630 && bidi_at_paragraph_end (pos, bytepos) >= -1)
1631 break;
1632 /* Fetch next character and advance to get past it. */
1633 ch = bidi_fetch_char_skip_isolates (pos, bytepos, disp_pos, disp_prop,
1634 string, w, frame_window_p,
1635 ch_len, nchars);
1636 pos += *nchars;
1637 bytepos += *ch_len;
1638 }
1639 return type;
1640 }
1641
1642 /* Determine the base direction, a.k.a. base embedding level, of the
1643 paragraph we are about to iterate through. If DIR is either L2R or
1644 R2L, just use that. Otherwise, determine the paragraph direction
1645 from the first strong directional character of the paragraph.
1646
1647 NO_DEFAULT_P means don't default to L2R if the paragraph
1648 has no strong directional characters and both DIR and
1649 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back
1650 in the buffer until a paragraph is found with a strong character,
1651 or until hitting BEGV. In the latter case, fall back to L2R. This
1652 flag is used in current-bidi-paragraph-direction.
1653
1654 Note that this function gives the paragraph separator the same
1655 direction as the preceding paragraph, even though Emacs generally
1656 views the separator as not belonging to any paragraph. */
1657 void
1658 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p)
1659 {
1660 ptrdiff_t bytepos = bidi_it->bytepos;
1661 bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
1662 ptrdiff_t pstartbyte;
1663 /* Note that begbyte is a byte position, while end is a character
1664 position. Yes, this is ugly, but we are trying to avoid costly
1665 calls to BYTE_TO_CHAR and its ilk. */
1666 ptrdiff_t begbyte = string_p ? 0 : BEGV_BYTE;
1667 ptrdiff_t end = string_p ? bidi_it->string.schars : ZV;
1668
1669 /* Special case for an empty buffer. */
1670 if (bytepos == begbyte && bidi_it->charpos == end)
1671 dir = L2R;
1672 /* We should never be called at EOB or before BEGV. */
1673 else if (bidi_it->charpos >= end || bytepos < begbyte)
1674 emacs_abort ();
1675
1676 if (dir == L2R)
1677 {
1678 bidi_it->paragraph_dir = L2R;
1679 bidi_it->new_paragraph = 0;
1680 }
1681 else if (dir == R2L)
1682 {
1683 bidi_it->paragraph_dir = R2L;
1684 bidi_it->new_paragraph = 0;
1685 }
1686 else if (dir == NEUTRAL_DIR) /* P2 */
1687 {
1688 ptrdiff_t ch_len, nchars;
1689 ptrdiff_t pos, disp_pos = -1;
1690 int disp_prop = 0;
1691 bidi_type_t type;
1692 const unsigned char *s;
1693
1694 if (!bidi_initialized)
1695 bidi_initialize ();
1696
1697 /* If we are inside a paragraph separator, we are just waiting
1698 for the separator to be exhausted; use the previous paragraph
1699 direction. But don't do that if we have been just reseated,
1700 because we need to reinitialize below in that case. */
1701 if (!bidi_it->first_elt
1702 && bidi_it->charpos < bidi_it->separator_limit)
1703 return;
1704
1705 /* If we are on a newline, get past it to where the next
1706 paragraph might start. But don't do that at BEGV since then
1707 we are potentially in a new paragraph that doesn't yet
1708 exist. */
1709 pos = bidi_it->charpos;
1710 s = (STRINGP (bidi_it->string.lstring)
1711 ? SDATA (bidi_it->string.lstring)
1712 : bidi_it->string.s);
1713 if (bytepos > begbyte
1714 && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
1715 {
1716 bytepos++;
1717 pos++;
1718 }
1719
1720 /* We are either at the beginning of a paragraph or in the
1721 middle of it. Find where this paragraph starts. */
1722 if (string_p)
1723 {
1724 /* We don't support changes of paragraph direction inside a
1725 string. It is treated as a single paragraph. */
1726 pstartbyte = 0;
1727 }
1728 else
1729 pstartbyte = bidi_find_paragraph_start (pos, bytepos);
1730 bidi_it->separator_limit = -1;
1731 bidi_it->new_paragraph = 0;
1732
1733 /* The following loop is run more than once only if NO_DEFAULT_P,
1734 and only if we are iterating on a buffer. */
1735 do {
1736 bytepos = pstartbyte;
1737 if (!string_p)
1738 pos = BYTE_TO_CHAR (bytepos);
1739 type = find_first_strong_char (pos, bytepos, end, &disp_pos, &disp_prop,
1740 &bidi_it->string, bidi_it->w,
1741 string_p, bidi_it->frame_window_p,
1742 &ch_len, &nchars, false);
1743 if (type == STRONG_R || type == STRONG_AL) /* P3 */
1744 bidi_it->paragraph_dir = R2L;
1745 else if (type == STRONG_L)
1746 bidi_it->paragraph_dir = L2R;
1747 if (!string_p
1748 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
1749 {
1750 /* If this paragraph is at BEGV, default to L2R. */
1751 if (pstartbyte == BEGV_BYTE)
1752 bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
1753 else
1754 {
1755 ptrdiff_t prevpbyte = pstartbyte;
1756 ptrdiff_t p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
1757
1758 /* Find the beginning of the previous paragraph, if any. */
1759 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
1760 {
1761 /* FXIME: What if p is covered by a display
1762 string? See also a FIXME inside
1763 bidi_find_paragraph_start. */
1764 DEC_BOTH (p, pbyte);
1765 prevpbyte = bidi_find_paragraph_start (p, pbyte);
1766 }
1767 pstartbyte = prevpbyte;
1768 }
1769 }
1770 } while (!string_p
1771 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
1772 }
1773 else
1774 emacs_abort ();
1775
1776 /* Contrary to UAX#9 clause P3, we only default the paragraph
1777 direction to L2R if we have no previous usable paragraph
1778 direction. This is allowed by the HL1 clause. */
1779 if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
1780 bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
1781 if (bidi_it->paragraph_dir == R2L)
1782 bidi_it->level_stack[0].level = 1;
1783 else
1784 bidi_it->level_stack[0].level = 0;
1785
1786 bidi_line_init (bidi_it);
1787 }
1788
1789 \f
1790 /***********************************************************************
1791 Resolving explicit and implicit levels.
1792 The rest of this file constitutes the core of the UBA implementation.
1793 ***********************************************************************/
1794
1795 static bool
1796 bidi_explicit_dir_char (int ch)
1797 {
1798 bidi_type_t ch_type;
1799
1800 if (!bidi_initialized)
1801 emacs_abort ();
1802 if (ch < 0)
1803 {
1804 eassert (ch == BIDI_EOB);
1805 return false;
1806 }
1807 ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1808 return (ch_type == LRE || ch_type == LRO
1809 || ch_type == RLE || ch_type == RLO
1810 || ch_type == PDF);
1811 }
1812
1813 /* Given an iterator state in BIDI_IT, advance one character position
1814 in the buffer/string to the next character (in the logical order),
1815 resolve any explicit embeddings, directional overrides, and isolate
1816 initiators and terminators, and return the embedding level of the
1817 character after resolving these explicit directives. */
1818 static int
1819 bidi_resolve_explicit (struct bidi_it *bidi_it)
1820 {
1821 int curchar;
1822 bidi_type_t type, typ1, prev_type = UNKNOWN_BT;
1823 int current_level;
1824 int new_level;
1825 bidi_dir_t override;
1826 bool isolate_status;
1827 bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
1828 ptrdiff_t ch_len, nchars, disp_pos, end;
1829 int disp_prop;
1830 ptrdiff_t eob
1831 = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
1832 ? bidi_it->string.schars : ZV);
1833
1834 /* Record the info about the previous character. */
1835 if (bidi_it->type_after_wn != WEAK_BN /* W1/Retaining */
1836 && bidi_it->type != WEAK_BN)
1837 {
1838 /* This special case is needed in support of Unicode 8.0
1839 correction to N0, as implemented in bidi_resolve_weak/W1
1840 below. */
1841 if (bidi_it->type_after_wn == NEUTRAL_ON
1842 && bidi_get_category (bidi_it->type) == STRONG
1843 && bidi_paired_bracket_type (bidi_it->ch) == BIDI_BRACKET_CLOSE)
1844 bidi_remember_char (&bidi_it->prev, bidi_it, 1);
1845 else
1846 bidi_remember_char (&bidi_it->prev, bidi_it, 0);
1847 }
1848 if (bidi_it->type_after_wn == STRONG_R
1849 || bidi_it->type_after_wn == STRONG_L
1850 || bidi_it->type_after_wn == STRONG_AL)
1851 bidi_remember_char (&bidi_it->last_strong, bidi_it, 0);
1852 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1853 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1854 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it, 1);
1855
1856 /* If we overstepped the characters used for resolving neutrals
1857 and whitespace, invalidate their info in the iterator. */
1858 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1859 {
1860 bidi_it->next_for_neutral.type = UNKNOWN_BT;
1861 /* If needed, reset the "magical" value of pairing bracket
1862 position, so that bidi_resolve_brackets will resume
1863 resolution of brackets according to BPA. */
1864 if (bidi_it->bracket_pairing_pos == eob)
1865 bidi_it->bracket_pairing_pos = -1;
1866 }
1867 if (bidi_it->next_en_pos >= 0
1868 && bidi_it->charpos >= bidi_it->next_en_pos)
1869 {
1870 bidi_it->next_en_pos = 0;
1871 bidi_it->next_en_type = UNKNOWN_BT;
1872 }
1873
1874 /* Reset the bracket resolution info, unless we previously decided
1875 (in bidi_find_bracket_pairs) that brackets in this level run
1876 should be resolved as neutrals. */
1877 if (bidi_it->bracket_pairing_pos != eob)
1878 {
1879 bidi_it->bracket_pairing_pos = -1;
1880 bidi_it->bracket_enclosed_type = UNKNOWN_BT;
1881 }
1882
1883 /* If reseat()'ed, don't advance, so as to start iteration from the
1884 position where we were reseated. bidi_it->bytepos can be less
1885 than BEGV_BYTE after reseat to BEGV. */
1886 if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
1887 || bidi_it->first_elt)
1888 {
1889 bidi_it->first_elt = 0;
1890 if (string_p)
1891 {
1892 const unsigned char *p
1893 = (STRINGP (bidi_it->string.lstring)
1894 ? SDATA (bidi_it->string.lstring)
1895 : bidi_it->string.s);
1896
1897 if (bidi_it->charpos < 0)
1898 bidi_it->charpos = bidi_it->bytepos = 0;
1899 eassert (bidi_it->bytepos == bidi_count_bytes (p, 0, 0,
1900 bidi_it->charpos,
1901 bidi_it->string.unibyte));
1902 }
1903 else
1904 {
1905 if (bidi_it->charpos < BEGV)
1906 {
1907 bidi_it->charpos = BEGV;
1908 bidi_it->bytepos = BEGV_BYTE;
1909 }
1910 eassert (bidi_it->bytepos == CHAR_TO_BYTE (bidi_it->charpos));
1911 }
1912 /* Determine the original bidi type of the previous character,
1913 which is needed for handling isolate initiators and PDF. The
1914 type of the previous character will be non-trivial only if
1915 our caller moved through some previous text in
1916 get_visually_first_element, in which case bidi_it->prev holds
1917 the information we want. */
1918 if (bidi_it->first_elt && bidi_it->prev.type != UNKNOWN_BT)
1919 {
1920 eassert (bidi_it->prev.charpos == bidi_it->charpos - 1);
1921 prev_type = bidi_it->prev.orig_type;
1922 }
1923 }
1924 /* Don't move at end of buffer/string. */
1925 else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
1926 {
1927 /* Advance to the next character, skipping characters covered by
1928 display strings (nchars > 1). */
1929 if (bidi_it->nchars <= 0)
1930 emacs_abort ();
1931 bidi_it->charpos += bidi_it->nchars;
1932 if (bidi_it->ch_len == 0)
1933 emacs_abort ();
1934 bidi_it->bytepos += bidi_it->ch_len;
1935 prev_type = bidi_it->orig_type;
1936 }
1937 else /* EOB or end of string */
1938 prev_type = NEUTRAL_B;
1939
1940 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
1941 isolate_status = ISOLATE_STATUS (bidi_it, bidi_it->stack_idx);
1942 override = OVERRIDE (bidi_it, bidi_it->stack_idx);
1943 new_level = current_level;
1944
1945 if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
1946 {
1947 curchar = BIDI_EOB;
1948 bidi_it->ch_len = 1;
1949 bidi_it->nchars = 1;
1950 bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
1951 bidi_it->disp_prop = 0;
1952 }
1953 else
1954 {
1955 /* LRI, RLI, and FSI increment, and PDF decrements, the
1956 embedding level of the _following_ characters, so we must
1957 first look at the type of the previous character to support
1958 that. */
1959 switch (prev_type)
1960 {
1961 case RLI: /* X5a */
1962 if (current_level < BIDI_MAXDEPTH
1963 && bidi_it->invalid_levels == 0
1964 && bidi_it->invalid_isolates == 0)
1965 {
1966 new_level = ((current_level + 1) & ~1) + 1;
1967 bidi_it->isolate_level++;
1968 bidi_push_embedding_level (bidi_it, new_level,
1969 NEUTRAL_DIR, true);
1970 }
1971 else
1972 bidi_it->invalid_isolates++;
1973 break;
1974 case LRI: /* X5b */
1975 if (current_level < BIDI_MAXDEPTH - 1
1976 && bidi_it->invalid_levels == 0
1977 && bidi_it->invalid_isolates == 0)
1978 {
1979 new_level = ((current_level + 2) & ~1);
1980 bidi_it->isolate_level++;
1981 bidi_push_embedding_level (bidi_it, new_level,
1982 NEUTRAL_DIR, true);
1983 }
1984 else
1985 bidi_it->invalid_isolates++;
1986 break;
1987 case PDF: /* X7 */
1988 if (!bidi_it->invalid_isolates)
1989 {
1990 if (bidi_it->invalid_levels)
1991 bidi_it->invalid_levels--;
1992 else if (!isolate_status && bidi_it->stack_idx >= 1)
1993 new_level = bidi_pop_embedding_level (bidi_it);
1994 }
1995 break;
1996 default:
1997 eassert (prev_type != FSI);
1998 /* Nothing. */
1999 break;
2000 }
2001 /* Fetch the character at BYTEPOS. If it is covered by a
2002 display string, treat the entire run of covered characters as
2003 a single character u+FFFC. */
2004 curchar = bidi_fetch_char (bidi_it->charpos, bidi_it->bytepos,
2005 &bidi_it->disp_pos, &bidi_it->disp_prop,
2006 &bidi_it->string, bidi_it->w,
2007 bidi_it->frame_window_p,
2008 &bidi_it->ch_len, &bidi_it->nchars);
2009 }
2010 bidi_it->ch = curchar;
2011 bidi_it->resolved_level = new_level;
2012
2013 /* Don't apply directional override here, as all the types we handle
2014 below will not be affected by the override anyway, and we need
2015 the original type unaltered. The override will be applied in
2016 bidi_resolve_weak. */
2017 type = bidi_get_type (curchar, NEUTRAL_DIR);
2018 bidi_it->orig_type = type;
2019 bidi_check_type (bidi_it->orig_type);
2020
2021 bidi_it->type_after_wn = UNKNOWN_BT;
2022
2023 switch (type)
2024 {
2025 case RLE: /* X2 */
2026 case RLO: /* X4 */
2027 bidi_it->type_after_wn = type;
2028 bidi_check_type (bidi_it->type_after_wn);
2029 type = WEAK_BN; /* X9/Retaining */
2030 if (new_level < BIDI_MAXDEPTH
2031 && bidi_it->invalid_levels == 0
2032 && bidi_it->invalid_isolates == 0)
2033 {
2034 /* Compute the least odd embedding level greater than
2035 the current level. */
2036 new_level = ((new_level + 1) & ~1) + 1;
2037 if (bidi_it->type_after_wn == RLE)
2038 override = NEUTRAL_DIR;
2039 else
2040 override = R2L;
2041 bidi_push_embedding_level (bidi_it, new_level, override, false);
2042 bidi_it->resolved_level = new_level;
2043 }
2044 else
2045 {
2046 if (bidi_it->invalid_isolates == 0)
2047 bidi_it->invalid_levels++;
2048 }
2049 break;
2050 case LRE: /* X3 */
2051 case LRO: /* X5 */
2052 bidi_it->type_after_wn = type;
2053 bidi_check_type (bidi_it->type_after_wn);
2054 type = WEAK_BN; /* X9/Retaining */
2055 if (new_level < BIDI_MAXDEPTH - 1
2056 && bidi_it->invalid_levels == 0
2057 && bidi_it->invalid_isolates == 0)
2058 {
2059 /* Compute the least even embedding level greater than
2060 the current level. */
2061 new_level = ((new_level + 2) & ~1);
2062 if (bidi_it->type_after_wn == LRE)
2063 override = NEUTRAL_DIR;
2064 else
2065 override = L2R;
2066 bidi_push_embedding_level (bidi_it, new_level, override, false);
2067 bidi_it->resolved_level = new_level;
2068 }
2069 else
2070 {
2071 if (bidi_it->invalid_isolates == 0)
2072 bidi_it->invalid_levels++;
2073 }
2074 break;
2075 case FSI: /* X5c */
2076 end = string_p ? bidi_it->string.schars : ZV;
2077 disp_pos = bidi_it->disp_pos;
2078 disp_prop = bidi_it->disp_prop;
2079 nchars = bidi_it->nchars;
2080 ch_len = bidi_it->ch_len;
2081 typ1 = find_first_strong_char (bidi_it->charpos,
2082 bidi_it->bytepos, end,
2083 &disp_pos, &disp_prop,
2084 &bidi_it->string, bidi_it->w,
2085 string_p, bidi_it->frame_window_p,
2086 &ch_len, &nchars, true);
2087 if (typ1 != STRONG_R && typ1 != STRONG_AL)
2088 {
2089 type = LRI;
2090 /* Override orig_type, which will be needed when we come to
2091 examine the next character, which is the first character
2092 inside the isolate. */
2093 bidi_it->orig_type = type;
2094 goto fsi_as_lri;
2095 }
2096 else
2097 {
2098 type = RLI;
2099 bidi_it->orig_type = type;
2100 }
2101 /* FALLTHROUGH */
2102 case RLI: /* X5a */
2103 if (override == NEUTRAL_DIR)
2104 bidi_it->type_after_wn = type;
2105 else /* Unicode 8.0 correction. */
2106 bidi_it->type_after_wn = (override == L2R ? STRONG_L : STRONG_R);
2107 bidi_check_type (bidi_it->type_after_wn);
2108 break;
2109 case LRI: /* X5b */
2110 fsi_as_lri:
2111 if (override == NEUTRAL_DIR)
2112 bidi_it->type_after_wn = type;
2113 else /* Unicode 8.0 correction. */
2114 bidi_it->type_after_wn = (override == L2R ? STRONG_L : STRONG_R);
2115 bidi_check_type (bidi_it->type_after_wn);
2116 break;
2117 case PDI: /* X6a */
2118 if (bidi_it->invalid_isolates)
2119 bidi_it->invalid_isolates--;
2120 else if (bidi_it->isolate_level > 0)
2121 {
2122 bidi_it->invalid_levels = 0;
2123 while (!ISOLATE_STATUS (bidi_it, bidi_it->stack_idx))
2124 bidi_pop_embedding_level (bidi_it);
2125 eassert (bidi_it->stack_idx > 0);
2126 new_level = bidi_pop_embedding_level (bidi_it);
2127 bidi_it->isolate_level--;
2128 }
2129 bidi_it->resolved_level = new_level;
2130 /* Unicode 8.0 correction. */
2131 {
2132 bidi_dir_t stack_override = OVERRIDE (bidi_it, bidi_it->stack_idx);
2133 if (stack_override == L2R)
2134 bidi_it->type_after_wn = STRONG_L;
2135 else if (stack_override == R2L)
2136 bidi_it->type_after_wn = STRONG_R;
2137 else
2138 bidi_it->type_after_wn = type;
2139 }
2140 break;
2141 case PDF: /* X7 */
2142 bidi_it->type_after_wn = type;
2143 bidi_check_type (bidi_it->type_after_wn);
2144 type = WEAK_BN; /* X9/Retaining */
2145 break;
2146 default:
2147 /* Nothing. */
2148 break;
2149 }
2150
2151 bidi_it->type = type;
2152 bidi_check_type (bidi_it->type);
2153
2154 if (bidi_it->type == NEUTRAL_B) /* X8 */
2155 {
2156 bidi_set_paragraph_end (bidi_it);
2157 /* This is needed by bidi_resolve_weak below, and in L1. */
2158 bidi_it->type_after_wn = bidi_it->type;
2159 }
2160
2161 eassert (bidi_it->resolved_level >= 0);
2162 return bidi_it->resolved_level;
2163 }
2164
2165 /* Advance in the buffer/string, resolve weak types and return the
2166 type of the next character after weak type resolution. */
2167 static bidi_type_t
2168 bidi_resolve_weak (struct bidi_it *bidi_it)
2169 {
2170 bidi_type_t type;
2171 bidi_dir_t override;
2172 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2173 int new_level = bidi_resolve_explicit (bidi_it);
2174 int next_char;
2175 bidi_type_t type_of_next;
2176 struct bidi_it saved_it;
2177 ptrdiff_t eob
2178 = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
2179 ? bidi_it->string.schars : ZV);
2180
2181 type = bidi_it->type;
2182 override = OVERRIDE (bidi_it, bidi_it->stack_idx);
2183
2184 eassert (!(type == UNKNOWN_BT
2185 || type == LRE
2186 || type == LRO
2187 || type == RLE
2188 || type == RLO
2189 || type == PDF));
2190
2191 eassert (prev_level >= 0);
2192 if (bidi_it->type == NEUTRAL_B)
2193 {
2194 /* We've got a new isolating sequence, compute the directional
2195 type of sos and initialize per-run variables (UAX#9, clause
2196 X10). */
2197 bidi_set_sos_type (bidi_it, prev_level, new_level);
2198 }
2199 if (type == NEUTRAL_S || type == NEUTRAL_WS
2200 || type == WEAK_BN || type == STRONG_AL)
2201 bidi_it->type_after_wn = type; /* needed in L1 */
2202 bidi_check_type (bidi_it->type_after_wn);
2203
2204 /* Level and directional override status are already recorded in
2205 bidi_it, and do not need any change; see X6. */
2206 if (override == R2L) /* X6 */
2207 type = STRONG_R;
2208 else if (override == L2R)
2209 type = STRONG_L;
2210 else
2211 {
2212 if (type == WEAK_NSM) /* W1 */
2213 {
2214 /* Note that we don't need to consider the case where the
2215 prev character has its type overridden by an RLO or LRO,
2216 because then either the type of this NSM would have been
2217 also overridden, or the previous character is outside the
2218 current level run, and thus not relevant to this NSM.
2219 This is why NSM gets the type_after_wn of the previous
2220 character. */
2221 /* bidi_set_sos_type sets type_after_wn to UNKNOWN_BT. */
2222 if (bidi_it->prev.type != UNKNOWN_BT
2223 /* If type_after_wn is NEUTRAL_B, this NSM is at sos. */
2224 && bidi_it->prev.type != NEUTRAL_B)
2225 {
2226 if (bidi_isolate_fmt_char (bidi_it->prev.type))
2227 {
2228 /* From W1: "Note that in an isolating run sequence,
2229 an isolate initiator followed by an NSM or any
2230 type other than PDI must be an overflow isolate
2231 initiator." */
2232 eassert (bidi_it->invalid_isolates > 0);
2233 type = NEUTRAL_ON;
2234 }
2235 else
2236 {
2237 /* This includes the Unicode 8.0 correction for N0,
2238 due to how we set prev.type in bidi_resolve_explicit,
2239 which see. */
2240 type = bidi_it->prev.type;
2241 }
2242 }
2243 else if (bidi_it->sos == R2L)
2244 type = STRONG_R;
2245 else if (bidi_it->sos == L2R)
2246 type = STRONG_L;
2247 else /* shouldn't happen! */
2248 emacs_abort ();
2249 }
2250 if (type == WEAK_EN /* W2 */
2251 && bidi_it->last_strong.type == STRONG_AL)
2252 type = WEAK_AN;
2253 else if (type == STRONG_AL) /* W3 */
2254 type = STRONG_R;
2255 else if ((type == WEAK_ES /* W4 */
2256 && bidi_it->prev.type == WEAK_EN
2257 && bidi_it->prev.orig_type == WEAK_EN)
2258 || (type == WEAK_CS
2259 && ((bidi_it->prev.type == WEAK_EN
2260 && bidi_it->prev.orig_type == WEAK_EN)
2261 || bidi_it->prev.type == WEAK_AN)))
2262 {
2263 const unsigned char *s
2264 = (STRINGP (bidi_it->string.lstring)
2265 ? SDATA (bidi_it->string.lstring)
2266 : bidi_it->string.s);
2267
2268 next_char = (bidi_it->charpos + bidi_it->nchars >= eob
2269 ? BIDI_EOB
2270 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
2271 s, bidi_it->string.unibyte));
2272 type_of_next = bidi_get_type (next_char, override);
2273
2274 if (type_of_next == WEAK_BN
2275 || bidi_explicit_dir_char (next_char))
2276 {
2277 bidi_copy_it (&saved_it, bidi_it);
2278 while (bidi_resolve_explicit (bidi_it) == new_level
2279 && bidi_it->type == WEAK_BN)
2280 type_of_next = bidi_it->type;
2281 bidi_copy_it (bidi_it, &saved_it);
2282 }
2283
2284 /* If the next character is EN, but the last strong-type
2285 character is AL, that next EN will be changed to AN when
2286 we process it in W2 above. So in that case, this ES
2287 should not be changed into EN. */
2288 if (type == WEAK_ES
2289 && type_of_next == WEAK_EN
2290 && bidi_it->last_strong.type != STRONG_AL)
2291 type = WEAK_EN;
2292 else if (type == WEAK_CS)
2293 {
2294 if (bidi_it->prev.type == WEAK_AN
2295 && (type_of_next == WEAK_AN
2296 /* If the next character is EN, but the last
2297 strong-type character is AL, EN will be later
2298 changed to AN when we process it in W2 above.
2299 So in that case, this ES should not be
2300 changed into EN. */
2301 || (type_of_next == WEAK_EN
2302 && bidi_it->last_strong.type == STRONG_AL)))
2303 type = WEAK_AN;
2304 else if (bidi_it->prev.type == WEAK_EN
2305 && type_of_next == WEAK_EN
2306 && bidi_it->last_strong.type != STRONG_AL)
2307 type = WEAK_EN;
2308 }
2309 }
2310 else if (type == WEAK_ET /* W5: ET with EN before or after it */
2311 || type == WEAK_BN) /* W5/Retaining */
2312 {
2313 if (bidi_it->prev.type == WEAK_EN) /* ET/BN w/EN before it */
2314 type = WEAK_EN;
2315 else if (bidi_it->next_en_pos > bidi_it->charpos
2316 && bidi_it->next_en_type != WEAK_BN)
2317 {
2318 if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
2319 type = WEAK_EN;
2320 }
2321 else if (type == WEAK_BN
2322 /* This condition is for the following important case:
2323
2324 . we are at level zero
2325 . either previous strong character was L,
2326 or we've seen no strong characters since sos
2327 and the base paragraph direction is L2R
2328 . this BN is NOT a bidi directional control
2329
2330 For such a situation, either this BN will be
2331 converted to EN per W5, and then to L by virtue
2332 of W7; or it will become ON per W6, and then L
2333 because of N1/N2. So we take a shortcut here
2334 and make it L right away, to avoid the
2335 potentially costly loop below. This is
2336 important when the buffer has a long series of
2337 control characters, like binary nulls, and no
2338 R2L characters at all. */
2339 && new_level == 0
2340 && !bidi_explicit_dir_char (bidi_it->ch)
2341 && ((bidi_it->last_strong.type == STRONG_L)
2342 || (bidi_it->last_strong.type == UNKNOWN_BT
2343 && bidi_it->sos == L2R)))
2344 type = STRONG_L;
2345 else if (bidi_it->next_en_pos >= 0)
2346 {
2347 /* We overstepped the last known position for ET
2348 resolution but there could be other such characters
2349 in this paragraph (when we are sure there are no more
2350 such positions, we set next_en_pos to a negative
2351 value). Try to find the next position for ET
2352 resolution. */
2353 ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
2354 const unsigned char *s = (STRINGP (bidi_it->string.lstring)
2355 ? SDATA (bidi_it->string.lstring)
2356 : bidi_it->string.s);
2357
2358 if (bidi_it->nchars <= 0)
2359 emacs_abort ();
2360 next_char
2361 = (bidi_it->charpos + bidi_it->nchars >= eob
2362 ? BIDI_EOB
2363 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
2364 bidi_it->string.unibyte));
2365 type_of_next = bidi_get_type (next_char, override);
2366
2367 if (type_of_next == WEAK_ET
2368 || type_of_next == WEAK_BN
2369 || bidi_explicit_dir_char (next_char))
2370 {
2371 bidi_copy_it (&saved_it, bidi_it);
2372 while (bidi_resolve_explicit (bidi_it) == new_level
2373 && (bidi_it->type == WEAK_BN
2374 || bidi_it->type == WEAK_ET))
2375 type_of_next = bidi_it->type;
2376 if (type == WEAK_BN
2377 && bidi_it->charpos == saved_it.charpos + saved_it.nchars)
2378 {
2379 /* If we entered the above loop with a BN that
2380 changes the level, the type of next
2381 character, which is in a different level, is
2382 not relevant to resolving this series of ET
2383 and BN. */
2384 en_pos = saved_it.charpos;
2385 type_of_next = type;
2386 }
2387 else
2388 en_pos = bidi_it->charpos;
2389 bidi_copy_it (bidi_it, &saved_it);
2390 }
2391 /* Remember this position, to speed up processing of the
2392 next ETs. */
2393 bidi_it->next_en_pos = en_pos;
2394 if (type_of_next == WEAK_EN)
2395 {
2396 /* If the last strong character is AL, the EN we've
2397 found will become AN when we get to it (W2). */
2398 if (bidi_it->last_strong.type == STRONG_AL)
2399 type_of_next = WEAK_AN;
2400 else if (type == WEAK_BN)
2401 type = NEUTRAL_ON; /* W6/Retaining */
2402 else
2403 type = WEAK_EN;
2404 }
2405 else if (type_of_next == NEUTRAL_B)
2406 /* Record the fact that there are no more ENs from
2407 here to the end of paragraph, to avoid entering the
2408 loop above ever again in this paragraph. */
2409 bidi_it->next_en_pos = -1;
2410 /* Record the type of the character where we ended our search. */
2411 bidi_it->next_en_type = type_of_next;
2412 }
2413 }
2414 }
2415
2416 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
2417 || (type == WEAK_BN
2418 && (bidi_it->prev.type == WEAK_CS /* W6/Retaining */
2419 || bidi_it->prev.type == WEAK_ES
2420 || bidi_it->prev.type == WEAK_ET)))
2421 type = NEUTRAL_ON;
2422
2423 /* Store the type we've got so far, before we clobber it with strong
2424 types in W7 and while resolving neutral types. But leave alone
2425 the original types that were recorded above, because we will need
2426 them for the L1 clause. */
2427 if (bidi_it->type_after_wn == UNKNOWN_BT)
2428 bidi_it->type_after_wn = type;
2429 bidi_check_type (bidi_it->type_after_wn);
2430
2431 if (type == WEAK_EN) /* W7 */
2432 {
2433 if ((bidi_it->last_strong.type == STRONG_L)
2434 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sos == L2R))
2435 type = STRONG_L;
2436 }
2437
2438 bidi_it->type = type;
2439 bidi_check_type (bidi_it->type);
2440 return type;
2441 }
2442
2443 /* Resolve the type of a neutral character according to the type of
2444 surrounding strong text and the current embedding level. */
2445 static bidi_type_t
2446 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
2447 {
2448 /* N1: "European and Arabic numbers act as if they were R in terms
2449 of their influence on NIs." */
2450 if (next_type == WEAK_EN || next_type == WEAK_AN)
2451 next_type = STRONG_R;
2452 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
2453 prev_type = STRONG_R;
2454
2455 if (next_type == prev_type) /* N1 */
2456 return next_type;
2457 else if ((lev & 1) == 0) /* N2 */
2458 return STRONG_L;
2459 else
2460 return STRONG_R;
2461 }
2462
2463 #define FLAG_EMBEDDING_INSIDE 1
2464 #define FLAG_OPPOSITE_INSIDE 2
2465
2466 /* A data type used in the stack maintained by
2467 bidi_find_bracket_pairs below. */
2468 typedef struct bpa_stack_entry {
2469 int close_bracket_char;
2470 int open_bracket_idx;
2471 #ifdef ENABLE_CHECKING
2472 ptrdiff_t open_bracket_pos;
2473 #endif
2474 unsigned flags : 2;
2475 } bpa_stack_entry;
2476
2477 /* With MAX_ALLOCA of 16KB, this should allow at least 1K slots in the
2478 BPA stack, which should be more than enough for actual bidi text. */
2479 #define MAX_BPA_STACK ((int)max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1))
2480
2481 /* UAX#9 says to match opening brackets with the matching closing
2482 brackets or their canonical equivalents. As of Unicode 8.0, there
2483 are only 2 bracket characters that have canonical equivalence
2484 decompositions: u+2329 and u+232A. So instead of accessing the
2485 table in uni-decomposition.el, we just handle these 2 characters
2486 with this simple macro. Note that ASCII characters don't have
2487 canonical equivalents by definition. */
2488
2489 /* To find all the characters that need to be processed by
2490 CANONICAL_EQU, first find all the characters which have
2491 decompositions in UnicodeData.txt, with this Awk script:
2492
2493 awk -F ";" " {if ($6 != \"\") print $1, $6}" UnicodeData.txt
2494
2495 Then produce a list of all the bracket characters in BidiBrackets.txt:
2496
2497 awk -F "[ ;]" " {if ($1 != \"#\" && $1 != \"\") print $1}" BidiBrackets.txt
2498
2499 And finally, cross-reference these two:
2500
2501 grep -Fw -f brackets.txt decompositions.txt
2502
2503 where "decompositions.txt" was produced by the 1st script, and
2504 "brackets.txt" by the 2nd script. In the output of grep, look
2505 only for decompositions that don't begin with some compatibility
2506 formatting tag, such as "<compat>". Only decompositions that
2507 consist solely of character codepoints are relevant to bidi
2508 brackets processing. */
2509
2510 #define CANONICAL_EQU(c) \
2511 ( ASCII_CHAR_P (c) ? c \
2512 : (c) == LEFT_POINTING_ANGLE_BRACKET ? LEFT_ANGLE_BRACKET \
2513 : (c) == RIGHT_POINTING_ANGLE_BRACKET ? RIGHT_ANGLE_BRACKET \
2514 : c )
2515
2516 #ifdef ENABLE_CHECKING
2517 # define STORE_BRACKET_CHARPOS \
2518 bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos
2519 #else
2520 # define STORE_BRACKET_CHARPOS /* nothing */
2521 #endif
2522
2523 #define PUSH_BPA_STACK \
2524 do { \
2525 int ch; \
2526 if (bpa_sp < MAX_BPA_STACK - 1) \
2527 { \
2528 bpa_sp++; \
2529 ch = CANONICAL_EQU (bidi_it->ch); \
2530 bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (ch); \
2531 bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx; \
2532 bpa_stack[bpa_sp].flags = 0; \
2533 STORE_BRACKET_CHARPOS; \
2534 } \
2535 } while (0)
2536
2537
2538 /* This function implements BPA, the Bidi Parenthesis Algorithm,
2539 described in BD16 and N0 of UAX#9. It finds all the bracket pairs
2540 in the current isolating sequence, and records the enclosed type
2541 and the position of the matching bracket in the cache. It returns
2542 non-zero if called with the iterator on the opening bracket which
2543 has a matching closing bracket in the current isolating sequence,
2544 zero otherwise. */
2545 static bool
2546 bidi_find_bracket_pairs (struct bidi_it *bidi_it)
2547 {
2548 bidi_bracket_type_t btype;
2549 bidi_type_t type = bidi_it->type;
2550 bool retval = false;
2551
2552 /* When scanning backwards, we don't expect any unresolved bidi
2553 bracket characters. */
2554 if (bidi_it->scan_dir != 1)
2555 emacs_abort ();
2556
2557 btype = bidi_paired_bracket_type (bidi_it->ch);
2558 if (btype == BIDI_BRACKET_OPEN)
2559 {
2560 bpa_stack_entry bpa_stack[MAX_BPA_STACK];
2561 int bpa_sp = -1;
2562 struct bidi_it saved_it;
2563 int base_level = bidi_it->level_stack[0].level;
2564 int embedding_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2565 int maxlevel = embedding_level;
2566 bidi_type_t embedding_type = (embedding_level & 1) ? STRONG_R : STRONG_L;
2567 struct bidi_it tem_it;
2568 bool l2r_seen = false, r2l_seen = false;
2569 ptrdiff_t pairing_pos;
2570 int idx_at_entry = bidi_cache_idx;
2571
2572 eassert (MAX_BPA_STACK >= 100);
2573 bidi_copy_it (&saved_it, bidi_it);
2574 /* bidi_cache_iterator_state refuses to cache on backward scans,
2575 and bidi_cache_fetch_state doesn't bring scan_dir from the
2576 cache, so we must initialize this explicitly. */
2577 tem_it.scan_dir = 1;
2578
2579 while (1)
2580 {
2581 int old_sidx, new_sidx;
2582 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2583
2584 if (maxlevel < current_level)
2585 maxlevel = current_level;
2586 /* Mark every opening bracket character we've traversed by
2587 putting its own position into bracket_pairing_pos. This
2588 is examined in bidi_resolve_brackets to distinguish
2589 brackets that were already resolved to stay NEUTRAL_ON,
2590 and those that were not yet processed by this function
2591 (because they were skipped when we skip higher embedding
2592 levels below). */
2593 if (btype == BIDI_BRACKET_OPEN && bidi_it->bracket_pairing_pos == -1)
2594 bidi_it->bracket_pairing_pos = bidi_it->charpos;
2595 if (!bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0))
2596 {
2597 /* No more space in cache -- give up and let the opening
2598 bracket that started this be processed as a
2599 NEUTRAL_ON. */
2600 bidi_cache_reset_to (idx_at_entry - bidi_cache_start);
2601 bidi_copy_it (bidi_it, &saved_it);
2602 goto give_up;
2603 }
2604 if (btype == BIDI_BRACKET_OPEN)
2605 PUSH_BPA_STACK;
2606 else if (btype == BIDI_BRACKET_CLOSE)
2607 {
2608 int sp = bpa_sp;
2609 int curchar = CANONICAL_EQU (bidi_it->ch);
2610
2611 eassert (sp >= 0);
2612 while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar)
2613 sp--;
2614 if (sp >= 0)
2615 {
2616 /* Update and cache the corresponding opening bracket. */
2617 bidi_cache_fetch_state (bpa_stack[sp].open_bracket_idx,
2618 &tem_it);
2619 #ifdef ENABLE_CHECKING
2620 eassert (bpa_stack[sp].open_bracket_pos == tem_it.charpos);
2621 #endif
2622 /* Determine the enclosed type for this bracket
2623 pair's type resolution according to N0. */
2624 if (bpa_stack[sp].flags & FLAG_EMBEDDING_INSIDE)
2625 tem_it.bracket_enclosed_type = embedding_type; /* N0b */
2626 else if (bpa_stack[sp].flags & FLAG_OPPOSITE_INSIDE)
2627 tem_it.bracket_enclosed_type /* N0c */
2628 = (embedding_type == STRONG_L ? STRONG_R : STRONG_L);
2629 else /* N0d */
2630 tem_it.bracket_enclosed_type = UNKNOWN_BT;
2631
2632 /* Record the position of the matching closing
2633 bracket, and update the cache. */
2634 tem_it.bracket_pairing_pos = bidi_it->charpos;
2635 bidi_cache_iterator_state (&tem_it, 0, 1);
2636
2637 /* Pop the BPA stack. */
2638 bpa_sp = sp - 1;
2639 }
2640 if (bpa_sp < 0)
2641 {
2642 retval = true;
2643 break;
2644 }
2645 }
2646 else if (bidi_get_category (bidi_it->type_after_wn) != NEUTRAL)
2647 {
2648 unsigned flag = 0;
2649 int sp;
2650
2651 /* Whenever we see a strong type, update the flags of
2652 all the slots on the stack. */
2653 switch (bidi_it->type)
2654 {
2655 case STRONG_L:
2656 flag = ((embedding_level & 1) == 0
2657 ? FLAG_EMBEDDING_INSIDE
2658 : FLAG_OPPOSITE_INSIDE);
2659 l2r_seen = true;
2660 break;
2661 case STRONG_R:
2662 case WEAK_EN:
2663 case WEAK_AN:
2664 flag = ((embedding_level & 1) == 1
2665 ? FLAG_EMBEDDING_INSIDE
2666 : FLAG_OPPOSITE_INSIDE);
2667 r2l_seen = true;
2668 break;
2669 default:
2670 break;
2671 }
2672 if (flag)
2673 {
2674 for (sp = bpa_sp; sp >= 0; sp--)
2675 bpa_stack[sp].flags |= flag;
2676 }
2677 }
2678 old_sidx = bidi_it->stack_idx;
2679 type = bidi_resolve_weak (bidi_it);
2680 /* Skip level runs excluded from this isolating run sequence. */
2681 new_sidx = bidi_it->stack_idx;
2682 if (bidi_it->level_stack[new_sidx].level > current_level
2683 && (ISOLATE_STATUS (bidi_it, new_sidx)
2684 || (new_sidx > old_sidx + 1
2685 && ISOLATE_STATUS (bidi_it, new_sidx - 1))))
2686 {
2687 while (bidi_it->level_stack[bidi_it->stack_idx].level
2688 > current_level)
2689 {
2690 if (maxlevel < bidi_it->level_stack[bidi_it->stack_idx].level)
2691 maxlevel = bidi_it->level_stack[bidi_it->stack_idx].level;
2692 if (!bidi_cache_iterator_state (bidi_it,
2693 type == NEUTRAL_B, 0))
2694 {
2695 /* No more space in cache -- give up and let the
2696 opening bracket that started this be
2697 processed as any other NEUTRAL_ON. */
2698 bidi_cache_reset_to (idx_at_entry - bidi_cache_start);
2699 bidi_copy_it (bidi_it, &saved_it);
2700 goto give_up;
2701 }
2702 type = bidi_resolve_weak (bidi_it);
2703 }
2704 }
2705 if (type == NEUTRAL_B
2706 || (bidi_it->level_stack[bidi_it->stack_idx].level
2707 != current_level))
2708 {
2709 /* We've marched all the way to the end of this
2710 isolating run sequence, and didn't find matching
2711 closing brackets for some opening brackets. Leave
2712 their type unchanged. */
2713 pairing_pos = bidi_it->charpos;
2714 break;
2715 }
2716 if (bidi_it->type_after_wn == NEUTRAL_ON) /* Unicode 8.0 correction */
2717 btype = bidi_paired_bracket_type (bidi_it->ch);
2718 else
2719 btype = BIDI_BRACKET_NONE;
2720 }
2721
2722 /* Restore bidi_it from the cache, which should have the bracket
2723 resolution members set as determined by the above loop. */
2724 type = bidi_cache_find (saved_it.charpos, 0, bidi_it);
2725 eassert (type == NEUTRAL_ON);
2726
2727 /* The following is an optimization for bracketed text that has
2728 only one level which is equal to the paragraph's base
2729 embedding level. That is, only L2R and weak/neutral
2730 characters in a L2R paragraph, or only R2L and weak/neutral
2731 characters in a R2L paragraph. Such brackets can be resolved
2732 by bidi_resolve_neutral, which has a further shortcut for
2733 this case. So we pretend we did not resolve the brackets in
2734 this case, set up next_for_neutral for the entire bracketed
2735 text, and reset the cache to the character before the opening
2736 bracket. The upshot is to allow bidi_move_to_visually_next
2737 reset the cache when it returns this opening bracket, thus
2738 cutting significantly on the size of the cache, which is
2739 important with long lines, especially if word-wrap is non-nil
2740 (which requires the display engine to copy the cache back and
2741 forth many times). */
2742 if (maxlevel == base_level
2743 && ((base_level == 0 && !r2l_seen)
2744 || (base_level == 1 && !l2r_seen)))
2745 {
2746 ptrdiff_t eob
2747 = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2748 ? bidi_it->string.schars : ZV);
2749
2750 if (retval)
2751 pairing_pos = bidi_it->bracket_pairing_pos;
2752
2753 /* This special value (which cannot possibly happen when
2754 brackets are resolved, since there's no character at ZV)
2755 will be noticed by bidi_resolve_explicit, and will be
2756 copied to the following iterator states, instead of being
2757 reset to -1. */
2758 bidi_it->bracket_pairing_pos = eob;
2759 /* This type value will be used for resolving the outermost
2760 closing bracket in bidi_resolve_brackets. */
2761 bidi_it->bracket_enclosed_type = embedding_type;
2762 /* bidi_cache_last_idx is set to the index of the current
2763 state, because we just called bidi_cache_find above.
2764 That state describes the outermost opening bracket, the
2765 one with which we entered this function. Force the cache
2766 to "forget" all the cached states starting from that state. */
2767 bidi_cache_reset_to (bidi_cache_last_idx - bidi_cache_start);
2768 /* Set up the next_for_neutral member, to help
2769 bidi_resolve_neutral. */
2770 bidi_it->next_for_neutral.type = embedding_type;
2771 bidi_it->next_for_neutral.charpos = pairing_pos;
2772 /* Pretend we didn't resolve this bracket. */
2773 retval = false;
2774 }
2775 }
2776
2777 give_up:
2778 return retval;
2779 }
2780
2781 static void
2782 bidi_record_type_for_neutral (struct bidi_saved_info *info, int level,
2783 bool nextp)
2784 {
2785 int idx;
2786
2787 for (idx = bidi_cache_last_idx + 1; idx < bidi_cache_idx; idx++)
2788 {
2789 int lev = bidi_cache[idx].level_stack[bidi_cache[idx].stack_idx].level;
2790
2791 if (lev <= level)
2792 {
2793 eassert (lev == level);
2794 if (nextp)
2795 bidi_cache[idx].next_for_neutral = *info;
2796 else
2797 bidi_cache[idx].prev_for_neutral = *info;
2798 break;
2799 }
2800 }
2801 }
2802
2803 static bidi_type_t
2804 bidi_resolve_brackets (struct bidi_it *bidi_it)
2805 {
2806 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2807 bool resolve_bracket = false;
2808 bidi_type_t type = UNKNOWN_BT;
2809 int ch;
2810 struct bidi_saved_info prev_for_neutral, next_for_neutral;
2811 ptrdiff_t eob
2812 = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2813 ? bidi_it->string.schars : ZV);
2814
2815 /* Record the prev_for_neutral type either from the previous
2816 character, if it was a strong or AN/EN, or from the
2817 prev_for_neutral information recorded previously. */
2818 if (bidi_it->type == STRONG_L || bidi_it->type == STRONG_R
2819 || bidi_it->type == WEAK_AN || bidi_it->type == WEAK_EN)
2820 bidi_remember_char (&prev_for_neutral, bidi_it, 1);
2821 else
2822 prev_for_neutral = bidi_it->prev_for_neutral;
2823 /* Record the next_for_neutral type information. */
2824 if (bidi_it->next_for_neutral.charpos > bidi_it->charpos)
2825 next_for_neutral = bidi_it->next_for_neutral;
2826 else
2827 next_for_neutral.charpos = -1;
2828 if (!bidi_it->first_elt)
2829 {
2830 type = bidi_cache_find (bidi_it->charpos + bidi_it->nchars, 0, bidi_it);
2831 ch = bidi_it->ch;
2832 }
2833 if (type == UNKNOWN_BT)
2834 {
2835 type = bidi_resolve_weak (bidi_it);
2836 if (type == NEUTRAL_ON)
2837 {
2838 /* bracket_pairing_pos == eob means this bracket does not
2839 need to be resolved as a bracket, but as a neutral, see
2840 the optimization trick we play near the end of
2841 bidi_find_bracket_pairs. */
2842 if (bidi_it->bracket_pairing_pos == eob)
2843 {
2844 /* If this is the outermost closing bracket of a run of
2845 characters in which we decided to resolve brackets as
2846 neutrals, use the embedding level's type, recorded in
2847 bracket_enclosed_type, to resolve the bracket. */
2848 if (bidi_it->next_for_neutral.charpos == bidi_it->charpos
2849 && bidi_paired_bracket_type (bidi_it->ch) == BIDI_BRACKET_CLOSE)
2850 type = bidi_it->bracket_enclosed_type;
2851 }
2852 else if (bidi_find_bracket_pairs (bidi_it))
2853 resolve_bracket = true;
2854 }
2855 }
2856 else if (bidi_it->bracket_pairing_pos != eob)
2857 {
2858 eassert (bidi_it->resolved_level == -1);
2859 /* If the cached state shows an increase of embedding level due
2860 to an isolate initiator, we need to update the 1st cached
2861 state of the next run of the current isolating sequence with
2862 the prev_for_neutral and next_for_neutral information, so
2863 that it will be picked up when we advance to that next run. */
2864 if (bidi_it->level_stack[bidi_it->stack_idx].level > prev_level
2865 && ISOLATE_STATUS (bidi_it, bidi_it->stack_idx))
2866 {
2867 bidi_record_type_for_neutral (&prev_for_neutral, prev_level, 0);
2868 bidi_record_type_for_neutral (&next_for_neutral, prev_level, 1);
2869 }
2870 if (type == NEUTRAL_ON
2871 && bidi_paired_bracket_type (ch) == BIDI_BRACKET_OPEN)
2872 {
2873 if (bidi_it->bracket_pairing_pos > bidi_it->charpos)
2874 {
2875 /* A cached opening bracket that wasn't completely
2876 resolved yet. */
2877 resolve_bracket = true;
2878 }
2879 else if (bidi_it->bracket_pairing_pos == -1)
2880 {
2881 /* Higher levels were not BPA-resolved yet, even if
2882 cached by bidi_find_bracket_pairs. Force application
2883 of BPA to the new level now. */
2884 if (bidi_find_bracket_pairs (bidi_it))
2885 resolve_bracket = true;
2886 }
2887 }
2888 /* Keep track of the prev_for_neutral and next_for_neutral
2889 types, needed for resolving brackets below and for resolving
2890 neutrals in bidi_resolve_neutral. */
2891 if (bidi_it->level_stack[bidi_it->stack_idx].level == prev_level)
2892 {
2893 bidi_it->prev_for_neutral = prev_for_neutral;
2894 if (next_for_neutral.charpos > 0)
2895 bidi_it->next_for_neutral = next_for_neutral;
2896 }
2897 }
2898
2899 /* If needed, resolve the bracket type according to N0. */
2900 if (resolve_bracket)
2901 {
2902 int embedding_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2903 bidi_type_t embedding_type = (embedding_level & 1) ? STRONG_R : STRONG_L;
2904
2905 eassert (bidi_it->prev_for_neutral.type != UNKNOWN_BT);
2906 eassert (bidi_it->bracket_pairing_pos > bidi_it->charpos);
2907 if (bidi_it->bracket_enclosed_type == embedding_type) /* N0b */
2908 type = embedding_type;
2909 else
2910 {
2911 switch (bidi_it->prev_for_neutral.type)
2912 {
2913 case STRONG_R:
2914 case WEAK_EN:
2915 case WEAK_AN:
2916 type =
2917 (bidi_it->bracket_enclosed_type == STRONG_R) /* N0c */
2918 ? STRONG_R /* N0c1 */
2919 : embedding_type; /* N0c2 */
2920 break;
2921 case STRONG_L:
2922 type =
2923 (bidi_it->bracket_enclosed_type == STRONG_L) /* N0c */
2924 ? STRONG_L /* N0c1 */
2925 : embedding_type; /* N0c2 */
2926 break;
2927 default:
2928 /* N0d: Do not set the type for that bracket pair. */
2929 break;
2930 }
2931 }
2932 eassert (type == STRONG_L || type == STRONG_R || type == NEUTRAL_ON);
2933
2934 /* Update the type of the paired closing bracket to the same
2935 type as for the resolved opening bracket. */
2936 if (type != NEUTRAL_ON)
2937 {
2938 ptrdiff_t idx = bidi_cache_search (bidi_it->bracket_pairing_pos,
2939 -1, 1);
2940
2941 if (idx < bidi_cache_start)
2942 emacs_abort ();
2943 bidi_cache[idx].type = type;
2944 }
2945 }
2946
2947 return type;
2948 }
2949
2950 static bidi_type_t
2951 bidi_resolve_neutral (struct bidi_it *bidi_it)
2952 {
2953 bidi_type_t type = bidi_resolve_brackets (bidi_it);
2954 int current_level;
2955 bool is_neutral;
2956
2957 eassert (type == STRONG_R
2958 || type == STRONG_L
2959 || type == WEAK_BN
2960 || type == WEAK_EN
2961 || type == WEAK_AN
2962 || type == NEUTRAL_B
2963 || type == NEUTRAL_S
2964 || type == NEUTRAL_WS
2965 || type == NEUTRAL_ON
2966 || type == LRI
2967 || type == RLI
2968 || type == PDI);
2969
2970 current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2971 eassert (current_level >= 0);
2972 is_neutral = bidi_get_category (type) == NEUTRAL;
2973
2974 if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
2975 we are already at paragraph end. */
2976 && (is_neutral || bidi_isolate_fmt_char (type)))
2977 /* N1-N2/Retaining */
2978 || type == WEAK_BN)
2979 {
2980 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
2981 {
2982 /* Make sure the data for resolving neutrals we are
2983 about to use is valid. */
2984 eassert (bidi_it->next_for_neutral.charpos > bidi_it->charpos
2985 /* PDI defines an eos, so it's OK for it to
2986 serve as its own next_for_neutral. */
2987 || (bidi_it->next_for_neutral.charpos == bidi_it->charpos
2988 && bidi_it->type == PDI));
2989 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2990 bidi_it->next_for_neutral.type,
2991 current_level);
2992 }
2993 /* The next two "else if" clauses are shortcuts for the
2994 important special case when we have a long sequence of
2995 neutral or WEAK_BN characters, such as whitespace or nulls or
2996 other control characters, on the base embedding level of the
2997 paragraph, and that sequence goes all the way to the end of
2998 the paragraph and follows a character whose resolved
2999 directionality is identical to the base embedding level.
3000 (This is what happens in a buffer with plain L2R text that
3001 happens to include long sequences of control characters.) By
3002 virtue of N1, the result of examining this long sequence will
3003 always be either STRONG_L or STRONG_R, depending on the base
3004 embedding level. So we use this fact directly instead of
3005 entering the expensive loop in the "else" clause. */
3006 else if (current_level == 0
3007 && bidi_it->prev_for_neutral.type == STRONG_L
3008 && (ASCII_CHAR_P (bidi_it->ch)
3009 || (type != WEAK_BN
3010 && !bidi_explicit_dir_char (bidi_it->ch)
3011 && !bidi_isolate_fmt_char (type))))
3012 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
3013 STRONG_L, current_level);
3014 else if (/* current level is 1 */
3015 current_level == 1
3016 /* base embedding level is also 1 */
3017 && bidi_it->level_stack[0].level == 1
3018 /* previous character is one of those considered R for
3019 the purposes of W5 */
3020 && (bidi_it->prev_for_neutral.type == STRONG_R
3021 || bidi_it->prev_for_neutral.type == WEAK_EN
3022 || bidi_it->prev_for_neutral.type == WEAK_AN)
3023 && type != WEAK_BN
3024 && !bidi_explicit_dir_char (bidi_it->ch)
3025 && !bidi_isolate_fmt_char (type))
3026 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
3027 STRONG_R, current_level);
3028 else
3029 {
3030 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
3031 the assumption of batch-style processing; see clauses W4,
3032 W5, and especially N1, which require looking far forward
3033 (as well as back) in the buffer/string. May the fleas of
3034 a thousand camels infest the armpits of those who design
3035 supposedly general-purpose algorithms by looking at their
3036 own implementations, and fail to consider other possible
3037 implementations! */
3038 struct bidi_it saved_it;
3039 bidi_type_t next_type;
3040 bool adjacent_to_neutrals = is_neutral;
3041
3042 bidi_copy_it (&saved_it, bidi_it);
3043 /* Scan the text forward until we find the first non-neutral
3044 character, and then use that to resolve the neutral we
3045 are dealing with now. We also cache the scanned iterator
3046 states, to salvage some of the effort later. */
3047 do {
3048 int old_sidx, new_sidx;
3049
3050 /* Paragraph separators have their levels fully resolved
3051 at this point, so cache them as resolved. */
3052 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
3053 old_sidx = bidi_it->stack_idx;
3054 type = bidi_resolve_brackets (bidi_it);
3055 /* Skip level runs excluded from this isolating run sequence. */
3056 new_sidx = bidi_it->stack_idx;
3057 if (bidi_it->level_stack[new_sidx].level > current_level
3058 && (ISOLATE_STATUS (bidi_it, new_sidx)
3059 /* This is for when we have an isolate initiator
3060 immediately followed by an embedding or
3061 override initiator, in which case we get the
3062 level stack pushed twice by the single call to
3063 bidi_resolve_weak above. */
3064 || (new_sidx > old_sidx + 1
3065 && ISOLATE_STATUS (bidi_it, new_sidx - 1))))
3066 {
3067 while (bidi_it->level_stack[bidi_it->stack_idx].level
3068 > current_level)
3069 {
3070 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
3071 type = bidi_resolve_brackets (bidi_it);
3072 }
3073 }
3074 if (!adjacent_to_neutrals
3075 && (bidi_get_category (type) == NEUTRAL
3076 || bidi_isolate_fmt_char (type)))
3077 adjacent_to_neutrals = true;
3078 } while (!(type == NEUTRAL_B
3079 || (type != WEAK_BN
3080 && bidi_get_category (type) != NEUTRAL
3081 && !bidi_isolate_fmt_char (type))
3082 /* This is all per level run, so stop when we
3083 reach the end of this level run. */
3084 || (bidi_it->level_stack[bidi_it->stack_idx].level
3085 != current_level)));
3086
3087 /* Record the character we stopped at. */
3088 bidi_remember_char (&saved_it.next_for_neutral, bidi_it, 1);
3089
3090 if ((bidi_it->level_stack[bidi_it->stack_idx].level != current_level)
3091 || type == NEUTRAL_B)
3092 {
3093 /* Marched all the way to the end of this level run. We
3094 need to use the eos type, whose information is stored
3095 by bidi_set_sos_type in the prev_for_neutral
3096 member. */
3097 if (adjacent_to_neutrals)
3098 next_type = bidi_it->prev_for_neutral.type;
3099 else
3100 {
3101 /* This is a BN which does not adjoin neutrals.
3102 Leave its type alone. */
3103 bidi_copy_it (bidi_it, &saved_it);
3104 return bidi_it->type;
3105 }
3106 }
3107 else
3108 {
3109 switch (type)
3110 {
3111 case STRONG_L:
3112 case STRONG_R:
3113 case STRONG_AL:
3114 /* Actually, STRONG_AL cannot happen here, because
3115 bidi_resolve_weak converts it to STRONG_R, per W3. */
3116 eassert (type != STRONG_AL);
3117 next_type = type;
3118 break;
3119 case WEAK_EN:
3120 case WEAK_AN:
3121 /* N1: "European and Arabic numbers act as if they
3122 were R in terms of their influence on NIs." */
3123 next_type = STRONG_R;
3124 break;
3125 default:
3126 emacs_abort ();
3127 break;
3128 }
3129 }
3130 /* Resolve the type of all the NIs found during the above loop. */
3131 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
3132 next_type, current_level);
3133 /* Update next_for_neutral with the resolved type, so we
3134 could use it for all the other NIs up to the place where
3135 we exited the loop. */
3136 saved_it.next_for_neutral.type = next_type;
3137 bidi_check_type (type);
3138 /* Update the character which caused us to enter the above loop. */
3139 saved_it.type = type;
3140 bidi_check_type (next_type);
3141 bidi_copy_it (bidi_it, &saved_it);
3142 }
3143 }
3144 return type;
3145 }
3146
3147 /* Given an iterator state in BIDI_IT, advance one character position
3148 in the buffer/string to the next character (in the logical order),
3149 resolve the bidi type of that next character, and return that
3150 type. */
3151 static bidi_type_t
3152 bidi_type_of_next_char (struct bidi_it *bidi_it)
3153 {
3154 bidi_type_t type;
3155
3156 /* This should always be called during a forward scan. */
3157 if (bidi_it->scan_dir != 1)
3158 emacs_abort ();
3159
3160 type = bidi_resolve_neutral (bidi_it);
3161
3162 return type;
3163 }
3164
3165 /* Given an iterator state BIDI_IT, advance one character position in
3166 the buffer/string to the next character (in the current scan
3167 direction), resolve the embedding and implicit levels of that next
3168 character, and return the resulting level. */
3169 static int
3170 bidi_level_of_next_char (struct bidi_it *bidi_it)
3171 {
3172 bidi_type_t type = UNKNOWN_BT;
3173 int level;
3174 ptrdiff_t next_char_pos = -2;
3175
3176 if (bidi_it->scan_dir == 1)
3177 {
3178 ptrdiff_t eob
3179 = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
3180 ? bidi_it->string.schars : ZV);
3181
3182 /* There's no sense in trying to advance if we've already hit
3183 the end of text. */
3184 if (bidi_it->charpos >= eob)
3185 {
3186 eassert (bidi_it->resolved_level >= 0);
3187 return bidi_it->resolved_level;
3188 }
3189 }
3190
3191 /* Perhaps the character we want is already cached as fully resolved.
3192 If it is, the call to bidi_cache_find below will return a type
3193 other than UNKNOWN_BT. */
3194 if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
3195 {
3196 int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
3197 ? 0 : 1);
3198
3199 if (bidi_it->scan_dir > 0)
3200 {
3201 if (bidi_it->nchars <= 0)
3202 emacs_abort ();
3203 next_char_pos = bidi_it->charpos + bidi_it->nchars;
3204 }
3205 else if (bidi_it->charpos >= bob)
3206 /* Implementation note: we allow next_char_pos to be as low as
3207 0 for buffers or -1 for strings, and that is okay because
3208 that's the "position" of the sentinel iterator state we
3209 cached at the beginning of the iteration. */
3210 next_char_pos = bidi_it->charpos - 1;
3211 if (next_char_pos >= bob - 1)
3212 type = bidi_cache_find (next_char_pos, 1, bidi_it);
3213 if (type != UNKNOWN_BT)
3214 {
3215 /* We asked the cache for fully resolved states. */
3216 eassert (bidi_it->resolved_level >= 0);
3217 return bidi_it->resolved_level;
3218 }
3219 }
3220
3221 if (bidi_it->scan_dir == -1)
3222 /* If we are going backwards, the iterator state is already cached
3223 from previous scans, and should be fully resolved. */
3224 emacs_abort ();
3225
3226 if (type == UNKNOWN_BT)
3227 type = bidi_type_of_next_char (bidi_it);
3228
3229 if (type == NEUTRAL_B)
3230 {
3231 eassert (bidi_it->resolved_level >= 0);
3232 return bidi_it->resolved_level;
3233 }
3234
3235 level = bidi_it->level_stack[bidi_it->stack_idx].level;
3236
3237 eassert ((type == STRONG_R
3238 || type == STRONG_L
3239 || type == WEAK_BN
3240 || type == WEAK_EN
3241 || type == WEAK_AN));
3242 bidi_it->type = type;
3243 bidi_check_type (bidi_it->type);
3244
3245 /* For L1 below, we need to know, for each WS character, whether
3246 it belongs to a sequence of WS characters preceding a newline
3247 or a TAB or a paragraph separator. */
3248 if ((bidi_it->orig_type == NEUTRAL_WS
3249 || bidi_it->orig_type == WEAK_BN
3250 || bidi_isolate_fmt_char (bidi_it->orig_type))
3251 && bidi_it->next_for_ws.charpos < bidi_it->charpos
3252 /* If this character is already at base level, we don't need to
3253 reset it, so avoid the potentially costly loop below. */
3254 && level != bidi_it->level_stack[0].level)
3255 {
3256 int ch;
3257 ptrdiff_t clen = bidi_it->ch_len;
3258 ptrdiff_t bpos = bidi_it->bytepos;
3259 ptrdiff_t cpos = bidi_it->charpos;
3260 ptrdiff_t disp_pos = bidi_it->disp_pos;
3261 ptrdiff_t nc = bidi_it->nchars;
3262 struct bidi_string_data bs = bidi_it->string;
3263 bidi_type_t chtype;
3264 bool fwp = bidi_it->frame_window_p;
3265 int dpp = bidi_it->disp_prop;
3266
3267 if (bidi_it->nchars <= 0)
3268 emacs_abort ();
3269 do {
3270 ch = bidi_fetch_char (cpos += nc, bpos += clen, &disp_pos, &dpp, &bs,
3271 bidi_it->w, fwp, &clen, &nc);
3272 chtype = bidi_get_type (ch, NEUTRAL_DIR);
3273 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
3274 || bidi_isolate_fmt_char (chtype)
3275 || bidi_explicit_dir_char (ch)); /* L1/Retaining */
3276 bidi_it->next_for_ws.type = chtype;
3277 bidi_check_type (bidi_it->next_for_ws.type);
3278 bidi_it->next_for_ws.charpos = cpos;
3279 }
3280
3281 /* Update the cache, but only if this state was already cached. */
3282 bidi_cache_iterator_state (bidi_it, 1, 1);
3283
3284 /* Resolve implicit levels. */
3285 if (bidi_it->orig_type == NEUTRAL_B /* L1 */
3286 || bidi_it->orig_type == NEUTRAL_S
3287 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
3288 || ((bidi_it->orig_type == NEUTRAL_WS
3289 || bidi_it->orig_type == WEAK_BN
3290 || bidi_isolate_fmt_char (bidi_it->orig_type)
3291 || bidi_explicit_dir_char (bidi_it->ch))
3292 && (bidi_it->next_for_ws.type == NEUTRAL_B
3293 || bidi_it->next_for_ws.type == NEUTRAL_S)))
3294 level = bidi_it->level_stack[0].level;
3295 else if ((level & 1) == 0) /* I1 */
3296 {
3297 if (type == STRONG_R)
3298 level++;
3299 else if (type == WEAK_EN || type == WEAK_AN)
3300 level += 2;
3301 }
3302 else /* I2 */
3303 {
3304 if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
3305 level++;
3306 }
3307
3308 bidi_it->resolved_level = level;
3309 return level;
3310 }
3311
3312 /* Move to the other edge of a level given by LEVEL. If END_FLAG,
3313 we are at the end of a level, and we need to prepare to
3314 resume the scan of the lower level.
3315
3316 If this level's other edge is cached, we simply jump to it, filling
3317 the iterator structure with the iterator state on the other edge.
3318 Otherwise, we walk the buffer or string until we come back to the
3319 same level as LEVEL.
3320
3321 Note: we are not talking here about a ``level run'' in the UAX#9
3322 sense of the term, but rather about a ``level'' which includes
3323 all the levels higher than it. In other words, given the levels
3324 like this:
3325
3326 11111112222222333333334443343222222111111112223322111
3327 A B C
3328
3329 and assuming we are at point A scanning left to right, this
3330 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
3331 at point B. */
3332 static void
3333 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, bool end_flag)
3334 {
3335 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
3336 ptrdiff_t idx;
3337
3338 /* Try the cache first. */
3339 if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
3340 >= bidi_cache_start)
3341 bidi_cache_fetch_state (idx, bidi_it);
3342 else
3343 {
3344 int new_level;
3345
3346 /* If we are at end of level, its edges must be cached. */
3347 if (end_flag)
3348 emacs_abort ();
3349
3350 if (!bidi_cache_iterator_state (bidi_it, 1, 0))
3351 {
3352 /* Can't happen: if the cache needs to grow, it means we
3353 were at base embedding level, so the cache should have
3354 been either empty or already large enough to cover this
3355 character position. */
3356 emacs_abort ();
3357 }
3358 do {
3359 new_level = bidi_level_of_next_char (bidi_it);
3360 /* If the cache is full, perform an emergency return by
3361 pretending that the level ended. */
3362 if (!bidi_cache_iterator_state (bidi_it, 1, 0))
3363 {
3364 new_level = level - 1;
3365 /* Since the cache should only grow when we are scanning
3366 forward looking for the edge of the level that is one
3367 above the base embedding level, we can only have this
3368 contingency when LEVEL - 1 is the base embedding
3369 level. */
3370 eassert (new_level == bidi_it->level_stack[0].level);
3371 /* Plan B, for when the cache overflows: Back up to the
3372 previous character by fetching the last cached state,
3373 and force the resolved level of that character be the
3374 base embedding level. */
3375 bidi_cache_fetch_state (bidi_cache_idx - 1, bidi_it);
3376 bidi_it->resolved_level = new_level;
3377 bidi_cache_iterator_state (bidi_it, 1, 1);
3378 }
3379 } while (new_level >= level);
3380 }
3381 }
3382
3383 void
3384 bidi_move_to_visually_next (struct bidi_it *bidi_it)
3385 {
3386 int old_level, new_level, next_level;
3387 struct bidi_it sentinel;
3388
3389 if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
3390 emacs_abort ();
3391
3392 if (bidi_it->scan_dir == 0)
3393 {
3394 bidi_it->scan_dir = 1; /* default to logical order */
3395 }
3396
3397 /* If we just passed a newline, initialize for the next line. */
3398 if (!bidi_it->first_elt
3399 && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
3400 bidi_line_init (bidi_it);
3401
3402 /* Prepare the sentinel iterator state, and cache it. When we bump
3403 into it, scanning backwards, we'll know that the last non-base
3404 level is exhausted. */
3405 if (bidi_cache_idx == bidi_cache_start)
3406 {
3407 bidi_copy_it (&sentinel, bidi_it);
3408 if (bidi_it->first_elt)
3409 {
3410 sentinel.charpos--; /* cached charpos needs to be monotonic */
3411 sentinel.bytepos--;
3412 sentinel.ch = '\n'; /* doesn't matter, but why not? */
3413 sentinel.ch_len = 1;
3414 sentinel.nchars = 1;
3415 }
3416 bidi_cache_iterator_state (&sentinel, 1, 0);
3417 }
3418
3419 old_level = bidi_it->resolved_level;
3420 new_level = bidi_level_of_next_char (bidi_it);
3421
3422 /* Reordering of resolved levels (clause L2) is implemented by
3423 jumping to the other edge of the level and flipping direction of
3424 scanning the text whenever we find a level change. */
3425 if (new_level != old_level)
3426 {
3427 bool ascending = new_level > old_level;
3428 int level_to_search = ascending ? old_level + 1 : old_level;
3429 int incr = ascending ? 1 : -1;
3430 int expected_next_level = old_level + incr;
3431
3432 /* Jump (or walk) to the other edge of this level. */
3433 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
3434 /* Switch scan direction and peek at the next character in the
3435 new direction. */
3436 bidi_it->scan_dir = -bidi_it->scan_dir;
3437
3438 /* The following loop handles the case where the resolved level
3439 jumps by more than one. This is typical for numbers inside a
3440 run of text with left-to-right embedding direction, but can
3441 also happen in other situations. In those cases the decision
3442 where to continue after a level change, and in what direction,
3443 is tricky. For example, given a text like below:
3444
3445 abcdefgh
3446 11336622
3447
3448 (where the numbers below the text show the resolved levels),
3449 the result of reordering according to UAX#9 should be this:
3450
3451 efdcghba
3452
3453 This is implemented by the loop below which flips direction
3454 and jumps to the other edge of the level each time it finds
3455 the new level not to be the expected one. The expected level
3456 is always one more or one less than the previous one. */
3457 next_level = bidi_peek_at_next_level (bidi_it);
3458 while (next_level != expected_next_level)
3459 {
3460 /* If next_level is -1, it means we have an unresolved level
3461 in the cache, which at this point should not happen. If
3462 it does, we will infloop. */
3463 eassert (next_level >= 0);
3464 /* If next_level is not consistent with incr, we might
3465 infloop. */
3466 eassert (incr > 0
3467 ? next_level > expected_next_level
3468 : next_level < expected_next_level);
3469 expected_next_level += incr;
3470 level_to_search += incr;
3471 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
3472 bidi_it->scan_dir = -bidi_it->scan_dir;
3473 next_level = bidi_peek_at_next_level (bidi_it);
3474 }
3475
3476 /* Finally, deliver the next character in the new direction. */
3477 next_level = bidi_level_of_next_char (bidi_it);
3478 }
3479
3480 /* Take note when we have just processed the newline that precedes
3481 the end of the paragraph. The next time we are about to be
3482 called, set_iterator_to_next will automatically reinit the
3483 paragraph direction, if needed. We do this at the newline before
3484 the paragraph separator, because the next character might not be
3485 the first character of the next paragraph, due to the bidi
3486 reordering, whereas we _must_ know the paragraph base direction
3487 _before_ we process the paragraph's text, since the base
3488 direction affects the reordering. */
3489 if (bidi_it->scan_dir == 1
3490 && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
3491 {
3492 /* The paragraph direction of the entire string, once
3493 determined, is in effect for the entire string. Setting the
3494 separator limit to the end of the string prevents
3495 bidi_paragraph_init from being called automatically on this
3496 string. */
3497 if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
3498 bidi_it->separator_limit = bidi_it->string.schars;
3499 else if (bidi_it->bytepos < ZV_BYTE)
3500 {
3501 ptrdiff_t sep_len
3502 = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
3503 bidi_it->bytepos + bidi_it->ch_len);
3504 if (bidi_it->nchars <= 0)
3505 emacs_abort ();
3506 if (sep_len >= 0)
3507 {
3508 bidi_it->new_paragraph = 1;
3509 /* Record the buffer position of the last character of the
3510 paragraph separator. */
3511 bidi_it->separator_limit
3512 = bidi_it->charpos + bidi_it->nchars + sep_len;
3513 }
3514 }
3515 }
3516
3517 if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
3518 {
3519 /* If we are at paragraph's base embedding level and beyond the
3520 last cached position, the cache's job is done and we can
3521 discard it. */
3522 if (bidi_it->resolved_level == bidi_it->level_stack[0].level
3523 && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
3524 + bidi_cache[bidi_cache_idx - 1].nchars - 1))
3525 bidi_cache_reset ();
3526 /* Also reset the cache if it overflowed and we have just
3527 emergency-exited using Plan B. */
3528 else if (bidi_it->resolved_level == bidi_it->level_stack[0].level
3529 && bidi_cache_idx >= bidi_cache_size
3530 && bidi_it->charpos == bidi_cache[bidi_cache_idx - 1].charpos)
3531 bidi_cache_reset ();
3532 /* But as long as we are caching during forward scan, we must
3533 cache each state, or else the cache integrity will be
3534 compromised: it assumes cached states correspond to buffer
3535 positions 1:1. */
3536 else
3537 bidi_cache_iterator_state (bidi_it, 1, 0);
3538 }
3539
3540 eassert (bidi_it->resolved_level >= 0
3541 && bidi_it->resolved_level <= BIDI_MAXDEPTH + 2);
3542 }
3543
3544 /* Utility function for looking for strong directional characters
3545 whose bidi type was overridden by a directional override. */
3546 ptrdiff_t
3547 bidi_find_first_overridden (struct bidi_it *bidi_it)
3548 {
3549 ptrdiff_t found_pos = ZV;
3550
3551 do
3552 {
3553 /* Need to call bidi_resolve_weak, not bidi_resolve_explicit,
3554 because the directional overrides are applied by the
3555 former. */
3556 bidi_type_t type = bidi_resolve_weak (bidi_it);
3557
3558 if ((type == STRONG_R && bidi_it->orig_type == STRONG_L)
3559 || (type == STRONG_L
3560 && (bidi_it->orig_type == STRONG_R
3561 || bidi_it->orig_type == STRONG_AL)))
3562 found_pos = bidi_it->charpos;
3563 } while (found_pos == ZV
3564 && bidi_it->charpos < ZV
3565 && bidi_it->ch != BIDI_EOB
3566 && bidi_it->ch != '\n');
3567
3568 return found_pos;
3569 }
3570
3571 /* This is meant to be called from within the debugger, whenever you
3572 wish to examine the cache contents. */
3573 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
3574 void
3575 bidi_dump_cached_states (void)
3576 {
3577 ptrdiff_t i;
3578 int ndigits = 1;
3579
3580 if (bidi_cache_idx == 0)
3581 {
3582 fprintf (stderr, "The cache is empty.\n");
3583 return;
3584 }
3585 fprintf (stderr, "Total of %"pD"d state%s in cache:\n",
3586 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
3587
3588 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
3589 ndigits++;
3590 fputs ("ch ", stderr);
3591 for (i = 0; i < bidi_cache_idx; i++)
3592 fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
3593 fputs ("\n", stderr);
3594 fputs ("lvl ", stderr);
3595 for (i = 0; i < bidi_cache_idx; i++)
3596 fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
3597 fputs ("\n", stderr);
3598 fputs ("pos ", stderr);
3599 for (i = 0; i < bidi_cache_idx; i++)
3600 fprintf (stderr, "%*"pD"d", ndigits, bidi_cache[i].charpos);
3601 fputs ("\n", stderr);
3602 }