code.delx.au - gnu-emacs/blob - src/bidi.c

   1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2013 Free Software
   3    Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
   22    A sequential implementation of the Unicode Bidirectional Algorithm
   23    (UBA), as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    Two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    A few auxiliary entry points are used to initialize the bidi
  44    iterator for iterating an object (buffer or string), push and pop
  45    the bidi iterator state, and save and restore the state of the bidi
  46    cache.
  47
  48    If you want to understand the code, you will have to read it
  49    together with the relevant portions of UAX#9.  The comments include
  50    references to UAX#9 rules, for that very reason.
  51
  52    A note about references to UAX#9 rules: if the reference says
  53    something like "X9/Retaining", it means that you need to refer to
  54    rule X9 and to its modifications described in the "Implementation
  55    Notes" section of UAX#9, under "Retaining Format Codes".  */
  56
  57 #include <config.h>
  58 #include <stdio.h>
  59
  60 #include "lisp.h"
  61 #include "character.h"
  62 #include "buffer.h"
  63 #include "dispextern.h"
  64
  65 static bool bidi_initialized = 0;
  66
  67 static Lisp_Object bidi_type_table, bidi_mirror_table;
  68
  69 #define LRM_CHAR   0x200E
  70 #define RLM_CHAR   0x200F
  71 #define BIDI_EOB   -1
  72
  73 /* Data type for describing the bidirectional character categories.  */
  74 typedef enum {
  75   UNKNOWN_BC,
  76   NEUTRAL,
  77   WEAK,
  78   STRONG
  79 } bidi_category_t;
  80
  81 /* UAX#9 says to search only for L, AL, or R types of characters, and
  82    ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
  83    level.  Yudit indeed ignores them.  This variable is therefore set
  84    by default to ignore them, but clearing it will take them into
  85    account.  */
  86 extern bool bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
  87 bool bidi_ignore_explicit_marks_for_paragraph_level = 1;
  88
  89 static Lisp_Object paragraph_start_re, paragraph_separate_re;
  90 static Lisp_Object Qparagraph_start, Qparagraph_separate;
  91
  92 \f
  93 /***********************************************************************
  94                         Utilities
  95  ***********************************************************************/
  96
  97 /* Return the bidi type of a character CH, subject to the current
  98    directional OVERRIDE.  */
  99 static bidi_type_t
 100 bidi_get_type (int ch, bidi_dir_t override)
 101 {
 102   bidi_type_t default_type;
 103
 104   if (ch == BIDI_EOB)
 105     return NEUTRAL_B;
 106   if (ch < 0 || ch > MAX_CHAR)
 107     emacs_abort ();
 108
 109   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 110   /* Every valid character code, even those that are unassigned by the
 111      UCD, have some bidi-class property, according to
 112      DerivedBidiClass.txt file.  Therefore, if we ever get UNKNOWN_BT
 113      (= zero) code from CHAR_TABLE_REF, that's a bug.  */
 114   if (default_type == UNKNOWN_BT)
 115     emacs_abort ();
 116
 117   if (override == NEUTRAL_DIR)
 118     return default_type;
 119
 120   switch (default_type)
 121     {
 122       /* Although UAX#9 does not tell, it doesn't make sense to
 123          override NEUTRAL_B and LRM/RLM characters.  */
 124       case NEUTRAL_B:
 125       case LRE:
 126       case LRO:
 127       case RLE:
 128       case RLO:
 129       case PDF:
 130         return default_type;
 131       default:
 132         switch (ch)
 133           {
 134             case LRM_CHAR:
 135             case RLM_CHAR:
 136               return default_type;
 137             default:
 138               if (override == L2R) /* X6 */
 139                 return STRONG_L;
 140               else if (override == R2L)
 141                 return STRONG_R;
 142               else
 143                 emacs_abort (); /* can't happen: handled above */
 144           }
 145     }
 146 }
 147
 148 static void
 149 bidi_check_type (bidi_type_t type)
 150 {
 151   eassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
 152 }
 153
 154 /* Given a bidi TYPE of a character, return its category.  */
 155 static bidi_category_t
 156 bidi_get_category (bidi_type_t type)
 157 {
 158   switch (type)
 159     {
 160       case UNKNOWN_BT:
 161         return UNKNOWN_BC;
 162       case STRONG_L:
 163       case STRONG_R:
 164       case STRONG_AL:
 165       case LRE:
 166       case LRO:
 167       case RLE:
 168       case RLO:
 169         return STRONG;
 170       case PDF:         /* ??? really?? */
 171       case WEAK_EN:
 172       case WEAK_ES:
 173       case WEAK_ET:
 174       case WEAK_AN:
 175       case WEAK_CS:
 176       case WEAK_NSM:
 177       case WEAK_BN:
 178         return WEAK;
 179       case NEUTRAL_B:
 180       case NEUTRAL_S:
 181       case NEUTRAL_WS:
 182       case NEUTRAL_ON:
 183         return NEUTRAL;
 184       default:
 185         emacs_abort ();
 186     }
 187 }
 188
 189 /* Return the mirrored character of C, if it has one.  If C has no
 190    mirrored counterpart, return C.
 191    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 192    context must be tested by the caller.  */
 193 int
 194 bidi_mirror_char (int c)
 195 {
 196   Lisp_Object val;
 197
 198   if (c == BIDI_EOB)
 199     return c;
 200   if (c < 0 || c > MAX_CHAR)
 201     emacs_abort ();
 202
 203   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 204   if (INTEGERP (val))
 205     {
 206       int v;
 207
 208       /* When debugging, check before assigning to V, so that the check
 209          isn't broken by undefined behavior due to int overflow.  */
 210       eassert (CHAR_VALID_P (XINT (val)));
 211
 212       v = XINT (val);
 213
 214       /* Minimal test we must do in optimized builds, to prevent weird
 215          crashes further down the road.  */
 216       if (v < 0 || v > MAX_CHAR)
 217         emacs_abort ();
 218
 219       return v;
 220     }
 221
 222   return c;
 223 }
 224
 225 /* Determine the start-of-run (sor) directional type given the two
 226    embedding levels on either side of the run boundary.  Also, update
 227    the saved info about previously seen characters, since that info is
 228    generally valid for a single level run.  */
 229 static void
 230 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 231 {
 232   int higher_level = (level_before > level_after ? level_before : level_after);
 233
 234   /* The prev_was_pdf gork is required for when we have several PDFs
 235      in a row.  In that case, we want to compute the sor type for the
 236      next level run only once: when we see the first PDF.  That's
 237      because the sor type depends only on the higher of the two levels
 238      that we find on the two sides of the level boundary (see UAX#9,
 239      clause X10), and so we don't need to know the final embedding
 240      level to which we descend after processing all the PDFs.  */
 241   if (!bidi_it->prev_was_pdf || level_before < level_after)
 242     /* FIXME: should the default sor direction be user selectable?  */
 243     bidi_it->sor = ((higher_level & 1) != 0 ? R2L : L2R);
 244   if (level_before > level_after)
 245     bidi_it->prev_was_pdf = 1;
 246
 247   bidi_it->prev.type = UNKNOWN_BT;
 248   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 249     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 250   bidi_it->prev_for_neutral.type = (bidi_it->sor == R2L ? STRONG_R : STRONG_L);
 251   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 252   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 253   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1
 254     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 255   bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
 256 }
 257
 258 /* Push the current embedding level and override status; reset the
 259    current level to LEVEL and the current override status to OVERRIDE.  */
 260 static void
 261 bidi_push_embedding_level (struct bidi_it *bidi_it,
 262                            int level, bidi_dir_t override)
 263 {
 264   bidi_it->stack_idx++;
 265   eassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
 266   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 267   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 268 }
 269
 270 /* Pop the embedding level and directional override status from the
 271    stack, and return the new level.  */
 272 static int
 273 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 274 {
 275   /* UAX#9 says to ignore invalid PDFs.  */
 276   if (bidi_it->stack_idx > 0)
 277     bidi_it->stack_idx--;
 278   return bidi_it->level_stack[bidi_it->stack_idx].level;
 279 }
 280
 281 /* Record in SAVED_INFO the information about the current character.  */
 282 static void
 283 bidi_remember_char (struct bidi_saved_info *saved_info,
 284                     struct bidi_it *bidi_it)
 285 {
 286   saved_info->charpos = bidi_it->charpos;
 287   saved_info->bytepos = bidi_it->bytepos;
 288   saved_info->type = bidi_it->type;
 289   bidi_check_type (bidi_it->type);
 290   saved_info->type_after_w1 = bidi_it->type_after_w1;
 291   bidi_check_type (bidi_it->type_after_w1);
 292   saved_info->orig_type = bidi_it->orig_type;
 293   bidi_check_type (bidi_it->orig_type);
 294 }
 295
 296 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 297    copies the part of the level stack that is actually in use.  */
 298 static void
 299 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 300 {
 301   /* Copy everything from the start through the active part of
 302      the level stack.  */
 303   memcpy (to, from,
 304           (offsetof (struct bidi_it, level_stack[1])
 305            + from->stack_idx * sizeof from->level_stack[0]));
 306 }
 307
 308 \f
 309 /***********************************************************************
 310                         Caching the bidi iterator states
 311  ***********************************************************************/
 312
 313 #define BIDI_CACHE_CHUNK 200
 314 static struct bidi_it *bidi_cache;
 315 static ptrdiff_t bidi_cache_size = 0;
 316 enum { elsz = sizeof (struct bidi_it) };
 317 static ptrdiff_t bidi_cache_idx;        /* next unused cache slot */
 318 static ptrdiff_t bidi_cache_last_idx;   /* slot of last cache hit */
 319 static ptrdiff_t bidi_cache_start = 0;  /* start of cache for this
 320                                            "stack" level */
 321
 322 /* 5-slot stack for saving the start of the previous level of the
 323    cache.  xdisp.c maintains a 5-slot stack for its iterator state,
 324    and we need the same size of our stack.  */
 325 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
 326 static int bidi_cache_sp;
 327
 328 /* Size of header used by bidi_shelve_cache.  */
 329 enum
 330   {
 331     bidi_shelve_header_size
 332       = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
 333          + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
 334          + sizeof (bidi_cache_last_idx))
 335   };
 336
 337 /* Reset the cache state to the empty state.  We only reset the part
 338    of the cache relevant to iteration of the current object.  Previous
 339    objects, which are pushed on the display iterator's stack, are left
 340    intact.  This is called when the cached information is no more
 341    useful for the current iteration, e.g. when we were reseated to a
 342    new position on the same object.  */
 343 static void
 344 bidi_cache_reset (void)
 345 {
 346   bidi_cache_idx = bidi_cache_start;
 347   bidi_cache_last_idx = -1;
 348 }
 349
 350 /* Shrink the cache to its minimal size.  Called when we init the bidi
 351    iterator for reordering a buffer or a string that does not come
 352    from display properties, because that means all the previously
 353    cached info is of no further use.  */
 354 static void
 355 bidi_cache_shrink (void)
 356 {
 357   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 358     {
 359       bidi_cache = xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
 360       bidi_cache_size = BIDI_CACHE_CHUNK;
 361     }
 362   bidi_cache_reset ();
 363 }
 364
 365 static void
 366 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
 367 {
 368   int current_scan_dir = bidi_it->scan_dir;
 369
 370   if (idx < bidi_cache_start || idx >= bidi_cache_idx)
 371     emacs_abort ();
 372
 373   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 374   bidi_it->scan_dir = current_scan_dir;
 375   bidi_cache_last_idx = idx;
 376 }
 377
 378 /* Find a cached state with a given CHARPOS and resolved embedding
 379    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 380    resolved levels in cached states.  DIR, if non-zero, means search
 381    in that direction from the last cache hit.  */
 382 static ptrdiff_t
 383 bidi_cache_search (ptrdiff_t charpos, int level, int dir)
 384 {
 385   ptrdiff_t i, i_start;
 386
 387   if (bidi_cache_idx > bidi_cache_start)
 388     {
 389       if (bidi_cache_last_idx == -1)
 390         bidi_cache_last_idx = bidi_cache_idx - 1;
 391       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 392         {
 393           dir = -1;
 394           i_start = bidi_cache_last_idx - 1;
 395         }
 396       else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
 397                           + bidi_cache[bidi_cache_last_idx].nchars - 1))
 398         {
 399           dir = 1;
 400           i_start = bidi_cache_last_idx + 1;
 401         }
 402       else if (dir)
 403         i_start = bidi_cache_last_idx;
 404       else
 405         {
 406           dir = -1;
 407           i_start = bidi_cache_idx - 1;
 408         }
 409
 410       if (dir < 0)
 411         {
 412           /* Linear search for now; FIXME!  */
 413           for (i = i_start; i >= bidi_cache_start; i--)
 414             if (bidi_cache[i].charpos <= charpos
 415                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 416                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 417               return i;
 418         }
 419       else
 420         {
 421           for (i = i_start; i < bidi_cache_idx; i++)
 422             if (bidi_cache[i].charpos <= charpos
 423                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 424                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 425               return i;
 426         }
 427     }
 428
 429   return -1;
 430 }
 431
 432 /* Find a cached state where the resolved level changes to a value
 433    that is lower than LEVEL, and return its cache slot index.  DIR is
 434    the direction to search, starting with the last used cache slot.
 435    If DIR is zero, we search backwards from the last occupied cache
 436    slot.  BEFORE means return the index of the slot that
 437    is ``before'' the level change in the search direction.  That is,
 438    given the cached levels like this:
 439
 440          1122333442211
 441           AB        C
 442
 443    and assuming we are at the position cached at the slot marked with
 444    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 445    index of slot B or A, depending whether BEFORE is, respectively,
 446    true or false.  */
 447 static ptrdiff_t
 448 bidi_cache_find_level_change (int level, int dir, bool before)
 449 {
 450   if (bidi_cache_idx)
 451     {
 452       ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 453       int incr = before ? 1 : 0;
 454
 455       eassert (!dir || bidi_cache_last_idx >= 0);
 456
 457       if (!dir)
 458         dir = -1;
 459       else if (!incr)
 460         i += dir;
 461
 462       if (dir < 0)
 463         {
 464           while (i >= bidi_cache_start + incr)
 465             {
 466               if (bidi_cache[i - incr].resolved_level >= 0
 467                   && bidi_cache[i - incr].resolved_level < level)
 468                 return i;
 469               i--;
 470             }
 471         }
 472       else
 473         {
 474           while (i < bidi_cache_idx - incr)
 475             {
 476               if (bidi_cache[i + incr].resolved_level >= 0
 477                   && bidi_cache[i + incr].resolved_level < level)
 478                 return i;
 479               i++;
 480             }
 481         }
 482     }
 483
 484   return -1;
 485 }
 486
 487 static void
 488 bidi_cache_ensure_space (ptrdiff_t idx)
 489 {
 490   /* Enlarge the cache as needed.  */
 491   if (idx >= bidi_cache_size)
 492     {
 493       /* The bidi cache cannot be larger than the largest Lisp string
 494          or buffer.  */
 495       ptrdiff_t string_or_buffer_bound
 496         = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
 497
 498       /* Also, it cannot be larger than what C can represent.  */
 499       ptrdiff_t c_bound
 500         = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
 501
 502       bidi_cache
 503         = xpalloc (bidi_cache, &bidi_cache_size,
 504                    max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1),
 505                    min (string_or_buffer_bound, c_bound), elsz);
 506     }
 507 }
 508
 509 static void
 510 bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved)
 511 {
 512   ptrdiff_t idx;
 513
 514   /* We should never cache on backward scans.  */
 515   if (bidi_it->scan_dir == -1)
 516     emacs_abort ();
 517   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 518
 519   if (idx < 0)
 520     {
 521       idx = bidi_cache_idx;
 522       bidi_cache_ensure_space (idx);
 523       /* Character positions should correspond to cache positions 1:1.
 524          If we are outside the range of cached positions, the cache is
 525          useless and must be reset.  */
 526       if (idx > bidi_cache_start &&
 527           (bidi_it->charpos > (bidi_cache[idx - 1].charpos
 528                                + bidi_cache[idx - 1].nchars)
 529            || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
 530         {
 531           bidi_cache_reset ();
 532           idx = bidi_cache_start;
 533         }
 534       if (bidi_it->nchars <= 0)
 535         emacs_abort ();
 536       bidi_copy_it (&bidi_cache[idx], bidi_it);
 537       if (!resolved)
 538         bidi_cache[idx].resolved_level = -1;
 539     }
 540   else
 541     {
 542       /* Copy only the members which could have changed, to avoid
 543          costly copying of the entire struct.  */
 544       bidi_cache[idx].type = bidi_it->type;
 545       bidi_check_type (bidi_it->type);
 546       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 547       bidi_check_type (bidi_it->type_after_w1);
 548       if (resolved)
 549         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 550       else
 551         bidi_cache[idx].resolved_level = -1;
 552       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 553       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 554       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 555       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 556       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 557       bidi_cache[idx].disp_pos = bidi_it->disp_pos;
 558       bidi_cache[idx].disp_prop = bidi_it->disp_prop;
 559     }
 560
 561   bidi_cache_last_idx = idx;
 562   if (idx >= bidi_cache_idx)
 563     bidi_cache_idx = idx + 1;
 564 }
 565
 566 static bidi_type_t
 567 bidi_cache_find (ptrdiff_t charpos, int level, struct bidi_it *bidi_it)
 568 {
 569   ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 570
 571   if (i >= bidi_cache_start)
 572     {
 573       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 574
 575       bidi_copy_it (bidi_it, &bidi_cache[i]);
 576       bidi_cache_last_idx = i;
 577       /* Don't let scan direction from the cached state override
 578          the current scan direction.  */
 579       bidi_it->scan_dir = current_scan_dir;
 580       return bidi_it->type;
 581     }
 582
 583   return UNKNOWN_BT;
 584 }
 585
 586 static int
 587 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 588 {
 589   if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
 590     emacs_abort ();
 591   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 592 }
 593
 594 \f
 595 /***********************************************************************
 596              Pushing and popping the bidi iterator state
 597  ***********************************************************************/
 598
 599 /* Push the bidi iterator state in preparation for reordering a
 600    different object, e.g. display string found at certain buffer
 601    position.  Pushing the bidi iterator boils down to saving its
 602    entire state on the cache and starting a new cache "stacked" on top
 603    of the current cache.  */
 604 void
 605 bidi_push_it (struct bidi_it *bidi_it)
 606 {
 607   /* Save the current iterator state in its entirety after the last
 608      used cache slot.  */
 609   bidi_cache_ensure_space (bidi_cache_idx);
 610   bidi_cache[bidi_cache_idx++] = *bidi_it;
 611
 612   /* Push the current cache start onto the stack.  */
 613   eassert (bidi_cache_sp < IT_STACK_SIZE);
 614   bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
 615
 616   /* Start a new level of cache, and make it empty.  */
 617   bidi_cache_start = bidi_cache_idx;
 618   bidi_cache_last_idx = -1;
 619 }
 620
 621 /* Restore the iterator state saved by bidi_push_it and return the
 622    cache to the corresponding state.  */
 623 void
 624 bidi_pop_it (struct bidi_it *bidi_it)
 625 {
 626   if (bidi_cache_start <= 0)
 627     emacs_abort ();
 628
 629   /* Reset the next free cache slot index to what it was before the
 630      call to bidi_push_it.  */
 631   bidi_cache_idx = bidi_cache_start - 1;
 632
 633   /* Restore the bidi iterator state saved in the cache.  */
 634   *bidi_it = bidi_cache[bidi_cache_idx];
 635
 636   /* Pop the previous cache start from the stack.  */
 637   if (bidi_cache_sp <= 0)
 638     emacs_abort ();
 639   bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
 640
 641   /* Invalidate the last-used cache slot data.  */
 642   bidi_cache_last_idx = -1;
 643 }
 644
 645 static ptrdiff_t bidi_cache_total_alloc;
 646
 647 /* Stash away a copy of the cache and its control variables.  */
 648 void *
 649 bidi_shelve_cache (void)
 650 {
 651   unsigned char *databuf;
 652   ptrdiff_t alloc;
 653
 654   /* Empty cache.  */
 655   if (bidi_cache_idx == 0)
 656     return NULL;
 657
 658   alloc = (bidi_shelve_header_size
 659            + bidi_cache_idx * sizeof (struct bidi_it));
 660   databuf = xmalloc (alloc);
 661   bidi_cache_total_alloc += alloc;
 662
 663   memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
 664   memcpy (databuf + sizeof (bidi_cache_idx),
 665           bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
 666   memcpy (databuf + sizeof (bidi_cache_idx)
 667           + bidi_cache_idx * sizeof (struct bidi_it),
 668           bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
 669   memcpy (databuf + sizeof (bidi_cache_idx)
 670           + bidi_cache_idx * sizeof (struct bidi_it)
 671           + sizeof (bidi_cache_start_stack),
 672           &bidi_cache_sp, sizeof (bidi_cache_sp));
 673   memcpy (databuf + sizeof (bidi_cache_idx)
 674           + bidi_cache_idx * sizeof (struct bidi_it)
 675           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 676           &bidi_cache_start, sizeof (bidi_cache_start));
 677   memcpy (databuf + sizeof (bidi_cache_idx)
 678           + bidi_cache_idx * sizeof (struct bidi_it)
 679           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 680           + sizeof (bidi_cache_start),
 681           &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
 682
 683   return databuf;
 684 }
 685
 686 /* Restore the cache state from a copy stashed away by
 687    bidi_shelve_cache, and free the buffer used to stash that copy.
 688    JUST_FREE means free the buffer, but don't restore the
 689    cache; used when the corresponding iterator is discarded instead of
 690    being restored.  */
 691 void
 692 bidi_unshelve_cache (void *databuf, bool just_free)
 693 {
 694   unsigned char *p = databuf;
 695
 696   if (!p)
 697     {
 698       if (!just_free)
 699         {
 700           /* A NULL pointer means an empty cache.  */
 701           bidi_cache_start = 0;
 702           bidi_cache_sp = 0;
 703           bidi_cache_reset ();
 704         }
 705     }
 706   else
 707     {
 708       if (just_free)
 709         {
 710           ptrdiff_t idx;
 711
 712           memcpy (&idx, p, sizeof (bidi_cache_idx));
 713           bidi_cache_total_alloc
 714             -= bidi_shelve_header_size + idx * sizeof (struct bidi_it);
 715         }
 716       else
 717         {
 718           memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
 719           bidi_cache_ensure_space (bidi_cache_idx);
 720           memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
 721                   bidi_cache_idx * sizeof (struct bidi_it));
 722           memcpy (bidi_cache_start_stack,
 723                   p + sizeof (bidi_cache_idx)
 724                   + bidi_cache_idx * sizeof (struct bidi_it),
 725                   sizeof (bidi_cache_start_stack));
 726           memcpy (&bidi_cache_sp,
 727                   p + sizeof (bidi_cache_idx)
 728                   + bidi_cache_idx * sizeof (struct bidi_it)
 729                   + sizeof (bidi_cache_start_stack),
 730                   sizeof (bidi_cache_sp));
 731           memcpy (&bidi_cache_start,
 732                   p + sizeof (bidi_cache_idx)
 733                   + bidi_cache_idx * sizeof (struct bidi_it)
 734                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 735                   sizeof (bidi_cache_start));
 736           memcpy (&bidi_cache_last_idx,
 737                   p + sizeof (bidi_cache_idx)
 738                   + bidi_cache_idx * sizeof (struct bidi_it)
 739                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 740                   + sizeof (bidi_cache_start),
 741                   sizeof (bidi_cache_last_idx));
 742           bidi_cache_total_alloc
 743             -= (bidi_shelve_header_size
 744                 + bidi_cache_idx * sizeof (struct bidi_it));
 745         }
 746
 747       xfree (p);
 748     }
 749 }
 750
 751 \f
 752 /***********************************************************************
 753                         Initialization
 754  ***********************************************************************/
 755 static void
 756 bidi_initialize (void)
 757 {
 758   bidi_type_table = uniprop_table (intern ("bidi-class"));
 759   if (NILP (bidi_type_table))
 760     emacs_abort ();
 761   staticpro (&bidi_type_table);
 762
 763   bidi_mirror_table = uniprop_table (intern ("mirroring"));
 764   if (NILP (bidi_mirror_table))
 765     emacs_abort ();
 766   staticpro (&bidi_mirror_table);
 767
 768   Qparagraph_start = intern ("paragraph-start");
 769   staticpro (&Qparagraph_start);
 770   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 771   if (!STRINGP (paragraph_start_re))
 772     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 773   staticpro (&paragraph_start_re);
 774   Qparagraph_separate = intern ("paragraph-separate");
 775   staticpro (&Qparagraph_separate);
 776   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 777   if (!STRINGP (paragraph_separate_re))
 778     paragraph_separate_re = build_string ("[ \t\f]*$");
 779   staticpro (&paragraph_separate_re);
 780
 781   bidi_cache_sp = 0;
 782   bidi_cache_total_alloc = 0;
 783
 784   bidi_initialized = 1;
 785 }
 786
 787 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 788    end.  */
 789 static void
 790 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 791 {
 792   bidi_it->invalid_levels = 0;
 793   bidi_it->invalid_rl_levels = -1;
 794   bidi_it->stack_idx = 0;
 795   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 796 }
 797
 798 /* Initialize the bidi iterator from buffer/string position CHARPOS.  */
 799 void
 800 bidi_init_it (ptrdiff_t charpos, ptrdiff_t bytepos, bool frame_window_p,
 801               struct bidi_it *bidi_it)
 802 {
 803   if (! bidi_initialized)
 804     bidi_initialize ();
 805   if (charpos >= 0)
 806     bidi_it->charpos = charpos;
 807   if (bytepos >= 0)
 808     bidi_it->bytepos = bytepos;
 809   bidi_it->frame_window_p = frame_window_p;
 810   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
 811   bidi_it->first_elt = 1;
 812   bidi_set_paragraph_end (bidi_it);
 813   bidi_it->new_paragraph = 1;
 814   bidi_it->separator_limit = -1;
 815   bidi_it->type = NEUTRAL_B;
 816   bidi_it->type_after_w1 = NEUTRAL_B;
 817   bidi_it->orig_type = NEUTRAL_B;
 818   bidi_it->prev_was_pdf = 0;
 819   bidi_it->prev.type = bidi_it->prev.type_after_w1
 820     = bidi_it->prev.orig_type = UNKNOWN_BT;
 821   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 822     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 823   bidi_it->next_for_neutral.charpos = -1;
 824   bidi_it->next_for_neutral.type
 825     = bidi_it->next_for_neutral.type_after_w1
 826     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 827   bidi_it->prev_for_neutral.charpos = -1;
 828   bidi_it->prev_for_neutral.type
 829     = bidi_it->prev_for_neutral.type_after_w1
 830     = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 831   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 832   bidi_it->disp_pos = -1;       /* invalid/unknown */
 833   bidi_it->disp_prop = 0;
 834   /* We can only shrink the cache if we are at the bottom level of its
 835      "stack".  */
 836   if (bidi_cache_start == 0)
 837     bidi_cache_shrink ();
 838   else
 839     bidi_cache_reset ();
 840 }
 841
 842 /* Perform initializations for reordering a new line of bidi text.  */
 843 static void
 844 bidi_line_init (struct bidi_it *bidi_it)
 845 {
 846   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 847   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 848   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 849   bidi_it->invalid_levels = 0;
 850   bidi_it->invalid_rl_levels = -1;
 851   /* Setting this to zero will force its recomputation the first time
 852      we need it for W5.  */
 853   bidi_it->next_en_pos = 0;
 854   bidi_it->next_en_type = UNKNOWN_BT;
 855   bidi_it->next_for_ws.type = UNKNOWN_BT;
 856   bidi_set_sor_type (bidi_it,
 857                      (bidi_it->paragraph_dir == R2L ? 1 : 0),
 858                      bidi_it->level_stack[0].level); /* X10 */
 859
 860   bidi_cache_reset ();
 861 }
 862
 863 \f
 864 /***********************************************************************
 865                         Fetching characters
 866  ***********************************************************************/
 867
 868 /* Count bytes in string S between BEG/BEGBYTE and END.  BEG and END
 869    are zero-based character positions in S, BEGBYTE is byte position
 870    corresponding to BEG.  UNIBYTE means S is a unibyte string.  */
 871 static ptrdiff_t
 872 bidi_count_bytes (const unsigned char *s, const ptrdiff_t beg,
 873                   const ptrdiff_t begbyte, const ptrdiff_t end, bool unibyte)
 874 {
 875   ptrdiff_t pos = beg;
 876   const unsigned char *p = s + begbyte, *start = p;
 877
 878   if (unibyte)
 879     p = s + end;
 880   else
 881     {
 882       if (!CHAR_HEAD_P (*p))
 883         emacs_abort ();
 884
 885       while (pos < end)
 886         {
 887           p += BYTES_BY_CHAR_HEAD (*p);
 888           pos++;
 889         }
 890     }
 891
 892   return p - start;
 893 }
 894
 895 /* Fetch and return the character at byte position BYTEPOS.  If S is
 896    non-NULL, fetch the character from string S; otherwise fetch the
 897    character from the current buffer.  UNIBYTE means S is a
 898    unibyte string.  */
 899 static int
 900 bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, bool unibyte)
 901 {
 902   if (s)
 903     {
 904       s += bytepos;
 905       if (unibyte)
 906         return *s;
 907     }
 908   else
 909     s = BYTE_POS_ADDR (bytepos);
 910   return STRING_CHAR (s);
 911 }
 912
 913 /* Fetch and return the character at BYTEPOS/CHARPOS.  If that
 914    character is covered by a display string, treat the entire run of
 915    covered characters as a single character, either u+2029 or u+FFFC,
 916    and return their combined length in CH_LEN and NCHARS.  DISP_POS
 917    specifies the character position of the next display string, or -1
 918    if not yet computed.  When the next character is at or beyond that
 919    position, the function updates DISP_POS with the position of the
 920    next display string.  *DISP_PROP non-zero means that there's really
 921    a display string at DISP_POS, as opposed to when we searched till
 922    DISP_POS without finding one.  If *DISP_PROP is 2, it means the
 923    display spec is of the form `(space ...)', which is replaced with
 924    u+2029 to handle it as a paragraph separator.  STRING->s is the C
 925    string to iterate, or NULL if iterating over a buffer or a Lisp
 926    string; in the latter case, STRING->lstring is the Lisp string.  */
 927 static int
 928 bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos,
 929                  int *disp_prop, struct bidi_string_data *string,
 930                  bool frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
 931 {
 932   int ch;
 933   ptrdiff_t endpos
 934     = (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
 935   struct text_pos pos;
 936   int len;
 937
 938   /* If we got past the last known position of display string, compute
 939      the position of the next one.  That position could be at CHARPOS.  */
 940   if (charpos < endpos && charpos > *disp_pos)
 941     {
 942       SET_TEXT_POS (pos, charpos, bytepos);
 943       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
 944                                               disp_prop);
 945     }
 946
 947   /* Fetch the character at BYTEPOS.  */
 948   if (charpos >= endpos)
 949     {
 950       ch = BIDI_EOB;
 951       *ch_len = 1;
 952       *nchars = 1;
 953       *disp_pos = endpos;
 954       *disp_prop = 0;
 955     }
 956   else if (charpos >= *disp_pos && *disp_prop)
 957     {
 958       ptrdiff_t disp_end_pos;
 959
 960       /* We don't expect to find ourselves in the middle of a display
 961          property.  Hopefully, it will never be needed.  */
 962       if (charpos > *disp_pos)
 963         emacs_abort ();
 964       /* Text covered by `display' properties and overlays with
 965          display properties or display strings is handled as a single
 966          character that represents the entire run of characters
 967          covered by the display property.  */
 968       if (*disp_prop == 2)
 969         {
 970           /* `(space ...)' display specs are handled as paragraph
 971              separators for the purposes of the reordering; see UAX#9
 972              section 3 and clause HL1 in section 4.3 there.  */
 973           ch = 0x2029;
 974         }
 975       else
 976         {
 977           /* All other display specs are handled as the Unicode Object
 978              Replacement Character.  */
 979           ch = 0xFFFC;
 980         }
 981       disp_end_pos = compute_display_string_end (*disp_pos, string);
 982       if (disp_end_pos < 0)
 983         {
 984           /* Somebody removed the display string from the buffer
 985              behind our back.  Recover by processing this buffer
 986              position as if no display property were present there to
 987              begin with.  */
 988           *disp_prop = 0;
 989           goto normal_char;
 990         }
 991       *nchars = disp_end_pos - *disp_pos;
 992       if (*nchars <= 0)
 993         emacs_abort ();
 994       if (string->s)
 995         *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
 996                                     disp_end_pos, string->unibyte);
 997       else if (STRINGP (string->lstring))
 998         *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
 999                                     bytepos, disp_end_pos, string->unibyte);
1000       else
1001         *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
1002     }
1003   else
1004     {
1005     normal_char:
1006       if (string->s)
1007         {
1008
1009           if (!string->unibyte)
1010             {
1011               ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
1012               *ch_len = len;
1013             }
1014           else
1015             {
1016               ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
1017               *ch_len = 1;
1018             }
1019         }
1020       else if (STRINGP (string->lstring))
1021         {
1022           if (!string->unibyte)
1023             {
1024               ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
1025                                            len);
1026               *ch_len = len;
1027             }
1028           else
1029             {
1030               ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
1031               *ch_len = 1;
1032             }
1033         }
1034       else
1035         {
1036           ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (bytepos), len);
1037           *ch_len = len;
1038         }
1039       *nchars = 1;
1040     }
1041
1042   /* If we just entered a run of characters covered by a display
1043      string, compute the position of the next display string.  */
1044   if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
1045       && *disp_prop)
1046     {
1047       SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
1048       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
1049                                               disp_prop);
1050     }
1051
1052   return ch;
1053 }
1054
1055 \f
1056 /***********************************************************************
1057                         Determining paragraph direction
1058  ***********************************************************************/
1059
1060 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1061    Value is the non-negative length of the paragraph separator
1062    following the buffer position, -1 if position is at the beginning
1063    of a new paragraph, or -2 if position is neither at beginning nor
1064    at end of a paragraph.  */
1065 static ptrdiff_t
1066 bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
1067 {
1068   Lisp_Object sep_re;
1069   Lisp_Object start_re;
1070   ptrdiff_t val;
1071
1072   sep_re = paragraph_separate_re;
1073   start_re = paragraph_start_re;
1074
1075   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1076   if (val < 0)
1077     {
1078       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1079         val = -1;
1080       else
1081         val = -2;
1082     }
1083
1084   return val;
1085 }
1086
/* On my 2005-vintage machine, searching back for paragraph start
   takes ~1 ms per line.  And bidi_paragraph_init is called 4 times
   when user types C-p.  The number below limits each call to
   bidi_paragraph_init to about 10 ms.

   The value caps the number of times bidi_find_paragraph_start tests
   paragraph_start_re while scanning back, one test per line it
   visits.  */
#define MAX_PARAGRAPH_SEARCH 7500
1092
1093 /* Find the beginning of this paragraph by looking back in the buffer.
1094    Value is the byte position of the paragraph's beginning, or
1095    BEGV_BYTE if paragraph_start_re is still not found after looking
1096    back MAX_PARAGRAPH_SEARCH lines in the buffer.  */
1097 static ptrdiff_t
1098 bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
1099 {
1100   Lisp_Object re = paragraph_start_re;
1101   ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
1102   ptrdiff_t n = 0;
1103
1104   while (pos_byte > BEGV_BYTE
1105          && n++ < MAX_PARAGRAPH_SEARCH
1106          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1107     /* FIXME: What if the paragraph beginning is covered by a
1108        display string?  And what if a display string covering some
1109        of the text over which we scan back includes
1110        paragraph_start_re?  */
1111     pos = find_next_newline_no_quit (pos - 1, -1, &pos_byte);
1112   if (n >= MAX_PARAGRAPH_SEARCH)
1113     pos_byte = BEGV_BYTE;
1114   return pos_byte;
1115 }
1116
/* On a 3.4 GHz machine, searching forward for a strong directional
   character in a long paragraph full of weaks or neutrals takes about
   1 ms for each 20K characters.  The number below limits each call to
   bidi_paragraph_init to less than 10 ms even on slow machines.

   The value is a number of character positions: bidi_paragraph_init
   stops looking for a strong character once it has advanced this far
   from the paragraph start.  */
#define MAX_STRONG_CHAR_SEARCH 100000
1122
/* Determine the base direction, a.k.a. base embedding level, of the
   paragraph we are about to iterate through.  If DIR is either L2R or
   R2L, just use that.  Otherwise, determine the paragraph direction
   from the first strong directional character of the paragraph.

   NO_DEFAULT_P means don't default to L2R if the paragraph
   has no strong directional characters and both DIR and
   bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
   in the buffer until a paragraph is found with a strong character,
   or until hitting BEGV.  In the latter case, fall back to L2R.  This
   flag is used in current-bidi-paragraph-direction.

   Note that this function gives the paragraph separator the same
   direction as the preceding paragraph, even though Emacs generally
   views the separator as not belonging to any paragraph.

   On a normal return, bidi_it->paragraph_dir is L2R or R2L,
   bidi_it->level_stack[0].level is 1 for R2L and 0 for L2R, and
   bidi_line_init has been called.  The exception is the early return
   taken when DIR is NEUTRAL_DIR and the iterator is still inside a
   paragraph separator: then nothing in BIDI_IT is changed.

   Abort if DIR is not one of L2R, R2L and NEUTRAL_DIR, or if the
   iterator is at the end of non-empty text or before its
   beginning.  */
void
bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p)
{
  ptrdiff_t bytepos = bidi_it->bytepos;
  bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
  ptrdiff_t pstartbyte;
  /* Note that begbyte is a byte position, while end is a character
     position.  Yes, this is ugly, but we are trying to avoid costly
     calls to BYTE_TO_CHAR and its ilk.  */
  ptrdiff_t begbyte = string_p ? 0 : BEGV_BYTE;
  ptrdiff_t end = string_p ? bidi_it->string.schars : ZV;

  /* Special case for an empty buffer. */
  if (bytepos == begbyte && bidi_it->charpos == end)
    dir = L2R;
  /* We should never be called at EOB or before BEGV.  */
  else if (bidi_it->charpos >= end || bytepos < begbyte)
    emacs_abort ();

  if (dir == L2R)
    {
      bidi_it->paragraph_dir = L2R;
      bidi_it->new_paragraph = 0;
    }
  else if (dir == R2L)
    {
      bidi_it->paragraph_dir = R2L;
      bidi_it->new_paragraph = 0;
    }
  else if (dir == NEUTRAL_DIR)  /* P2 */
    {
      int ch;
      ptrdiff_t ch_len, nchars;
      ptrdiff_t pos, disp_pos = -1;
      int disp_prop = 0;
      bidi_type_t type;
      const unsigned char *s;

      if (!bidi_initialized)
        bidi_initialize ();

      /* If we are inside a paragraph separator, we are just waiting
         for the separator to be exhausted; use the previous paragraph
         direction.  But don't do that if we have been just reseated,
         because we need to reinitialize below in that case.  */
      if (!bidi_it->first_elt
          && bidi_it->charpos < bidi_it->separator_limit)
        return;

      /* If we are on a newline, get past it to where the next
         paragraph might start.  But don't do that at BEGV since then
         we are potentially in a new paragraph that doesn't yet
         exist.  */
      pos = bidi_it->charpos;
      /* S stays NULL when iterating over a buffer, which makes
         bidi_char_at_pos read from the current buffer.  */
      s = (STRINGP (bidi_it->string.lstring)
           ? SDATA (bidi_it->string.lstring)
           : bidi_it->string.s);
      if (bytepos > begbyte
          && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
        {
          bytepos++;
          pos++;
        }

      /* We are either at the beginning of a paragraph or in the
         middle of it.  Find where this paragraph starts.  */
      if (string_p)
        {
          /* We don't support changes of paragraph direction inside a
             string.  It is treated as a single paragraph.  */
          pstartbyte = 0;
        }
      else
        pstartbyte = bidi_find_paragraph_start (pos, bytepos);
      bidi_it->separator_limit = -1;
      bidi_it->new_paragraph = 0;

      /* The following loop is run more than once only if NO_DEFAULT_P,
         and only if we are iterating on a buffer.  */
      do {
        /* Character position where this scan started; used below to
           enforce MAX_STRONG_CHAR_SEARCH.  */
        ptrdiff_t pos1;

        bytepos = pstartbyte;
        if (!string_p)
          pos = BYTE_TO_CHAR (bytepos);
        ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
                              &bidi_it->string,
                              bidi_it->frame_window_p, &ch_len, &nchars);
        type = bidi_get_type (ch, NEUTRAL_DIR);

        pos1 = pos;
        /* Scan forward until a strong character is found.  The
           explicit embedding and override codes also end the scan,
           unless bidi_ignore_explicit_marks_for_paragraph_level says
           to skip them.  POS and BYTEPOS always point past the
           character whose type is in TYPE.  */
        for (pos += nchars, bytepos += ch_len;
             ((bidi_get_category (type) != STRONG)
              || (bidi_ignore_explicit_marks_for_paragraph_level
                  && (type == RLE || type == RLO
                      || type == LRE || type == LRO)))
               /* Stop when searched too far into an abnormally large
                  paragraph full of weak or neutral characters.  */
               && pos - pos1 < MAX_STRONG_CHAR_SEARCH;
             type = bidi_get_type (ch, NEUTRAL_DIR))
          {
            if (pos >= end)
              {
                /* Pretend there's a paragraph separator at end of
                   buffer/string.  */
                type = NEUTRAL_B;
                break;
              }
            /* In a buffer, a paragraph separator character ends the
               scan only if the text after it is a paragraph
               separator or the start of a new paragraph.  */
            if (!string_p
                && type == NEUTRAL_B
                && bidi_at_paragraph_end (pos, bytepos) >= -1)
              break;
            /* Fetch next character and advance to get past it.  */
            ch = bidi_fetch_char (bytepos, pos, &disp_pos,
                                  &disp_prop, &bidi_it->string,
                                  bidi_it->frame_window_p, &ch_len, &nchars);
            pos += nchars;
            bytepos += ch_len;
          }
        /* If the scan ended on anything else, paragraph_dir is left
           as it was.  */
        if ((type == STRONG_R || type == STRONG_AL) /* P3 */
            || (!bidi_ignore_explicit_marks_for_paragraph_level
                && (type == RLO || type == RLE)))
          bidi_it->paragraph_dir = R2L;
        else if (type == STRONG_L
                 || (!bidi_ignore_explicit_marks_for_paragraph_level
                     && (type == LRO || type == LRE)))
          bidi_it->paragraph_dir = L2R;
        if (!string_p
            && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
          {
            /* If this paragraph is at BEGV, default to L2R.  */
            if (pstartbyte == BEGV_BYTE)
              bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
            else
              {
                ptrdiff_t prevpbyte = pstartbyte;
                ptrdiff_t p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;

                /* Find the beginning of the previous paragraph, if any.
                   Step back one character at a time until
                   bidi_find_paragraph_start reports a start that
                   precedes the current one, or BEGV is reached.  */
                while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
                  {
                    /* FIXME: What if p is covered by a display
                       string?  See also a FIXME inside
                       bidi_find_paragraph_start.  */
                    p--;
                    pbyte = CHAR_TO_BYTE (p);
                    prevpbyte = bidi_find_paragraph_start (p, pbyte);
                  }
                pstartbyte = prevpbyte;
              }
          }
      } while (!string_p
               && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
    }
  else
    emacs_abort ();

  /* Contrary to UAX#9 clause P3, we only default the paragraph
     direction to L2R if we have no previous usable paragraph
     direction.  This is allowed by the HL1 clause.  */
  if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
    bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
  /* The base embedding level is 1 for right-to-left paragraphs and 0
     for left-to-right ones.  */
  if (bidi_it->paragraph_dir == R2L)
    bidi_it->level_stack[0].level = 1;
  else
    bidi_it->level_stack[0].level = 0;

  bidi_line_init (bidi_it);
}
1307
1308 \f
1309 /***********************************************************************
1310                  Resolving explicit and implicit levels.
1311   The rest of this file constitutes the core of the UBA implementation.
1312  ***********************************************************************/
1313
1314 static bool
1315 bidi_explicit_dir_char (int ch)
1316 {
1317   bidi_type_t ch_type;
1318
1319   if (!bidi_initialized)
1320     emacs_abort ();
1321   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1322   return (ch_type == LRE || ch_type == LRO
1323           || ch_type == RLE || ch_type == RLO
1324           || ch_type == PDF);
1325 }
1326
/* A helper function for bidi_resolve_explicit.  It advances to the
   next character in logical order and determines the new embedding
   level and directional override, but does not take into account
   empty embeddings.

   On return, BIDI_IT describes the character just fetched: its
   position, ch, ch_len and nchars, its original type in orig_type,
   and its type after processing the explicit codes in type.  The
   embedding and override codes themselves are given type WEAK_BN
   (or WEAK_EN), with their original type kept in type_after_w1.
   The value is the embedding level that follows from the explicit
   codes seen so far.

   The level stack is pushed and popped only while
   bidi_it->ignore_bn_limit is -1 or less.  */
static int
bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
{
  int curchar;
  bidi_type_t type;
  int current_level;
  int new_level;
  bidi_dir_t override;
  bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);

  /* If reseat()'ed, don't advance, so as to start iteration from the
     position where we were reseated.  bidi_it->bytepos can be less
     than BEGV_BYTE after reseat to BEGV.  */
  if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
      || bidi_it->first_elt)
    {
      bidi_it->first_elt = 0;
      /* Clamp the character position to the beginning of the text
         and recompute the byte position from it.  */
      if (string_p)
        {
          const unsigned char *p
            = (STRINGP (bidi_it->string.lstring)
               ? SDATA (bidi_it->string.lstring)
               : bidi_it->string.s);

          if (bidi_it->charpos < 0)
            bidi_it->charpos = 0;
          bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
                                               bidi_it->string.unibyte);
        }
      else
        {
          if (bidi_it->charpos < BEGV)
            bidi_it->charpos = BEGV;
          bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
        }
    }
  /* Don't move at end of buffer/string.  */
  else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
    {
      /* Advance to the next character, skipping characters covered by
         display strings (nchars > 1).  */
      if (bidi_it->nchars <= 0)
        emacs_abort ();
      bidi_it->charpos += bidi_it->nchars;
      if (bidi_it->ch_len == 0)
        emacs_abort ();
      bidi_it->bytepos += bidi_it->ch_len;
    }

  current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
  override = bidi_it->level_stack[bidi_it->stack_idx].override;
  new_level = current_level;

  if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
    {
      curchar = BIDI_EOB;
      bidi_it->ch_len = 1;
      bidi_it->nchars = 1;
      bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
      bidi_it->disp_prop = 0;
    }
  else
    {
      /* Fetch the character at BYTEPOS.  If it is covered by a
         display string, treat the entire run of covered characters as
         a single character, either u+2029 or u+FFFC.  */
      curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
                                 &bidi_it->disp_pos, &bidi_it->disp_prop,
                                 &bidi_it->string, bidi_it->frame_window_p,
                                 &bidi_it->ch_len, &bidi_it->nchars);
    }
  bidi_it->ch = curchar;

  /* Don't apply directional override here, as all the types we handle
     below will not be affected by the override anyway, and we need
     the original type unaltered.  The override will be applied in
     bidi_resolve_weak.  */
  type = bidi_get_type (curchar, NEUTRAL_DIR);
  bidi_it->orig_type = type;
  bidi_check_type (bidi_it->orig_type);

  if (type != PDF)
    bidi_it->prev_was_pdf = 0;

  bidi_it->type_after_w1 = UNKNOWN_BT;

  switch (type)
    {
      case RLE: /* X2 */
      case RLO: /* X4 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            if (current_level <= BIDI_MAXLEVEL - 4)
              {
                /* Compute the least odd embedding level greater than
                   the current level.  */
                new_level = ((current_level + 1) & ~1) + 1;
                if (bidi_it->type_after_w1 == RLE)
                  override = NEUTRAL_DIR;
                else
                  override = R2L;
                if (current_level == BIDI_MAXLEVEL - 4)
                  bidi_it->invalid_rl_levels = 0;
                bidi_push_embedding_level (bidi_it, new_level, override);
              }
            else
              {
                /* The level cannot go any higher: only count this
                   code as invalid.  */
                bidi_it->invalid_levels++;
                /* See the commentary about invalid_rl_levels below.  */
                if (bidi_it->invalid_rl_levels < 0)
                  bidi_it->invalid_rl_levels = 0;
                bidi_it->invalid_rl_levels++;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || (bidi_it->next_en_pos > bidi_it->charpos
                     && bidi_it->next_en_type == WEAK_EN))
          type = WEAK_EN;
        break;
      case LRE: /* X3 */
      case LRO: /* X5 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            if (current_level <= BIDI_MAXLEVEL - 5)
              {
                /* Compute the least even embedding level greater than
                   the current level.  */
                new_level = ((current_level + 2) & ~1);
                if (bidi_it->type_after_w1 == LRE)
                  override = NEUTRAL_DIR;
                else
                  override = L2R;
                bidi_push_embedding_level (bidi_it, new_level, override);
              }
            else
              {
                bidi_it->invalid_levels++;
                /* invalid_rl_levels counts invalid levels encountered
                   while the embedding level was already too high for
                   LRE/LRO, but not for RLE/RLO.  That is because
                   there may be exactly one PDF which we should not
                   ignore even though invalid_levels is non-zero.
                   invalid_rl_levels helps to know what PDF is
                   that.  */
                if (bidi_it->invalid_rl_levels >= 0)
                  bidi_it->invalid_rl_levels++;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || (bidi_it->next_en_pos > bidi_it->charpos
                     && bidi_it->next_en_type == WEAK_EN))
          type = WEAK_EN;
        break;
      case PDF: /* X7 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            /* NOTE: the two tests below are deliberately not joined
               by `else'; the second one sees the counters as updated
               by the first.  */
            if (!bidi_it->invalid_rl_levels)
              {
                new_level = bidi_pop_embedding_level (bidi_it);
                bidi_it->invalid_rl_levels = -1;
                if (bidi_it->invalid_levels)
                  bidi_it->invalid_levels--;
                /* else nothing: UAX#9 says to ignore invalid PDFs */
              }
            if (!bidi_it->invalid_levels)
              new_level = bidi_pop_embedding_level (bidi_it);
            else
              {
                bidi_it->invalid_levels--;
                bidi_it->invalid_rl_levels--;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || (bidi_it->next_en_pos > bidi_it->charpos
                     && bidi_it->next_en_type == WEAK_EN))
          type = WEAK_EN;
        break;
      default:
        /* Nothing.  */
        break;
    }

  bidi_it->type = type;
  bidi_check_type (bidi_it->type);

  return new_level;
}
1527
1528 /* Given an iterator state in BIDI_IT, advance one character position
1529    in the buffer/string to the next character (in the logical order),
1530    resolve any explicit embeddings and directional overrides, and
1531    return the embedding level of the character after resolving
1532    explicit directives and ignoring empty embeddings.  */
1533 static int
1534 bidi_resolve_explicit (struct bidi_it *bidi_it)
1535 {
1536   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1537   int new_level  = bidi_resolve_explicit_1 (bidi_it);
1538   ptrdiff_t eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1539   const unsigned char *s
1540     = (STRINGP (bidi_it->string.lstring)
1541        ? SDATA (bidi_it->string.lstring)
1542        : bidi_it->string.s);
1543
1544   if (prev_level < new_level
1545       && bidi_it->type == WEAK_BN
1546       && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1547       && bidi_it->charpos < eob         /* not already at EOB */
1548       && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1549                                                    + bidi_it->ch_len, s,
1550                                                    bidi_it->string.unibyte)))
1551     {
1552       /* Avoid pushing and popping embedding levels if the level run
1553          is empty, as this breaks level runs where it shouldn't.
1554          UAX#9 removes all the explicit embedding and override codes,
1555          so empty embeddings disappear without a trace.  We need to
1556          behave as if we did the same.  */
1557       struct bidi_it saved_it;
1558       int level = prev_level;
1559
1560       bidi_copy_it (&saved_it, bidi_it);
1561
1562       while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1563                                                        + bidi_it->ch_len, s,
1564                                                        bidi_it->string.unibyte)))
1565         {
1566           /* This advances to the next character, skipping any
1567              characters covered by display strings.  */
1568           level = bidi_resolve_explicit_1 (bidi_it);
1569           /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1570              a pointer to its data is no longer valid.  */
1571           if (STRINGP (bidi_it->string.lstring))
1572             s = SDATA (bidi_it->string.lstring);
1573         }
1574
1575       if (bidi_it->nchars <= 0)
1576         emacs_abort ();
1577       if (level == prev_level)  /* empty embedding */
1578         saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1579       else                      /* this embedding is non-empty */
1580         saved_it.ignore_bn_limit = -2;
1581
1582       bidi_copy_it (bidi_it, &saved_it);
1583       if (bidi_it->ignore_bn_limit > -1)
1584         {
1585           /* We pushed a level, but we shouldn't have.  Undo that. */
1586           if (!bidi_it->invalid_rl_levels)
1587             {
1588               new_level = bidi_pop_embedding_level (bidi_it);
1589               bidi_it->invalid_rl_levels = -1;
1590               if (bidi_it->invalid_levels)
1591                 bidi_it->invalid_levels--;
1592             }
1593           if (!bidi_it->invalid_levels)
1594             new_level = bidi_pop_embedding_level (bidi_it);
1595           else
1596             {
1597               bidi_it->invalid_levels--;
1598               bidi_it->invalid_rl_levels--;
1599             }
1600         }
1601     }
1602
1603   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1604     {
1605       bidi_set_paragraph_end (bidi_it);
1606       /* This is needed by bidi_resolve_weak below, and in L1.  */
1607       bidi_it->type_after_w1 = bidi_it->type;
1608       bidi_check_type (bidi_it->type_after_w1);
1609     }
1610
1611   return new_level;
1612 }
1613
1614 /* Advance in the buffer/string, resolve weak types and return the
1615    type of the next character after weak type resolution.

        BIDI_IT is the iterator state.  The function first calls
        bidi_resolve_explicit on it, and then applies the weak-type
        clauses W1..W7 of UAX#9 (as marked in the comments below) to the
        type found in BIDI_IT->type.  The result is stored back in
        BIDI_IT->type and also returned.  BIDI_IT->type_after_w1 is
        filled in along the way, unless it was already set.

        Some clauses need to look ahead; this is done by saving the
        iterator state in a local copy, scanning forward, and then
        copying the saved state back.

        Aborts if the type left in BIDI_IT->type after the explicit
        resolution is UNKNOWN_BT or one of LRE, LRO, RLE, RLO, PDF.  */
1616 static bidi_type_t
1617 bidi_resolve_weak (struct bidi_it *bidi_it)
1618 {
1619   bidi_type_t type;
1620   bidi_dir_t override;
       /* The level on top of the stack must be sampled before the call
          to bidi_resolve_explicit; it is compared with NEW_LEVEL below
          to detect the start of a new level run.  */
1621   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1622   int new_level  = bidi_resolve_explicit (bidi_it);
1623   int next_char;
1624   bidi_type_t type_of_next;
1625   struct bidi_it saved_it;
       /* Position at which look-ahead must stop: the number of
          characters in the string when iterating over a Lisp or C
          string, ZV otherwise.  */
1626   ptrdiff_t eob
1627     = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
1628        ? bidi_it->string.schars : ZV);
1629
1630   type = bidi_it->type;
1631   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1632
       /* These types are not expected to survive the explicit
          resolution above.  */
1633   if (type == UNKNOWN_BT
1634       || type == LRE
1635       || type == LRO
1636       || type == RLE
1637       || type == RLO
1638       || type == PDF)
1639     emacs_abort ();
1640
1641   if (new_level != prev_level
1642       || bidi_it->type == NEUTRAL_B)
1643     {
1644       /* We've got a new embedding level run, compute the directional
1645          type of sor and initialize per-run variables (UAX#9, clause
1646          X10).  */
1647       bidi_set_sor_type (bidi_it, prev_level, new_level);
1648     }
1649   else if (type == NEUTRAL_S || type == NEUTRAL_WS
1650            || type == WEAK_BN || type == STRONG_AL)
1651     bidi_it->type_after_w1 = type;      /* needed in L1 */
1652   bidi_check_type (bidi_it->type_after_w1);
1653
1654   /* Level and directional override status are already recorded in
1655      bidi_it, and do not need any change; see X6.  */
1656   if (override == R2L)          /* X6 */
1657     type = STRONG_R;
1658   else if (override == L2R)
1659     type = STRONG_L;
1660   else
1661     {
1662       if (type == WEAK_NSM)     /* W1 */
1663         {
1664           /* Note that we don't need to consider the case where the
1665              prev character has its type overridden by an RLO or LRO,
1666              because then either the type of this NSM would have been
1667              also overridden, or the previous character is outside the
1668              current level run, and thus not relevant to this NSM.
1669              This is why NSM gets the type_after_w1 of the previous
1670              character.  */
1671           if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1672               /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1673               && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1674             type = bidi_it->prev.type_after_w1;
1675           else if (bidi_it->sor == R2L)
1676             type = STRONG_R;
1677           else if (bidi_it->sor == L2R)
1678             type = STRONG_L;
1679           else /* shouldn't happen! */
1680             emacs_abort ();
1681         }
           /* Note that this "if" is not chained to the W1 test above, so
              a type produced by W1 is subject to the clauses below.  */
1682       if (type == WEAK_EN       /* W2 */
1683           && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1684         type = WEAK_AN;
1685       else if (type == STRONG_AL) /* W3 */
1686         type = STRONG_R;
1687       else if ((type == WEAK_ES /* W4 */
1688                 && bidi_it->prev.type_after_w1 == WEAK_EN
1689                 && bidi_it->prev.orig_type == WEAK_EN)
1690                || (type == WEAK_CS
1691                    && ((bidi_it->prev.type_after_w1 == WEAK_EN
1692                         && bidi_it->prev.orig_type == WEAK_EN)
1693                        || bidi_it->prev.type_after_w1 == WEAK_AN)))
1694         {
               /* The text we are iterating over, if it is a string; used
                  only to peek at the character that follows.  */
1695           const unsigned char *s
1696             = (STRINGP (bidi_it->string.lstring)
1697                ? SDATA (bidi_it->string.lstring)
1698                : bidi_it->string.s);
1699
1700           next_char = (bidi_it->charpos + bidi_it->nchars >= eob
1701                        ? BIDI_EOB
1702                        : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
1703                                            s, bidi_it->string.unibyte));
1704           type_of_next = bidi_get_type (next_char, override);
1705
               /* If what follows is a BN or an explicit formatting
                  character, scan forward, on this same embedding level,
                  past the run of BNs, and use the type of the character
                  where the scan stops.  The iterator state is saved
                  before the scan and copied back after it.  */
1706           if (type_of_next == WEAK_BN
1707               || bidi_explicit_dir_char (next_char))
1708             {
1709               bidi_copy_it (&saved_it, bidi_it);
1710               while (bidi_resolve_explicit (bidi_it) == new_level
1711                      && bidi_it->type == WEAK_BN)
1712                 ;
1713               type_of_next = bidi_it->type;
1714               bidi_copy_it (bidi_it, &saved_it);
1715             }
1716
1717           /* If the next character is EN, but the last strong-type
1718              character is AL, that next EN will be changed to AN when
1719              we process it in W2 above.  So in that case, this ES
1720              should not be changed into EN.  */
1721           if (type == WEAK_ES
1722               && type_of_next == WEAK_EN
1723               && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1724             type = WEAK_EN;
1725           else if (type == WEAK_CS)
1726             {
1727               if (bidi_it->prev.type_after_w1 == WEAK_AN
1728                   && (type_of_next == WEAK_AN
1729                       /* If the next character is EN, but the last
1730                          strong-type character is AL, EN will be later
1731                          changed to AN when we process it in W2 above.
1732                          So in that case, this CS is in effect between
1733                          two ANs, and is changed into AN.  */
1734                       || (type_of_next == WEAK_EN
1735                           && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1736                 type = WEAK_AN;
1737               else if (bidi_it->prev.type_after_w1 == WEAK_EN
1738                        && type_of_next == WEAK_EN
1739                        && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1740                 type = WEAK_EN;
1741             }
1742         }
1743       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
1744                || type == WEAK_BN)      /* W5/Retaining */
1745         {
               /* next_en_pos and next_en_type cache the result of an
                  earlier forward search done in the last branch below:
                  the position where that search stopped and the type
                  found there.  next_en_pos of -1 is set below when the
                  search hit the paragraph end.  */
1746           if (bidi_it->prev.type_after_w1 == WEAK_EN) /* ET/BN w/EN before it */
1747             type = WEAK_EN;
1748           else if (bidi_it->next_en_pos > bidi_it->charpos
1749                    && bidi_it->next_en_type != WEAK_BN)
1750             {
1751               if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
1752                 type = WEAK_EN;
1753             }
1754           else if (bidi_it->next_en_pos >=0)
1755             {
1756               ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
1757               const unsigned char *s = (STRINGP (bidi_it->string.lstring)
1758                                         ? SDATA (bidi_it->string.lstring)
1759                                         : bidi_it->string.s);
1760
1761               if (bidi_it->nchars <= 0)
1762                 emacs_abort ();
1763               next_char
1764                 = (bidi_it->charpos + bidi_it->nchars >= eob
1765                    ? BIDI_EOB
1766                    : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1767                                        bidi_it->string.unibyte));
1768               type_of_next = bidi_get_type (next_char, override);
1769
                   /* Scan forward, on this same embedding level, past any
                      ETs and BNs, remembering the type and the position
                      of the character where the scan stops.  The iterator
                      state is saved before the scan and copied back
                      after it.  */
1770               if (type_of_next == WEAK_ET
1771                   || type_of_next == WEAK_BN
1772                   || bidi_explicit_dir_char (next_char))
1773                 {
1774                   bidi_copy_it (&saved_it, bidi_it);
1775                   while (bidi_resolve_explicit (bidi_it) == new_level
1776                          && (bidi_it->type == WEAK_BN
1777                              || bidi_it->type == WEAK_ET))
1778                     ;
1779                   type_of_next = bidi_it->type;
1780                   en_pos = bidi_it->charpos;
1781                   bidi_copy_it (bidi_it, &saved_it);
1782                 }
1783               /* Remember this position, to speed up processing of the
1784                  next ETs.  */
1785               bidi_it->next_en_pos = en_pos;
1786               if (type_of_next == WEAK_EN)
1787                 {
1788                   /* If the last strong character is AL, the EN we've
1789                      found will become AN when we get to it (W2). */
1790                   if (bidi_it->last_strong.type_after_w1 == STRONG_AL)
1791                     type_of_next = WEAK_AN;
1792                   else if (type == WEAK_BN)
1793                     type = NEUTRAL_ON; /* W6/Retaining */
1794                   else
1795                     type = WEAK_EN;
1796                 }
1797               else if (type_of_next == NEUTRAL_B)
1798                 /* Record the fact that there are no more ENs from
1799                    here to the end of paragraph, to avoid entering the
1800                    loop above ever again in this paragraph.  */
1801                 bidi_it->next_en_pos = -1;
1802               /* Record the type of the character where we ended our search.  */
1803               bidi_it->next_en_type = type_of_next;
1804             }
1805         }
1806     }
1807
       /* Separators and terminators that were not converted to a number
          type above become neutrals.  */
1808   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1809       || (type == WEAK_BN
1810           && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
1811               || bidi_it->prev.type_after_w1 == WEAK_ES
1812               || bidi_it->prev.type_after_w1 == WEAK_ET)))
1813     type = NEUTRAL_ON;
1814
1815   /* Store the type we've got so far, before we clobber it with strong
1816      types in W7 and while resolving neutral types.  But leave alone
1817      the original types that were recorded above, because we will need
1818      them for the L1 clause.  */
1819   if (bidi_it->type_after_w1 == UNKNOWN_BT)
1820     bidi_it->type_after_w1 = type;
1821   bidi_check_type (bidi_it->type_after_w1);
1822
1823   if (type == WEAK_EN)  /* W7 */
1824     {
           /* EN becomes L when the last strong type recorded is L, or
              when none is recorded (presumably that is what UNKNOWN_BT
              in last_strong.type means -- TODO confirm) and sor is L2R.  */
1825       if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1826           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1827         type = STRONG_L;
1828     }
1829
1830   bidi_it->type = type;
1831   bidi_check_type (bidi_it->type);
1832   return type;
1833 }
1834
1835 /* Resolve the type of a neutral character according to the type of
1836    surrounding strong text and the current embedding level.  */
1837 static bidi_type_t
1838 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1839 {
1840   /* N1: European and Arabic numbers are treated as though they were R.  */
1841   if (next_type == WEAK_EN || next_type == WEAK_AN)
1842     next_type = STRONG_R;
1843   if (prev_type == WEAK_EN || prev_type == WEAK_AN)
1844     prev_type = STRONG_R;
1845
1846   if (next_type == prev_type)   /* N1 */
1847     return next_type;
1848   else if ((lev & 1) == 0)      /* N2 */
1849     return STRONG_L;
1850   else
1851     return STRONG_R;
1852 }
1853
     /* Advance BIDI_IT to the next character by calling
        bidi_resolve_weak, and then, if the type that comes back is a
        neutral one (other than NEUTRAL_B), or a WEAK_BN on an unchanged
        embedding level, resolve it according to the surrounding text by
        way of bidi_resolve_neutral_1.  Return the resulting type.

        Three quick paths use information already recorded in BIDI_IT;
        they only return the resolved type.  Otherwise the text is
        scanned forward to the end of the run of neutrals, caching the
        iterator states visited on the way, and the iterator state saved
        before the scan is then copied back into BIDI_IT, with its type
        and next_for_neutral members updated.

        Aborts if bidi_resolve_weak returns a type that is not expected
        here, or if the forward scan is needed while scan_dir is -1.  */
1854 static bidi_type_t
1855 bidi_resolve_neutral (struct bidi_it *bidi_it)
1856 {
       /* Levels on top of the stack before and after the advance.  */
1857   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1858   bidi_type_t type = bidi_resolve_weak (bidi_it);
1859   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1860
1861   if (!(type == STRONG_R
1862         || type == STRONG_L
1863         || type == WEAK_BN
1864         || type == WEAK_EN
1865         || type == WEAK_AN
1866         || type == NEUTRAL_B
1867         || type == NEUTRAL_S
1868         || type == NEUTRAL_WS
1869         || type == NEUTRAL_ON))
1870     emacs_abort ();
1871
1872   if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
1873                             we are already at paragraph end.  */
1874        && bidi_get_category (type) == NEUTRAL)
1875       || (type == WEAK_BN && prev_level == current_level))
1876     {
           /* The character that ends this run of neutrals is already
              known, so use it.  */
1877       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1878         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1879                                        bidi_it->next_for_neutral.type,
1880                                        current_level);
1881       /* The next two "else if" clauses are shortcuts for the
1882          important special case when we have a long sequence of
1883          neutral or WEAK_BN characters, such as whitespace or nulls or
1884          other control characters, on the base embedding level of the
1885          paragraph, and that sequence goes all the way to the end of
1886          the paragraph and follows a character whose resolved
1887          directionality is identical to the base embedding level.
1888          (This is what happens in a buffer with plain L2R text that
1889          happens to include long sequences of control characters.)  By
1890          virtue of N1, the result of examining this long sequence will
1891          always be either STRONG_L or STRONG_R, depending on the base
1892          embedding level.  So we use this fact directly instead of
1893          entering the expensive loop in the "else" clause.  */
1894       else if (current_level == 0
1895                && bidi_it->prev_for_neutral.type == STRONG_L
1896                && !bidi_explicit_dir_char (bidi_it->ch))
1897         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1898                                        STRONG_L, current_level);
1899       else if (/* current level is 1 */
1900                current_level == 1
1901                /* base embedding level is also 1 */
1902                && bidi_it->level_stack[0].level == 1
1903                /* previous character is one of those considered R for
1904                   the purposes of N1 (EN and AN are treated as R) */
1905                && (bidi_it->prev_for_neutral.type == STRONG_R
1906                    || bidi_it->prev_for_neutral.type == WEAK_EN
1907                    || bidi_it->prev_for_neutral.type == WEAK_AN)
1908                && !bidi_explicit_dir_char (bidi_it->ch))
1909         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1910                                        STRONG_R, current_level);
1911       else
1912         {
1913           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1914              the assumption of batch-style processing; see clauses W4,
1915              W5, and especially N1, which require to look far forward
1916              (as well as back) in the buffer/string.  May the fleas of
1917              a thousand camels infest the armpits of those who design
1918              supposedly general-purpose algorithms by looking at their
1919              own implementations, and fail to consider other possible
1920              implementations!  */
1921           struct bidi_it saved_it;
1922           bidi_type_t next_type;
1923
               /* The scan below goes forward only.  */
1924           if (bidi_it->scan_dir == -1)
1925             emacs_abort ();
1926
1927           bidi_copy_it (&saved_it, bidi_it);
1928           /* Scan the text forward until we find the first non-neutral
1929              character, and then use that to resolve the neutral we
1930              are dealing with now.  We also cache the scanned iterator
1931              states, to salvage some of the effort later.  */
1932           bidi_cache_iterator_state (bidi_it, 0);
1933           do {
1934             /* Record the info about the previous character, so that
1935                it will be cached below with this state.  */
1936             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1937                 && bidi_it->type != WEAK_BN)
1938               bidi_remember_char (&bidi_it->prev, bidi_it);
1939             type = bidi_resolve_weak (bidi_it);
1940             /* Paragraph separators have their levels fully resolved
1941                at this point, so cache them as resolved.  */
1942             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1943             /* FIXME: implement L1 here, by testing for a newline and
1944                resetting the level for any sequence of whitespace
1945                characters adjacent to it.  */
1946           } while (!(type == NEUTRAL_B
1947                      || (type != WEAK_BN
1948                          && bidi_get_category (type) != NEUTRAL)
1949                      /* This is all per level run, so stop when we
1950                         reach the end of this level run.  */
1951                      || (bidi_it->level_stack[bidi_it->stack_idx].level
1952                          != current_level)));
1953
               /* Record, in the saved state, the character where the scan
                  stopped; its type member is overwritten with NEXT_TYPE
                  further below.  */
1954           bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1955
1956           switch (type)
1957             {
1958               case STRONG_L:
1959               case STRONG_R:
1960               case STRONG_AL:
1961                 /* Actually, STRONG_AL cannot happen here, because
1962                    bidi_resolve_weak converts it to STRONG_R, per W3.  */
1963                 eassert (type != STRONG_AL);
1964                 next_type = type;
1965                 break;
1966               case WEAK_EN:
1967               case WEAK_AN:
1968                 /* N1: ``European and Arabic numbers are treated as
1969                    though they were R.''  */
1970                 next_type = STRONG_R;
1971                 break;
1972               case WEAK_BN:
1973               case NEUTRAL_ON:  /* W6/Retaining */
1974                 if (!bidi_explicit_dir_char (bidi_it->ch))
1975                   emacs_abort (); /* can't happen: BNs are skipped */
1976                 /* FALLTHROUGH */
1977               case NEUTRAL_B:
1978                 /* Marched all the way to the end of this level run.
1979                    We need to use the eor type, whose information is
1980                    stored by bidi_set_sor_type in the prev_for_neutral
1981                    member.  */
1982                 if (saved_it.type != WEAK_BN
1983                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1984                   next_type = bidi_it->prev_for_neutral.type;
1985                 else
1986                   {
1987                     /* This is a BN which does not adjoin neutrals.
1988                        Leave its type alone.  */
1989                     bidi_copy_it (bidi_it, &saved_it);
1990                     return bidi_it->type;
1991                   }
1992                 break;
1993               default:
1994                 emacs_abort ();
1995             }
               /* Resolve the character we started from, store the result
                  in the saved state, and make that state current again.  */
1996           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1997                                          next_type, current_level);
1998           saved_it.next_for_neutral.type = next_type;
1999           saved_it.type = type;
2000           bidi_check_type (next_type);
2001           bidi_check_type (type);
2002           bidi_copy_it (bidi_it, &saved_it);
2003         }
2004     }
2005   return type;
2006 }
2007
2008 /* Given an iterator state in BIDI_IT, advance one character position
2009    in the buffer/string to the next character (in the logical order),
2010    resolve the bidi type of that next character, and return that
2011    type.  */
2012 static bidi_type_t
2013 bidi_type_of_next_char (struct bidi_it *bidi_it)
2014 {
2015   bidi_type_t type;
2016
2017   /* This should always be called during a forward scan.  */
2018   if (bidi_it->scan_dir != 1)
2019     emacs_abort ();
2020
2021   /* Reset the limit until which to ignore BNs if we step out of the
2022      area where we found only empty levels.  */
2023   if ((bidi_it->ignore_bn_limit > -1
2024        && bidi_it->ignore_bn_limit <= bidi_it->charpos)
2025       || (bidi_it->ignore_bn_limit == -2
2026           && !bidi_explicit_dir_char (bidi_it->ch)))
2027     bidi_it->ignore_bn_limit = -1;
2028
2029   type = bidi_resolve_neutral (bidi_it);
2030
2031   return type;
2032 }
2033
2034 /* Given an iterator state BIDI_IT, advance one character position in
2035    the buffer/string to the next character (in the current scan
2036    direction), resolve the embedding and implicit levels of that next
2037    character, and return the resulting level.

        The level is also stored in BIDI_IT->resolved_level, except on
        the early returns, which return the value already found there.
        A state found in the cache is used as is if it is fully
        resolved.  Aborts if a backward scan does not find a fully
        resolved state in the cache.  */
2038 static int
2039 bidi_level_of_next_char (struct bidi_it *bidi_it)
2040 {
2041   bidi_type_t type;
       /* PREV_LEVEL stays at -1 unless we are scanning forward.  */
2042   int level, prev_level = -1;
2043   struct bidi_saved_info next_for_neutral;
       /* -2 is lower than any position accepted by the cache lookup
          below, so the cache is not consulted unless this is set.  */
2044   ptrdiff_t next_char_pos = -2;
2045
2046   if (bidi_it->scan_dir == 1)
2047     {
2048       ptrdiff_t eob
2049         = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2050            ? bidi_it->string.schars : ZV);
2051
2052       /* There's no sense in trying to advance if we hit end of text.  */
2053       if (bidi_it->charpos >= eob)
2054         return bidi_it->resolved_level;
2055
2056       /* Record the info about the previous character.  */
2057       if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
2058           && bidi_it->type != WEAK_BN)
2059         bidi_remember_char (&bidi_it->prev, bidi_it);
2060       if (bidi_it->type_after_w1 == STRONG_R
2061           || bidi_it->type_after_w1 == STRONG_L
2062           || bidi_it->type_after_w1 == STRONG_AL)
2063         bidi_remember_char (&bidi_it->last_strong, bidi_it);
2064       /* FIXME: it sounds like we don't need both prev and
2065          prev_for_neutral members, but I'm leaving them both for now.  */
2066       if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
2067           || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
2068         bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
2069
2070       /* If we overstepped the characters used for resolving neutrals
2071          and whitespace, invalidate their info in the iterator.  */
2072       if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
2073         bidi_it->next_for_neutral.type = UNKNOWN_BT;
2074       if (bidi_it->next_en_pos >= 0
2075           && bidi_it->charpos >= bidi_it->next_en_pos)
2076         {
2077           bidi_it->next_en_pos = 0;
2078           bidi_it->next_en_type = UNKNOWN_BT;
2079         }
2080       if (bidi_it->next_for_ws.type != UNKNOWN_BT
2081           && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
2082         bidi_it->next_for_ws.type = UNKNOWN_BT;
2083
2084       /* This must be taken before we fill the iterator with the info
2085          about the next char.  If we scan backwards, the iterator
2086          state must be already cached, so there's no need to know the
2087          embedding level of the previous character, since we will be
2088          returning to our caller shortly.  */
2089       prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2090     }
       /* Keep a copy, in case the state fetched from the cache below
          lacks this information.  */
2091   next_for_neutral = bidi_it->next_for_neutral;
2092
2093   /* Perhaps the character we want is already cached.  If it is, the
2094      call to bidi_cache_find below will return a type other than
2095      UNKNOWN_BT.  */
2096   if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
2097     {
           /* First position of the text: 0 for strings, 1 for buffers.  */
2098       int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2099                  ? 0 : 1);
2100       if (bidi_it->scan_dir > 0)
2101         {
2102           if (bidi_it->nchars <= 0)
2103             emacs_abort ();
2104           next_char_pos = bidi_it->charpos + bidi_it->nchars;
2105         }
2106       else if (bidi_it->charpos >= bob)
2107         /* Implementation note: we allow next_char_pos to be as low as
2108            0 for buffers or -1 for strings, and that is okay because
2109            that's the "position" of the sentinel iterator state we
2110            cached at the beginning of the iteration.  */
2111         next_char_pos = bidi_it->charpos - 1;
2112       if (next_char_pos >= bob - 1)
2113         type = bidi_cache_find (next_char_pos, -1, bidi_it);
2114       else
2115         type = UNKNOWN_BT;
2116     }
2117   else
2118     type = UNKNOWN_BT;
2119   if (type != UNKNOWN_BT)
2120     {
2121       /* Don't lose the information for resolving neutrals!  The
2122          cached states could have been cached before their
2123          next_for_neutral member was computed.  If we are on our way
2124          forward, we can simply take the info from the previous
2125          state.  */
2126       if (bidi_it->scan_dir == 1
2127           && bidi_it->next_for_neutral.type == UNKNOWN_BT)
2128         bidi_it->next_for_neutral = next_for_neutral;
2129
2130       /* If resolved_level is -1, it means this state was cached
2131          before it was completely resolved, so we cannot return
2132          it.  */
2133       if (bidi_it->resolved_level != -1)
2134         return bidi_it->resolved_level;
2135     }
2136   if (bidi_it->scan_dir == -1)
2137     /* If we are going backwards, the iterator state is already cached
2138        from previous scans, and should be fully resolved.  */
2139     emacs_abort ();
2140
       /* Nothing usable in the cache: advance and resolve the type.  */
2141   if (type == UNKNOWN_BT)
2142     type = bidi_type_of_next_char (bidi_it);
2143
2144   if (type == NEUTRAL_B)
2145     return bidi_it->resolved_level;
2146
2147   level = bidi_it->level_stack[bidi_it->stack_idx].level;
       /* There is no need to exclude NEUTRAL_B in the test below: it
          was already handled by the return above.  */
2148   if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
2149       || (type == WEAK_BN && prev_level == level))
2150     {
2151       if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
2152         emacs_abort ();
2153
2154       /* If the cached state shows a neutral character, it was not
2155          resolved by bidi_resolve_neutral, so do it now.  */
2156       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2157                                      bidi_it->next_for_neutral.type,
2158                                      level);
2159     }
2160
       /* By now only these types are expected.  */
2161   if (!(type == STRONG_R
2162         || type == STRONG_L
2163         || type == WEAK_BN
2164         || type == WEAK_EN
2165         || type == WEAK_AN))
2166     emacs_abort ();
2167   bidi_it->type = type;
2168   bidi_check_type (bidi_it->type);
2169
2170   /* For L1 below, we need to know, for each WS character, whether
2171      it belongs to a sequence of WS characters preceding a newline
2172      or a TAB or a paragraph separator.  */
2173   if (bidi_it->orig_type == NEUTRAL_WS
2174       && bidi_it->next_for_ws.type == UNKNOWN_BT)
2175     {
           /* The look-ahead below works on local copies of the position
              data, so the iterator itself does not move.  */
2176       int ch;
2177       ptrdiff_t clen = bidi_it->ch_len;
2178       ptrdiff_t bpos = bidi_it->bytepos;
2179       ptrdiff_t cpos = bidi_it->charpos;
2180       ptrdiff_t disp_pos = bidi_it->disp_pos;
2181       ptrdiff_t nc = bidi_it->nchars;
2182       struct bidi_string_data bs = bidi_it->string;
2183       bidi_type_t chtype;
2184       bool fwp = bidi_it->frame_window_p;
2185       int dpp = bidi_it->disp_prop;
2186
2187       if (bidi_it->nchars <= 0)
2188         emacs_abort ();
2189       do {
2190         ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
2191                               fwp, &clen, &nc);
2192         if (ch == '\n' || ch == BIDI_EOB)
2193           chtype = NEUTRAL_B;
2194         else
2195           chtype = bidi_get_type (ch, NEUTRAL_DIR);
2196       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
2197                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
           /* Record the type and the position of the character that
              ends this sequence of whitespace.  */
2198       bidi_it->next_for_ws.type = chtype;
2199       bidi_check_type (bidi_it->next_for_ws.type);
2200       bidi_it->next_for_ws.charpos = cpos;
2201       bidi_it->next_for_ws.bytepos = bpos;
2202     }
2203
2204   /* Resolve implicit levels, with a twist: PDFs get the embedding
2205      level of the embedding they terminate.  See below for the
2206      reason.  */
2207   if (bidi_it->orig_type == PDF
2208       /* Don't do this if this formatting code didn't change the
2209          embedding level due to invalid or empty embeddings.  */
2210       && prev_level != level)
2211     {
2212       /* Don't look in UAX#9 for the reason for this: it's our own
2213          private quirk.  The reason is that we want the formatting
2214          codes to be delivered so that they bracket the text of their
2215          embedding.  For example, given the text
2216
2217              {RLO}teST{PDF}
2218
2219          we want it to be displayed as
2220
2221              {PDF}STet{RLO}
2222
2223          not as
2224
2225              STet{RLO}{PDF}
2226
2227          which will result because we bump up the embedding level as
2228          soon as we see the RLO and pop it as soon as we see the PDF,
2229          so RLO itself has the same embedding level as "teST", and
2230          thus would be normally delivered last, just before the PDF.
2231          The assignment below fiddles with the level of PDF so that
2232          this ugly side effect does not happen.
2233
2234          (This is, of course, only important if the formatting codes
2235          are actually displayed, but Emacs does need to display them
2236          if the user wants to.)  */
2237       level = prev_level;
2238     }
2239   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2240            || bidi_it->orig_type == NEUTRAL_S
2241            || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
2242            || (bidi_it->orig_type == NEUTRAL_WS
2243                && (bidi_it->next_for_ws.type == NEUTRAL_B
2244                    || bidi_it->next_for_ws.type == NEUTRAL_S)))
         /* These get the level found at the bottom of the level stack.  */
2245     level = bidi_it->level_stack[0].level;
2246   else if ((level & 1) == 0) /* I1 */
2247     {
2248       if (type == STRONG_R)
2249         level++;
2250       else if (type == WEAK_EN || type == WEAK_AN)
2251         level += 2;
2252     }
2253   else                  /* I2 */
2254     {
2255       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
2256         level++;
2257     }
2258
2259   bidi_it->resolved_level = level;
2260   return level;
2261 }
2262
2263 /* Move to the other edge of a level given by LEVEL.  If END_FLAG,
2264    we are at the end of a level, and we need to prepare to
2265    resume the scan of the lower level.
2266
2267    If this level's other edge is cached, we simply jump to it, filling
2268    the iterator structure with the iterator state on the other edge.
2269    Otherwise, we walk the buffer or string until we come back to the
2270    same level as LEVEL.
2271
2272    Note: we are not talking here about a ``level run'' in the UAX#9
2273    sense of the term, but rather about a ``level'' which includes
2274    all the levels higher than it.  In other words, given the levels
2275    like this:
2276
2277          11111112222222333333334443343222222111111112223322111
2278                 A      B                    C
2279
2280    and assuming we are at point A scanning left to right, this
2281    function moves to point C, whereas the UAX#9 ``level 2 run'' ends
2282    at point B.  */
2283 static void
2284 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, bool end_flag)
2285 {
2286   int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
2287   ptrdiff_t idx;
2288
2289   /* Try the cache first.  */
2290   if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
2291       >= bidi_cache_start)
2292     bidi_cache_fetch_state (idx, bidi_it);
2293   else
2294     {
2295       int new_level;
2296
2297       /* If we are at end of level, its edges must be cached.  */
2298       if (end_flag)
2299         emacs_abort ();
2300
2301       bidi_cache_iterator_state (bidi_it, 1);
2302       do {
2303         new_level = bidi_level_of_next_char (bidi_it);
2304         bidi_cache_iterator_state (bidi_it, 1);
2305       } while (new_level >= level);
2306     }
2307 }
2308
2309 void
2310 bidi_move_to_visually_next (struct bidi_it *bidi_it)
2311 {
2312   int old_level, new_level, next_level;
2313   struct bidi_it sentinel;
2314   struct gcpro gcpro1;
2315
2316   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
2317     emacs_abort ();
2318
2319   if (bidi_it->scan_dir == 0)
2320     {
2321       bidi_it->scan_dir = 1;    /* default to logical order */
2322     }
2323
2324   /* The code below can call eval, and thus cause GC.  If we are
2325      iterating a Lisp string, make sure it won't be GCed.  */
2326   if (STRINGP (bidi_it->string.lstring))
2327     GCPRO1 (bidi_it->string.lstring);
2328
2329   /* If we just passed a newline, initialize for the next line.  */
2330   if (!bidi_it->first_elt
2331       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2332     bidi_line_init (bidi_it);
2333
2334   /* Prepare the sentinel iterator state, and cache it.  When we bump
2335      into it, scanning backwards, we'll know that the last non-base
2336      level is exhausted.  */
2337   if (bidi_cache_idx == bidi_cache_start)
2338     {
2339       bidi_copy_it (&sentinel, bidi_it);
2340       if (bidi_it->first_elt)
2341         {
2342           sentinel.charpos--;   /* cached charpos needs to be monotonic */
2343           sentinel.bytepos--;
2344           sentinel.ch = '\n';   /* doesn't matter, but why not? */
2345           sentinel.ch_len = 1;
2346           sentinel.nchars = 1;
2347         }
2348       bidi_cache_iterator_state (&sentinel, 1);
2349     }
2350
2351   old_level = bidi_it->resolved_level;
2352   new_level = bidi_level_of_next_char (bidi_it);
2353
2354   /* Reordering of resolved levels (clause L2) is implemented by
2355      jumping to the other edge of the level and flipping direction of
2356      scanning the text whenever we find a level change.  */
2357   if (new_level != old_level)
2358     {
2359       bool ascending = new_level > old_level;
2360       int level_to_search = ascending ? old_level + 1 : old_level;
2361       int incr = ascending ? 1 : -1;
2362       int expected_next_level = old_level + incr;
2363
2364       /* Jump (or walk) to the other edge of this level.  */
2365       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2366       /* Switch scan direction and peek at the next character in the
2367          new direction.  */
2368       bidi_it->scan_dir = -bidi_it->scan_dir;
2369
2370       /* The following loop handles the case where the resolved level
2371          jumps by more than one.  This is typical for numbers inside a
2372          run of text with left-to-right embedding direction, but can
2373          also happen in other situations.  In those cases the decision
2374          where to continue after a level change, and in what direction,
2375          is tricky.  For example, given a text like below:
2376
2377                   abcdefgh
2378                   11336622
2379
2380          (where the numbers below the text show the resolved levels),
2381          the result of reordering according to UAX#9 should be this:
2382
2383                   efdcghba
2384
2385          This is implemented by the loop below which flips direction
2386          and jumps to the other edge of the level each time it finds
2387          the new level not to be the expected one.  The expected level
2388          is always one more or one less than the previous one.  */
2389       next_level = bidi_peek_at_next_level (bidi_it);
2390       while (next_level != expected_next_level)
2391         {
2392           /* If next_level is -1, it means we have an unresolved level
2393              in the cache, which at this point should not happen.  If
2394              it does, we will infloop.  */
2395           eassert (next_level >= 0);
2396           expected_next_level += incr;
2397           level_to_search += incr;
2398           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2399           bidi_it->scan_dir = -bidi_it->scan_dir;
2400           next_level = bidi_peek_at_next_level (bidi_it);
2401         }
2402
2403       /* Finally, deliver the next character in the new direction.  */
2404       next_level = bidi_level_of_next_char (bidi_it);
2405     }
2406
2407   /* Take note when we have just processed the newline that precedes
2408      the end of the paragraph.  The next time we are about to be
2409      called, set_iterator_to_next will automatically reinit the
2410      paragraph direction, if needed.  We do this at the newline before
2411      the paragraph separator, because the next character might not be
2412      the first character of the next paragraph, due to the bidi
2413      reordering, whereas we _must_ know the paragraph base direction
2414      _before_ we process the paragraph's text, since the base
2415      direction affects the reordering.  */
2416   if (bidi_it->scan_dir == 1
2417       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2418     {
2419       /* The paragraph direction of the entire string, once
2420          determined, is in effect for the entire string.  Setting the
2421          separator limit to the end of the string prevents
2422          bidi_paragraph_init from being called automatically on this
2423          string.  */
2424       if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2425         bidi_it->separator_limit = bidi_it->string.schars;
2426       else if (bidi_it->bytepos < ZV_BYTE)
2427         {
2428           ptrdiff_t sep_len
2429             = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2430                                      bidi_it->bytepos + bidi_it->ch_len);
2431           if (bidi_it->nchars <= 0)
2432             emacs_abort ();
2433           if (sep_len >= 0)
2434             {
2435               bidi_it->new_paragraph = 1;
2436               /* Record the buffer position of the last character of the
2437                  paragraph separator.  */
2438               bidi_it->separator_limit
2439                 = bidi_it->charpos + bidi_it->nchars + sep_len;
2440             }
2441         }
2442     }
2443
2444   if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
2445     {
2446       /* If we are at paragraph's base embedding level and beyond the
2447          last cached position, the cache's job is done and we can
2448          discard it.  */
2449       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
2450           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2451                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
2452         bidi_cache_reset ();
2453         /* But as long as we are caching during forward scan, we must
2454            cache each state, or else the cache integrity will be
2455            compromised: it assumes cached states correspond to buffer
2456            positions 1:1.  */
2457       else
2458         bidi_cache_iterator_state (bidi_it, 1);
2459     }
2460
2461   if (STRINGP (bidi_it->string.lstring))
2462     UNGCPRO;
2463 }
2464
2465 /* This is meant to be called from within the debugger, whenever you
2466    wish to examine the cache contents.  */
2467 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
2468 void
2469 bidi_dump_cached_states (void)
2470 {
2471   ptrdiff_t i;
2472   int ndigits = 1;
2473
2474   if (bidi_cache_idx == 0)
2475     {
2476       fprintf (stderr, "The cache is empty.\n");
2477       return;
2478     }
2479   fprintf (stderr, "Total of  %"pD"d state%s in cache:\n",
2480            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2481
2482   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
2483     ndigits++;
2484   fputs ("ch  ", stderr);
2485   for (i = 0; i < bidi_cache_idx; i++)
2486     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
2487   fputs ("\n", stderr);
2488   fputs ("lvl ", stderr);
2489   for (i = 0; i < bidi_cache_idx; i++)
2490     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
2491   fputs ("\n", stderr);
2492   fputs ("pos ", stderr);
2493   for (i = 0; i < bidi_cache_idx; i++)
2494     fprintf (stderr, "%*"pD"d", ndigits, bidi_cache[i].charpos);
2495   fputs ("\n", stderr);
2496 }