Fix an error in Tramp for rsync

[gnu-emacs] / src / bidi.c
diff --git a/src/bidi.c b/src/bidi.c

index e5e08c6a25297cd39326d02306e7fe6b14d7aa79..6ea84d9c0c4eddb42e3696dac647b4c66eb39f0c 100644 (file)
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -1,13 +1,13 @@
  /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
-   Copyright (C) 2000-2001, 2004-2005, 2009-2015 Free Software
+   Copyright (C) 2000-2001, 2004-2005, 2009-2016 Free Software
     Foundation, Inc.
  
  This file is part of GNU Emacs.
  
  GNU Emacs is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
  
  GNU Emacs is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -532,7 +532,7 @@ bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
    /* Copy everything from the start through the active part of
       the level stack.  */
    memcpy (to, from,
-         (offsetof (struct bidi_it, level_stack[1])
+         (offsetof (struct bidi_it, level_stack) + sizeof from->level_stack[0]
            + from->stack_idx * sizeof from->level_stack[0]));
  }
  
@@ -1107,15 +1107,9 @@ bidi_initialize (void)
      emacs_abort ();
    staticpro (&bidi_brackets_table);
  
-  DEFSYM (Qparagraph_start, "paragraph-start");
-  paragraph_start_re = Fsymbol_value (Qparagraph_start);
-  if (!STRINGP (paragraph_start_re))
-    paragraph_start_re = build_string ("\f\\|[ \t]*$");
+  paragraph_start_re = build_string ("^\\(\f\\|[ \t]*\\)$");
    staticpro (&paragraph_start_re);
-  DEFSYM (Qparagraph_separate, "paragraph-separate");
-  paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
-  if (!STRINGP (paragraph_separate_re))
-    paragraph_separate_re = build_string ("[ \t\f]*$");
+  paragraph_separate_re = build_string ("^[ \t\f]*$");
    staticpro (&paragraph_separate_re);
  
    bidi_cache_sp = 0;
@@ -1313,13 +1307,13 @@ bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
           /* `(space ...)' display specs are handled as paragraph
              separators for the purposes of the reordering; see UAX#9
              section 3 and clause HL1 in section 4.3 there.  */
-         ch = 0x2029;
+         ch = PARAGRAPH_SEPARATOR;
         }
        else
         {
           /* All other display specs are handled as the Unicode Object
              Replacement Character.  */
-         ch = 0xFFFC;
+         ch = OBJECT_REPLACEMENT_CHARACTER;
         }
        disp_end_pos = compute_display_string_end (*disp_pos, string);
        if (disp_end_pos < 0)
@@ -1919,8 +1913,6 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
         {
           eassert (bidi_it->prev.charpos == bidi_it->charpos - 1);
           prev_type = bidi_it->prev.orig_type;
-         if (prev_type == FSI)
-           prev_type = bidi_it->type_after_wn;
         }
      }
    /* Don't move at end of buffer/string.  */
@@ -1935,8 +1927,6 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
         emacs_abort ();
        bidi_it->bytepos += bidi_it->ch_len;
        prev_type = bidi_it->orig_type;
-      if (prev_type == FSI)
-       prev_type = bidi_it->type_after_wn;
      }
    else /* EOB or end of string */
      prev_type = NEUTRAL_B;
@@ -2091,10 +2081,17 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
        if (typ1 != STRONG_R && typ1 != STRONG_AL)
         {
           type = LRI;
+         /* Override orig_type, which will be needed when we come to
+            examine the next character, which is the first character
+            inside the isolate.  */
+         bidi_it->orig_type = type;
           goto fsi_as_lri;
         }
        else
-       type = RLI;
+       {
+         type = RLI;
+         bidi_it->orig_type = type;
+       }
        /* FALLTHROUGH */
      case RLI:  /* X5a */
        if (override == NEUTRAL_DIR)
@@ -2315,7 +2312,31 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
               if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
                 type = WEAK_EN;
             }
-         else if (bidi_it->next_en_pos >=0)
+         else if (type == WEAK_BN
+                  /* This condition is for the following important case:
+
+                     . we are at level zero
+                     . either previous strong character was L,
+                        or we've seen no strong characters since sos
+                        and the base paragraph direction is L2R
+                     . this BN is NOT a bidi directional control
+
+                     For such a situation, either this BN will be
+                     converted to EN per W5, and then to L by virtue
+                     of W7; or it will become ON per W6, and then L
+                     because of N1/N2.  So we take a shortcut here
+                     and make it L right away, to avoid the
+                     potentially costly loop below.  This is
+                     important when the buffer has a long series of
+                     control characters, like binary nulls, and no
+                     R2L characters at all.  */
+                  && new_level == 0
+                  && !bidi_explicit_dir_char (bidi_it->ch)
+                  && ((bidi_it->last_strong.type == STRONG_L)
+                      || (bidi_it->last_strong.type == UNKNOWN_BT
+                          && bidi_it->sos == L2R)))
+           type = STRONG_L;
+         else if (bidi_it->next_en_pos >= 0)
             {
               /* We overstepped the last known position for ET
                  resolution but there could be other such characters
@@ -2452,7 +2473,7 @@ typedef struct bpa_stack_entry {
  #define MAX_BPA_STACK ((int)max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1))
  
  /* UAX#9 says to match opening brackets with the matching closing
-   brackets or their canonical equivalents.  As of Unicode 7.0, there
+   brackets or their canonical equivalents.  As of Unicode 8.0, there
     are only 2 bracket characters that have canonical equivalence
     decompositions: u+2329 and u+232A.  So instead of accessing the
     table in uni-decomposition.el, we just handle these 2 characters
@@ -2471,10 +2492,10 @@ typedef struct bpa_stack_entry {
  
     And finally, cross-reference these two:
  
-    fgrep -w -f brackets.txt decompositions.txt
+    grep -Fw -f brackets.txt decompositions.txt
  
     where "decompositions.txt" was produced by the 1st script, and
-   "brackets.txt" by the 2nd script.  In the output of fgrep, look
+   "brackets.txt" by the 2nd script.  In the output of grep, look
     only for decompositions that don't begin with some compatibility
     formatting tag, such as "<compat>".  Only decompositions that
     consist solely of character codepoints are relevant to bidi
@@ -2482,8 +2503,8 @@ typedef struct bpa_stack_entry {
  
  #define CANONICAL_EQU(c)                                       \
    ( ASCII_CHAR_P (c) ? c                                       \
-    : (c) == 0x2329 ? 0x3008                                   \
-    : (c) == 0x232a ? 0x3009                                   \
+    : (c) == LEFT_POINTING_ANGLE_BRACKET ? LEFT_ANGLE_BRACKET  \
+    : (c) == RIGHT_POINTING_ANGLE_BRACKET ? RIGHT_ANGLE_BRACKET        \
      : c )
  
  #ifdef ENABLE_CHECKING
@@ -2948,7 +2969,7 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
                             we are already at paragraph end.  */
         && (is_neutral || bidi_isolate_fmt_char (type)))
        /* N1-N2/Retaining */
-      || (type == WEAK_BN && bidi_explicit_dir_char (bidi_it->ch)))
+      || type == WEAK_BN)
      {
        if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
         {
@@ -2978,8 +2999,10 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
          entering the expensive loop in the "else" clause.  */
        else if (current_level == 0
                && bidi_it->prev_for_neutral.type == STRONG_L
-              && !bidi_explicit_dir_char (bidi_it->ch)
-              && !bidi_isolate_fmt_char (type))
+              && (ASCII_CHAR_P (bidi_it->ch)
+                  || (type != WEAK_BN
+                      && !bidi_explicit_dir_char (bidi_it->ch)
+                      && !bidi_isolate_fmt_char (type))))
         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
                                        STRONG_L, current_level);
        else if (/* current level is 1 */
@@ -2991,6 +3014,7 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
                && (bidi_it->prev_for_neutral.type == STRONG_R
                    || bidi_it->prev_for_neutral.type == WEAK_EN
                    || bidi_it->prev_for_neutral.type == WEAK_AN)
+              && type != WEAK_BN
                && !bidi_explicit_dir_char (bidi_it->ch)
                && !bidi_isolate_fmt_char (type))
         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
@@ -2999,7 +3023,7 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
         {
           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
              the assumption of batch-style processing; see clauses W4,
-            W5, and especially N1, which require to look far forward
+            W5, and especially N1, which require looking far forward
              (as well as back) in the buffer/string.  May the fleas of
              a thousand camels infest the armpits of those who design
              supposedly general-purpose algorithms by looking at their
@@ -3158,7 +3182,7 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
         }
      }
  
-  /* Perhaps the character we want is already cached s fully resolved.
+  /* Perhaps the character we want is already cached as fully resolved.
       If it is, the call to bidi_cache_find below will return a type
       other than UNKNOWN_BT.  */
    if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
@@ -3216,8 +3240,12 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
       it belongs to a sequence of WS characters preceding a newline
       or a TAB or a paragraph separator.  */
    if ((bidi_it->orig_type == NEUTRAL_WS
+       || bidi_it->orig_type == WEAK_BN
         || bidi_isolate_fmt_char (bidi_it->orig_type))
-      && bidi_it->next_for_ws.charpos < bidi_it->charpos)
+      && bidi_it->next_for_ws.charpos < bidi_it->charpos
+      /* If this character is already at base level, we don't need to
+        reset it, so avoid the potentially costly loop below.  */
+      && level != bidi_it->level_stack[0].level)
      {
        int ch;
        ptrdiff_t clen = bidi_it->ch_len;
@@ -3249,11 +3277,14 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
  
    /* Resolve implicit levels.  */
    if (bidi_it->orig_type == NEUTRAL_B /* L1 */
-          || bidi_it->orig_type == NEUTRAL_S
-          || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
-          || (bidi_it->orig_type == NEUTRAL_WS
-              && (bidi_it->next_for_ws.type == NEUTRAL_B
-                  || bidi_it->next_for_ws.type == NEUTRAL_S)))
+      || bidi_it->orig_type == NEUTRAL_S
+      || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
+      || ((bidi_it->orig_type == NEUTRAL_WS
+          || bidi_it->orig_type == WEAK_BN
+          || bidi_isolate_fmt_char (bidi_it->orig_type)
+          || bidi_explicit_dir_char (bidi_it->ch))
+         && (bidi_it->next_for_ws.type == NEUTRAL_B
+             || bidi_it->next_for_ws.type == NEUTRAL_S)))
      level = bidi_it->level_stack[0].level;
    else if ((level & 1) == 0) /* I1 */
      {
@@ -3348,7 +3379,6 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
  {
    int old_level, new_level, next_level;
    struct bidi_it sentinel;
-  struct gcpro gcpro1;
  
    if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
      emacs_abort ();
@@ -3358,11 +3388,6 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
        bidi_it->scan_dir = 1;   /* default to logical order */
      }
  
-  /* The code below can call eval, and thus cause GC.  If we are
-     iterating a Lisp string, make sure it won't be GCed.  */
-  if (STRINGP (bidi_it->string.lstring))
-    GCPRO1 (bidi_it->string.lstring);
-
    /* If we just passed a newline, initialize for the next line.  */
    if (!bidi_it->first_elt
        && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
@@ -3508,9 +3533,6 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
  
    eassert (bidi_it->resolved_level >= 0
            && bidi_it->resolved_level <= BIDI_MAXDEPTH + 2);
-
-  if (STRINGP (bidi_it->string.lstring))
-    UNGCPRO;
  }
  
  /* Utility function for looking for strong directional characters