*** empty log message ***

[gnu-emacs] / src / search.c
diff --git a/src/search.c b/src/search.c

index 507ad5c899281dfab17aadffb710e0045777857b..96ea41e8f8ec4903f9d6017f77133ea03ed6d851 100644 (file)
--- a/src/search.c
+++ b/src/search.c
@@ -104,9 +104,8 @@ matcher_overflow ()
     subexpression bounds.
     POSIX is nonzero if we want full backtracking (POSIX style)
     for this pattern.  0 means backtrack only enough to get a valid match.
-   MULTIBYTE is nonzero if we want to handle multibyte characters in
-   the target.  0 means all multibyte characters are recognized just as
-   sequences of binary data.  */
+   MULTIBYTE is nonzero iff the target of the match is a multibyte
+   buffer or string.  */
  
  static void
  compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
@@ -128,9 +127,8 @@ compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
    BLOCK_INPUT;
    old = re_set_syntax (RE_SYNTAX_EMACS
                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
-  val = (char *) re_compile_pattern ((char *) (XSTRING (pattern)->data),
-                                    STRING_BYTES (XSTRING (pattern)),
-                                    &cp->buf);
+  val = (char *) re_compile_pattern ((char *) SDATA (pattern),
+                                    SBYTES (pattern), &cp->buf);
    re_set_syntax (old);
    UNBLOCK_INPUT;
    if (val)
@@ -152,7 +150,7 @@ shrink_regexp_cache ()
      {
        cp->buf.allocated = cp->buf.used;
        cp->buf.buffer
-       = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used);
+       = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
      }
  }
  
@@ -181,12 +179,12 @@ compile_pattern (pattern, regp, translate, posix, multibyte)
        cp = *cpp;
        /* Entries are initialized to nil, and may be set to nil by
          compile_pattern_1 if the pattern isn't valid.  Don't apply
-        XSTRING in those cases.  However, compile_pattern_1 is only
-        applied to the cache entry we pick here to reuse.  So nil
-        should never appear before a non-nil entry.  */
+        string accessors in those cases.  However, compile_pattern_1
+        is only applied to the cache entry we pick here to reuse.  So
+        nil should never appear before a non-nil entry.  */
        if (NILP (cp->regexp))
         goto compile_it;
-      if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
+      if (SCHARS (cp->regexp) == SCHARS (pattern)
           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
           && !NILP (Fstring_equal (cp->regexp, pattern))
           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
@@ -275,12 +273,12 @@ looking_at_1 (string, posix)
      }
  
    re_match_object = Qnil;
-  
+
    i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
                   PT_BYTE - BEGV_BYTE, &search_regs,
                   ZV_BYTE - BEGV_BYTE);
    immediate_quit = 0;
-  
+
    if (i == -2)
      matcher_overflow ();
  
@@ -341,7 +339,7 @@ string_match_1 (regexp, string, start, posix)
      pos = 0, pos_byte = 0;
    else
      {
-      int len = XSTRING (string)->size;
+      int len = SCHARS (string);
  
        CHECK_NUMBER (start);
        pos = XINT (start);
@@ -359,10 +357,10 @@ string_match_1 (regexp, string, start, posix)
                           STRING_MULTIBYTE (string));
    immediate_quit = 1;
    re_match_object = string;
-  
-  val = re_search (bufp, (char *) XSTRING (string)->data,
-                  STRING_BYTES (XSTRING (string)), pos_byte,
-                  STRING_BYTES (XSTRING (string)) - pos_byte,
+
+  val = re_search (bufp, (char *) SDATA (string),
+                  SBYTES (string), pos_byte,
+                  SBYTES (string) - pos_byte,
                    &search_regs);
    immediate_quit = 0;
    last_thing_searched = Qt;
@@ -388,7 +386,10 @@ Case is ignored if `case-fold-search' is non-nil in the current buffer.
  If third arg START is non-nil, start search at that index in STRING.
  For index of first char beyond the match, do (match-end 0).
  `match-end' and `match-beginning' also give indices of substrings
-matched by parenthesis constructs in the pattern.  */)
+matched by parenthesis constructs in the pattern.
+
+You can use the function `match-string' to extract the substrings
+matched by the parenthesis constructions in REGEXP. */)
       (regexp, string, start)
       Lisp_Object regexp, string, start;
  {
@@ -424,10 +425,10 @@ fast_string_match (regexp, string)
                           0, STRING_MULTIBYTE (string));
    immediate_quit = 1;
    re_match_object = string;
-  
-  val = re_search (bufp, (char *) XSTRING (string)->data,
-                  STRING_BYTES (XSTRING (string)), 0,
-                  STRING_BYTES (XSTRING (string)), 0);
+
+  val = re_search (bufp, (char *) SDATA (string),
+                  SBYTES (string), 0,
+                  SBYTES (string), 0);
    immediate_quit = 0;
    return val;
  }
@@ -442,7 +443,7 @@ extern Lisp_Object Vascii_downcase_table;
  int
  fast_c_string_match_ignore_case (regexp, string)
       Lisp_Object regexp;
-     char *string;
+     const char *string;
  {
    int val;
    struct re_pattern_buffer *bufp;
@@ -516,7 +517,7 @@ scan_buffer (target, start, end, count, shortage, allow_quit)
       int allow_quit;
  {
    struct region_cache *newline_cache;
-  int direction; 
+  int direction;
  
    if (count > 0)
      {
@@ -577,7 +578,7 @@ scan_buffer (target, start, end, count, shortage, allow_quit)
         ceiling_byte = min (tem, ceiling_byte);
  
          {
-          /* The termination address of the dumb loop.  */ 
+          /* The termination address of the dumb loop.  */
            register unsigned char *ceiling_addr
             = BYTE_POS_ADDR (ceiling_byte) + 1;
            register unsigned char *cursor
@@ -822,7 +823,7 @@ find_before_next_newline (from, to, cnt)
  
    if (shortage == 0)
      pos--;
-  
+
    return pos;
  }
  \f
@@ -908,8 +909,8 @@ static int
  trivial_regexp_p (regexp)
       Lisp_Object regexp;
  {
-  int len = STRING_BYTES (XSTRING (regexp));
-  unsigned char *s = XSTRING (regexp)->data;
+  int len = SBYTES (regexp);
+  unsigned char *s = SDATA (regexp);
    while (--len >= 0)
      {
        switch (*s++)
@@ -980,8 +981,8 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
       Lisp_Object inverse_trt;
       int posix;
  {
-  int len = XSTRING (string)->size;
-  int len_byte = STRING_BYTES (XSTRING (string));
+  int len = SCHARS (string);
+  int len_byte = SBYTES (string);
    register int i;
  
    if (running_asynch_code)
@@ -1028,7 +1029,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
           s2 = 0;
         }
        re_match_object = Qnil;
-  
+
        while (n < 0)
         {
           int val;
@@ -1105,12 +1106,12 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
        int raw_pattern_size_byte;
        unsigned char *patbuf;
        int multibyte = !NILP (current_buffer->enable_multibyte_characters);
-      unsigned char *base_pat = XSTRING (string)->data;
-      /* High bits of char, calculated by (CHAR & 0x3F).  Characters
-        of the same high bits have the same sequence of bytes but
-        last.  To do the BM search, all characters in STRING must
-        have the same high bits (including their case
-        translations).  */
+      unsigned char *base_pat = SDATA (string);
+      /* High bits of char; 0 for ASCII characters, (CHAR & ~0x3F)
+        otherwise.  Characters with the same high bits share the same
+        byte sequence except for the last byte.  To do the BM search, all
+        characters in STRING must have the same high bits (including
+        their case translations).  */
        int char_high_bits = -1;
        int boyer_moore_ok = 1;
  
@@ -1120,19 +1121,19 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
  
        if (multibyte == STRING_MULTIBYTE (string))
         {
-         raw_pattern = (unsigned char *) XSTRING (string)->data;
-         raw_pattern_size = XSTRING (string)->size;
-         raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
+         raw_pattern = (unsigned char *) SDATA (string);
+         raw_pattern_size = SCHARS (string);
+         raw_pattern_size_byte = SBYTES (string);
         }
        else if (multibyte)
         {
-         raw_pattern_size = XSTRING (string)->size;
+         raw_pattern_size = SCHARS (string);
           raw_pattern_size_byte
-           = count_size_as_multibyte (XSTRING (string)->data,
+           = count_size_as_multibyte (SDATA (string),
                                        raw_pattern_size);
           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
-         copy_text (XSTRING (string)->data, raw_pattern,
-                    XSTRING (string)->size, 0, 1);
+         copy_text (SDATA (string), raw_pattern,
+                    SCHARS (string), 0, 1);
         }
        else
         {
@@ -1142,11 +1143,11 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
              by subtracting nonascii-insert-offset from each non-ASCII char,
              so that only the multibyte chars which really correspond to
              the chosen single-byte character set can possibly match.  */
-         raw_pattern_size = XSTRING (string)->size;
-         raw_pattern_size_byte = XSTRING (string)->size;
+         raw_pattern_size = SCHARS (string);
+         raw_pattern_size_byte = SCHARS (string);
           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
-         copy_text (XSTRING (string)->data, raw_pattern,
-                    STRING_BYTES (XSTRING (string)), 1, 0);
+         copy_text (SDATA (string), raw_pattern,
+                    SBYTES (string), 1, 0);
         }
  
        /* Copy and optionally translate the pattern.  */
@@ -1184,10 +1185,10 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
                 {
                   /* Keep track of which character set row
                      contains the characters that need translation.  */
-                 int this_high_bit = c & ~0x3F;
-                 int trt_high_bit = ((inverse != c ? inverse : translated)
-                                     & ~0x3F);
-                 
+                 int this_high_bit = ASCII_CHAR_P (c) ? 0 : (c & ~0x3F);
+                 int c1 = inverse != c ? inverse : translated;
+                 int trt_high_bit = ASCII_CHAR_P (c1) ? 0 : (c1 & ~0x3F);
+
                   if (this_high_bit != trt_high_bit)
                     boyer_moore_ok = 0;
                   else if (char_high_bits == -1)
@@ -1481,7 +1482,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
    int infinity, limit, stride_for_teases = 0;
    register int *BM_tab;
    int *BM_tab_base;
-  register unsigned char *cursor, *p_limit;  
+  register unsigned char *cursor, *p_limit;
    register int i, j;
    unsigned char *pat, *pat_end;
    int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
@@ -1514,14 +1515,14 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
    /* a single test, a test for having gone past the end of the */
    /* permissible match region, to test for both possible matches (when */
    /* the stride goes past the end immediately) and failure to */
-  /* match (where you get nudged past the end one stride at a time). */ 
+  /* match (where you get nudged past the end one stride at a time). */
  
    /* Here we make a "mickey mouse" BM table.  The stride of the search */
    /* is determined only by the last character of the putative match. */
    /* If that character does not match, we will stride the proper */
    /* distance to propose a match that superimposes it on the last */
    /* instance of a character that matches it (per trt), or misses */
-  /* it entirely if there is none. */  
+  /* it entirely if there is none. */
  
    dirlen = len_byte * direction;
    infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
@@ -1575,7 +1576,8 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
               while (! CHAR_HEAD_P (*charstart))
                 charstart--;
               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
-             if (char_high_bits == (untranslated & ~0x3F))
+             if (char_high_bits
+                 == (ASCII_CHAR_P (untranslated) ? 0 : untranslated & ~0x3F))
                 {
                   TRANSLATE (ch, trt, untranslated);
                   if (! CHAR_HEAD_P (*ptr))
@@ -1610,7 +1612,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
  
           BM_tab[j] = dirlen - i;
           /* A translation table is accompanied by its inverse -- see */
-         /* comment following downcase_table for details */ 
+         /* comment following downcase_table for details */
           if (this_translated)
             {
               int starting_ch = ch;
@@ -1798,7 +1800,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
               /* This loop can be coded for space rather than */
               /* speed because it will usually run only once. */
               /* (the reach is at most len + 21, and typically */
-             /* does not exceed len) */    
+             /* does not exceed len) */
               while ((limit - pos_byte) * direction >= 0)
                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
               /* now run the same tests to distinguish going off the */
@@ -1905,13 +1907,13 @@ wordify (string)
    int adjust;
  
    CHECK_STRING (string);
-  p = XSTRING (string)->data;
-  len = XSTRING (string)->size;
+  p = SDATA (string);
+  len = SCHARS (string);
  
    for (i = 0, i_byte = 0; i < len; )
      {
        int c;
-      
+
        FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
  
        if (SYNTAX (c) != Sword)
@@ -1932,12 +1934,12 @@ wordify (string)
    adjust = - punct_count + 5 * (word_count - 1) + 4;
    if (STRING_MULTIBYTE (string))
      val = make_uninit_multibyte_string (len + adjust,
-                                       STRING_BYTES (XSTRING (string))
+                                       SBYTES (string)
                                         + adjust);
    else
      val = make_uninit_string (len + adjust);
  
-  o = XSTRING (val)->data;
+  o = SDATA (val);
    *o++ = '\\';
    *o++ = 'b';
    prev_c = 0;
@@ -1946,12 +1948,12 @@ wordify (string)
      {
        int c;
        int i_byte_orig = i_byte;
-      
+
        FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
  
        if (SYNTAX (c) == Sword)
         {
-         bcopy (&XSTRING (string)->data[i_byte_orig], o,
+         bcopy (SDATA (string) + i_byte_orig, o,
                  i_byte - i_byte_orig);
           o += i_byte - i_byte_orig;
         }
@@ -2119,13 +2121,15 @@ and `replace-match'.  */)
  \f
  DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
         doc: /* Replace text matched by last search with NEWTEXT.
+Leave point at the end of the replacement text.
+
  If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
  Otherwise maybe capitalize the whole text, or maybe just word initials,
  based on the replaced text.
  If the replaced text has only capital letters
  and has at least one multiletter word, convert NEWTEXT to all caps.
-If the replaced text has at least one word starting with a capital letter,
-then capitalize each word in NEWTEXT.
+Otherwise if all words are capitalized in the replaced text,
+capitalize each word in NEWTEXT.
  
  If third arg LITERAL is non-nil, insert NEWTEXT literally.
  Otherwise treat `\\' as special:
@@ -2133,8 +2137,9 @@ Otherwise treat `\\' as special:
    `\\N' means substitute what matched the Nth `\\(...\\)'.
         If Nth parens didn't match, substitute nothing.
    `\\\\' means insert one `\\'.
+Case conversion does not apply to these substitutions.
+
  FIXEDCASE and LITERAL are optional arguments.
-Leaves point at end of replacement text.
  
  The optional fourth argument STRING can be a string to modify.
  This is meaningful when the previous match was done against STRING,
@@ -2196,7 +2201,7 @@ since only regular expressions have distinguished subexpressions.  */)
      {
        if (search_regs.start[sub] < 0
           || search_regs.start[sub] > search_regs.end[sub]
-         || search_regs.end[sub] > XSTRING (string)->size)
+         || search_regs.end[sub] > SCHARS (string))
         args_out_of_range (make_number (search_regs.start[sub]),
                            make_number (search_regs.end[sub]));
      }
@@ -2296,7 +2301,7 @@ since only regular expressions have distinguished subexpressions.  */)
           /* We build up the substituted string in ACCUM.  */
           Lisp_Object accum;
           Lisp_Object middle;
-         int length = STRING_BYTES (XSTRING (newtext));
+         int length = SBYTES (newtext);
  
           accum = Qnil;
  
@@ -2311,7 +2316,7 @@ since only regular expressions have distinguished subexpressions.  */)
               if (c == '\\')
                 {
                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
-                     
+
                   if (c == '&')
                     {
                       substart = search_regs.start[sub];
@@ -2388,7 +2393,7 @@ since only regular expressions have distinguished subexpressions.  */)
       perform substitution on the replacement string.  */
    if (NILP (literal))
      {
-      int length = STRING_BYTES (XSTRING (newtext));
+      int length = SBYTES (newtext);
        unsigned char *substed;
        int substed_alloc_size, substed_len;
        int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
@@ -2396,7 +2401,7 @@ since only regular expressions have distinguished subexpressions.  */)
        Lisp_Object rev_tbl;
        int really_changed = 0;
  
-      rev_tbl= Qnil;
+      rev_tbl = Qnil;
  
        substed_alloc_size = length * 2 + 100;
        substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
@@ -2422,7 +2427,7 @@ since only regular expressions have distinguished subexpressions.  */)
           else
             {
               /* Note that we don't have to increment POS.  */
-             c = XSTRING (newtext)->data[pos_byte++];
+             c = SREF (newtext, pos_byte++);
               if (buf_multibyte)
                 c = unibyte_char_to_multibyte (c);
             }
@@ -2444,7 +2449,7 @@ since only regular expressions have distinguished subexpressions.  */)
                 }
               else
                 {
-                 c = XSTRING (newtext)->data[pos_byte++];
+                 c = SREF (newtext, pos_byte++);
                   if (buf_multibyte)
                     c = unibyte_char_to_multibyte (c);
                 }
@@ -2509,7 +2514,7 @@ since only regular expressions have distinguished subexpressions.  */)
    /* Replace the old text with the new in the cleanest possible way.  */
    replace_range (search_regs.start[sub], search_regs.end[sub],
                  newtext, 1, 0, 1);
-  newpoint = search_regs.start[sub] + XSTRING (newtext)->size;
+  newpoint = search_regs.start[sub] + SCHARS (newtext);
  
    if (case_action == all_caps)
      Fupcase_region (make_number (search_regs.start[sub]),
@@ -2518,6 +2523,21 @@ since only regular expressions have distinguished subexpressions.  */)
      Fupcase_initials_region (make_number (search_regs.start[sub]),
                              make_number (newpoint));
  
+  /* Adjust search data for this change.  */
+  {
+    int oldend = search_regs.end[sub];
+    int change = newpoint - search_regs.end[sub];
+    int i;
+
+    for (i = 0; i < search_regs.num_regs; i++)
+      {
+       if (search_regs.start[i] > oldend)
+         search_regs.start[i] += change;
+       if (search_regs.end[i] > oldend)
+         search_regs.end[i] += change;
+      }
+  }
+
    /* Put point back where it was in the text.  */
    if (opoint <= 0)
      TEMP_SET_PT (opoint + ZV);
@@ -2526,7 +2546,7 @@ since only regular expressions have distinguished subexpressions.  */)
  
    /* Now move point "officially" to the start of the inserted replacement.  */
    move_if_not_intangible (newpoint);
-  
+
    return Qnil;
  }
  \f
@@ -2572,7 +2592,7 @@ Zero means the entire text matched by the whole regexp or whole string.  */)
       Lisp_Object subexp;
  {
    return match_limit (subexp, 0);
-} 
+}
  
  DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
         doc: /* Return a list containing all info on what the last search matched.
@@ -2584,7 +2604,9 @@ Use `store-match-data' to reinstate the data in this list.
  If INTEGERS (the optional first argument) is non-nil, always use integers
  \(rather than markers) to represent buffer positions.
  If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
-to hold all the values, and if INTEGERS is non-nil, no consing is done.  */)
+to hold all the values, and if INTEGERS is non-nil, no consing is done.
+
+Return value is undefined if the last search failed.  */)
       (integers, reuse)
       Lisp_Object integers, reuse;
  {
@@ -2620,7 +2642,7 @@ to hold all the values, and if INTEGERS is non-nil, no consing is done.  */)
                            last_thing_searched);
               data[2 * i + 1] = Fmake_marker ();
               Fset_marker (data[2 * i + 1],
-                          make_number (search_regs.end[i]), 
+                          make_number (search_regs.end[i]),
                            last_thing_searched);
             }
           else
@@ -2673,7 +2695,7 @@ LIST should have been created by calling `match-data' previously.  */)
    if (!CONSP (list) && !NILP (list))
      list = wrong_type_argument (Qconsp, list);
  
-  /* Unless we find a marker with a buffer in LIST, assume that this 
+  /* Unless we find a marker with a buffer in LIST, assume that this
       match data came from a string.  */
    last_thing_searched = Qt;
  
@@ -2742,7 +2764,7 @@ LIST should have been created by calling `match-data' previously.  */)
        list = Fcdr (list);
      }
  
-  return Qnil;  
+  return Qnil;
  }
  
  /* If non-zero the match data have been saved in saved_search_regs
@@ -2800,13 +2822,13 @@ DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
  
    CHECK_STRING (string);
  
-  temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
+  temp = (unsigned char *) alloca (SBYTES (string) * 2);
  
    /* Now copy the data into the new string, inserting escapes. */
  
-  in = XSTRING (string)->data;
-  end = in + STRING_BYTES (XSTRING (string));
-  out = temp; 
+  in = SDATA (string);
+  end = in + SBYTES (string);
+  out = temp;
  
    for (; in != end; in++)
      {
@@ -2819,11 +2841,11 @@ DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
      }
  
    return make_specified_string (temp,
-                               XSTRING (string)->size + backslashes_added,
+                               SCHARS (string) + backslashes_added,
                                 out - temp,
                                 STRING_MULTIBYTE (string));
  }
-\f  
+\f
  void
  syms_of_search ()
  {
@@ -2832,7 +2854,7 @@ syms_of_search ()
    for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
      {
        searchbufs[i].buf.allocated = 100;
-      searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
+      searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
        searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
        searchbufs[i].regexp = Qnil;
        staticpro (&searchbufs[i].regexp);