code.delx.au - gnu-emacs/blob - src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include <setjmp.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* error condition signaled when regexp compile_pattern fails */
  88
  89 Lisp_Object Qinvalid_regexp;
  90
  91 /* Error condition used for failing searches */
  92 Lisp_Object Qsearch_failed;
  93
  94 static void set_search_regs (EMACS_INT, EMACS_INT);
  95 static void save_search_regs (void);
  96 static EMACS_INT simple_search (EMACS_INT, unsigned char *, EMACS_INT,
  97                                 EMACS_INT, Lisp_Object, EMACS_INT, EMACS_INT,
  98                                 EMACS_INT, EMACS_INT);
  99 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, EMACS_INT, EMACS_INT,
 100                               Lisp_Object, Lisp_Object,
 101                               EMACS_INT, EMACS_INT,
 102                               EMACS_INT, EMACS_INT, int);
 103 static EMACS_INT search_buffer (Lisp_Object, EMACS_INT, EMACS_INT,
 104                                 EMACS_INT, EMACS_INT, EMACS_INT, int,
 105                                 Lisp_Object, Lisp_Object, int);
 106 static void matcher_overflow (void) NO_RETURN;
 107
 108 static void
 109 matcher_overflow (void)
 110 {
 111   error ("Stack overflow in regexp matcher");
 112 }
 113
 114 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 115    PATTERN is the pattern to compile.
 116    CP is the place to put the result.
 117    TRANSLATE is a translation table for ignoring case, or nil for none.
 118    REGP is the structure that says where to store the "register"
 119    values that will result from matching this pattern.
 120    If it is 0, we should compile the pattern not to record any
 121    subexpression bounds.
 122    POSIX is nonzero if we want full backtracking (POSIX style)
 123    for this pattern.  0 means backtrack only enough to get a valid match.
 124
 125    The behavior also depends on Vsearch_spaces_regexp.  */
 126
 127 static void
 128 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, struct re_registers *regp, int posix)
 129 {
 130   char *val;
 131   reg_syntax_t old;
 132
 133   cp->regexp = Qnil;
 134   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 135   cp->posix = posix;
 136   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 137   cp->buf.charset_unibyte = charset_unibyte;
 138   if (STRINGP (Vsearch_spaces_regexp))
 139     cp->whitespace_regexp = Vsearch_spaces_regexp;
 140   else
 141     cp->whitespace_regexp = Qnil;
 142
 143   /* rms: I think BLOCK_INPUT is not needed here any more,
 144      because regex.c defines malloc to call xmalloc.
 145      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 146      So let's turn it off.  */
 147   /*  BLOCK_INPUT;  */
 148   old = re_set_syntax (RE_SYNTAX_EMACS
 149                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 150
 151   if (STRINGP (Vsearch_spaces_regexp))
 152     re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
 153   else
 154     re_set_whitespace_regexp (NULL);
 155
 156   val = (char *) re_compile_pattern (SSDATA (pattern),
 157                                      SBYTES (pattern), &cp->buf);
 158
 159   /* If the compiled pattern hard codes some of the contents of the
 160      syntax-table, it can only be reused with *this* syntax table.  */
 161   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 162
 163   re_set_whitespace_regexp (NULL);
 164
 165   re_set_syntax (old);
 166   /* UNBLOCK_INPUT;  */
 167   if (val)
 168     xsignal1 (Qinvalid_regexp, build_string (val));
 169
 170   cp->regexp = Fcopy_sequence (pattern);
 171 }
 172
 173 /* Shrink each compiled regexp buffer in the cache
 174    to the size actually used right now.
 175    This is called from garbage collection.  */
 176
 177 void
 178 shrink_regexp_cache (void)
 179 {
 180   struct regexp_cache *cp;
 181
 182   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 183     {
 184       cp->buf.allocated = cp->buf.used;
 185       cp->buf.buffer
 186         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 187     }
 188 }
 189
 190 /* Clear the regexp cache w.r.t. a particular syntax table,
 191    because it was changed.
 192    There is no danger of memory leak here because re_compile_pattern
 193    automagically manages the memory in each re_pattern_buffer struct,
 194    based on its `allocated' and `buffer' values.  */
 195 void
 196 clear_regexp_cache (void)
 197 {
 198   int i;
 199
 200   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 201     /* It's tempting to compare with the syntax-table we've actually changed,
 202        but it's not sufficient because char-table inheritance means that
 203        modifying one syntax-table can change others at the same time.  */
 204     if (!EQ (searchbufs[i].syntax_table, Qt))
 205       searchbufs[i].regexp = Qnil;
 206 }
 207
 208 /* Compile a regexp if necessary, but first check to see if there's one in
 209    the cache.
 210    PATTERN is the pattern to compile.
 211    TRANSLATE is a translation table for ignoring case, or nil for none.
 212    REGP is the structure that says where to store the "register"
 213    values that will result from matching this pattern.
 214    If it is 0, we should compile the pattern not to record any
 215    subexpression bounds.
 216    POSIX is nonzero if we want full backtracking (POSIX style)
 217    for this pattern.  0 means backtrack only enough to get a valid match.  */
 218
 219 struct re_pattern_buffer *
 220 compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
 221 {
 222   struct regexp_cache *cp, **cpp;
 223
 224   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 225     {
 226       cp = *cpp;
 227       /* Entries are initialized to nil, and may be set to nil by
 228          compile_pattern_1 if the pattern isn't valid.  Don't apply
 229          string accessors in those cases.  However, compile_pattern_1
 230          is only applied to the cache entry we pick here to reuse.  So
 231          nil should never appear before a non-nil entry.  */
 232       if (NILP (cp->regexp))
 233         goto compile_it;
 234       if (SCHARS (cp->regexp) == SCHARS (pattern)
 235           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 236           && !NILP (Fstring_equal (cp->regexp, pattern))
 237           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 238           && cp->posix == posix
 239           && (EQ (cp->syntax_table, Qt)
 240               || EQ (cp->syntax_table, current_buffer->syntax_table))
 241           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 242           && cp->buf.charset_unibyte == charset_unibyte)
 243         break;
 244
 245       /* If we're at the end of the cache, compile into the nil cell
 246          we found, or the last (least recently used) cell with a
 247          string value.  */
 248       if (cp->next == 0)
 249         {
 250         compile_it:
 251           compile_pattern_1 (cp, pattern, translate, regp, posix);
 252           break;
 253         }
 254     }
 255
 256   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 257      either because we found it in the cache or because we just compiled it.
 258      Move it to the front of the queue to mark it as most recently used.  */
 259   *cpp = cp->next;
 260   cp->next = searchbuf_head;
 261   searchbuf_head = cp;
 262
 263   /* Advise the searching functions about the space we have allocated
 264      for register data.  */
 265   if (regp)
 266     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 267
 268   /* The compiled pattern can be used both for multibyte and unibyte
 269      target.  But, we have to tell which the pattern is used for. */
 270   cp->buf.target_multibyte = multibyte;
 271
 272   return &cp->buf;
 273 }
 274
 275 \f
 276 static Lisp_Object
 277 looking_at_1 (Lisp_Object string, int posix)
 278 {
 279   Lisp_Object val;
 280   unsigned char *p1, *p2;
 281   EMACS_INT s1, s2;
 282   register EMACS_INT i;
 283   struct re_pattern_buffer *bufp;
 284
 285   if (running_asynch_code)
 286     save_search_regs ();
 287
 288   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 289   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 290     = current_buffer->case_eqv_table;
 291
 292   CHECK_STRING (string);
 293   bufp = compile_pattern (string,
 294                           (NILP (Vinhibit_changing_match_data)
 295                            ? &search_regs : NULL),
 296                           (!NILP (current_buffer->case_fold_search)
 297                            ? current_buffer->case_canon_table : Qnil),
 298                           posix,
 299                           !NILP (current_buffer->enable_multibyte_characters));
 300
 301   immediate_quit = 1;
 302   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 303
 304   /* Get pointers and sizes of the two strings
 305      that make up the visible portion of the buffer. */
 306
 307   p1 = BEGV_ADDR;
 308   s1 = GPT_BYTE - BEGV_BYTE;
 309   p2 = GAP_END_ADDR;
 310   s2 = ZV_BYTE - GPT_BYTE;
 311   if (s1 < 0)
 312     {
 313       p2 = p1;
 314       s2 = ZV_BYTE - BEGV_BYTE;
 315       s1 = 0;
 316     }
 317   if (s2 < 0)
 318     {
 319       s1 = ZV_BYTE - BEGV_BYTE;
 320       s2 = 0;
 321     }
 322
 323   re_match_object = Qnil;
 324
 325   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 326                   PT_BYTE - BEGV_BYTE,
 327                   (NILP (Vinhibit_changing_match_data)
 328                    ? &search_regs : NULL),
 329                   ZV_BYTE - BEGV_BYTE);
 330   immediate_quit = 0;
 331
 332   if (i == -2)
 333     matcher_overflow ();
 334
 335   val = (0 <= i ? Qt : Qnil);
 336   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 337     for (i = 0; i < search_regs.num_regs; i++)
 338       if (search_regs.start[i] >= 0)
 339         {
 340           search_regs.start[i]
 341             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 342           search_regs.end[i]
 343             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 344         }
 345
 346   /* Set last_thing_searched only when match data is changed.  */
 347   if (NILP (Vinhibit_changing_match_data))
 348     XSETBUFFER (last_thing_searched, current_buffer);
 349
 350   return val;
 351 }
 352
 353 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 354        doc: /* Return t if text after point matches regular expression REGEXP.
 355 This function modifies the match data that `match-beginning',
 356 `match-end' and `match-data' access; save and restore the match
 357 data if you want to preserve them.  */)
 358   (Lisp_Object regexp)
 359 {
 360   return looking_at_1 (regexp, 0);
 361 }
 362
 363 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 364        doc: /* Return t if text after point matches regular expression REGEXP.
 365 Find the longest match, in accord with Posix regular expression rules.
 366 This function modifies the match data that `match-beginning',
 367 `match-end' and `match-data' access; save and restore the match
 368 data if you want to preserve them.  */)
 369   (Lisp_Object regexp)
 370 {
 371   return looking_at_1 (regexp, 1);
 372 }
 373 \f
 374 static Lisp_Object
 375 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
 376 {
 377   int val;
 378   struct re_pattern_buffer *bufp;
 379   EMACS_INT pos, pos_byte;
 380   int i;
 381
 382   if (running_asynch_code)
 383     save_search_regs ();
 384
 385   CHECK_STRING (regexp);
 386   CHECK_STRING (string);
 387
 388   if (NILP (start))
 389     pos = 0, pos_byte = 0;
 390   else
 391     {
 392       EMACS_INT len = SCHARS (string);
 393
 394       CHECK_NUMBER (start);
 395       pos = XINT (start);
 396       if (pos < 0 && -pos <= len)
 397         pos = len + pos;
 398       else if (0 > pos || pos > len)
 399         args_out_of_range (string, start);
 400       pos_byte = string_char_to_byte (string, pos);
 401     }
 402
 403   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 404   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 405     = current_buffer->case_eqv_table;
 406
 407   bufp = compile_pattern (regexp,
 408                           (NILP (Vinhibit_changing_match_data)
 409                            ? &search_regs : NULL),
 410                           (!NILP (current_buffer->case_fold_search)
 411                            ? current_buffer->case_canon_table : Qnil),
 412                           posix,
 413                           STRING_MULTIBYTE (string));
 414   immediate_quit = 1;
 415   re_match_object = string;
 416
 417   val = re_search (bufp, SSDATA (string),
 418                    SBYTES (string), pos_byte,
 419                    SBYTES (string) - pos_byte,
 420                    (NILP (Vinhibit_changing_match_data)
 421                     ? &search_regs : NULL));
 422   immediate_quit = 0;
 423
 424   /* Set last_thing_searched only when match data is changed.  */
 425   if (NILP (Vinhibit_changing_match_data))
 426     last_thing_searched = Qt;
 427
 428   if (val == -2)
 429     matcher_overflow ();
 430   if (val < 0) return Qnil;
 431
 432   if (NILP (Vinhibit_changing_match_data))
 433     for (i = 0; i < search_regs.num_regs; i++)
 434       if (search_regs.start[i] >= 0)
 435         {
 436           search_regs.start[i]
 437             = string_byte_to_char (string, search_regs.start[i]);
 438           search_regs.end[i]
 439             = string_byte_to_char (string, search_regs.end[i]);
 440         }
 441
 442   return make_number (string_byte_to_char (string, val));
 443 }
 444
 445 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 446        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 447 Matching ignores case if `case-fold-search' is non-nil.
 448 If third arg START is non-nil, start search at that index in STRING.
 449 For index of first char beyond the match, do (match-end 0).
 450 `match-end' and `match-beginning' also give indices of substrings
 451 matched by parenthesis constructs in the pattern.
 452
 453 You can use the function `match-string' to extract the substrings
 454 matched by the parenthesis constructions in REGEXP. */)
 455   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 456 {
 457   return string_match_1 (regexp, string, start, 0);
 458 }
 459
 460 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 461        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 462 Find the longest match, in accord with Posix regular expression rules.
 463 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 464 If third arg START is non-nil, start search at that index in STRING.
 465 For index of first char beyond the match, do (match-end 0).
 466 `match-end' and `match-beginning' also give indices of substrings
 467 matched by parenthesis constructs in the pattern.  */)
 468   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 469 {
 470   return string_match_1 (regexp, string, start, 1);
 471 }
 472
 473 /* Match REGEXP against STRING, searching all of STRING,
 474    and return the index of the match, or negative on failure.
 475    This does not clobber the match data.  */
 476
 477 int
 478 fast_string_match (Lisp_Object regexp, Lisp_Object string)
 479 {
 480   int val;
 481   struct re_pattern_buffer *bufp;
 482
 483   bufp = compile_pattern (regexp, 0, Qnil,
 484                           0, STRING_MULTIBYTE (string));
 485   immediate_quit = 1;
 486   re_match_object = string;
 487
 488   val = re_search (bufp, SSDATA (string),
 489                    SBYTES (string), 0,
 490                    SBYTES (string), 0);
 491   immediate_quit = 0;
 492   return val;
 493 }
 494
 495 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 496    and return the index of the match, or negative on failure.
 497    This does not clobber the match data.
 498    We assume that STRING contains single-byte characters.  */
 499
 500 int
 501 fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string)
 502 {
 503   int val;
 504   struct re_pattern_buffer *bufp;
 505   size_t len = strlen (string);
 506
 507   regexp = string_make_unibyte (regexp);
 508   re_match_object = Qt;
 509   bufp = compile_pattern (regexp, 0,
 510                           Vascii_canon_table, 0,
 511                           0);
 512   immediate_quit = 1;
 513   val = re_search (bufp, string, len, 0, len, 0);
 514   immediate_quit = 0;
 515   return val;
 516 }
 517
 518 /* Like fast_string_match but ignore case.  */
 519
 520 int
 521 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
 522 {
 523   int val;
 524   struct re_pattern_buffer *bufp;
 525
 526   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 527                           0, STRING_MULTIBYTE (string));
 528   immediate_quit = 1;
 529   re_match_object = string;
 530
 531   val = re_search (bufp, SSDATA (string),
 532                    SBYTES (string), 0,
 533                    SBYTES (string), 0);
 534   immediate_quit = 0;
 535   return val;
 536 }
 537 \f
 538 /* Match REGEXP against the characters after POS to LIMIT, and return
 539    the number of matched characters.  If STRING is non-nil, match
 540    against the characters in it.  In that case, POS and LIMIT are
 541    indices into the string.  This function doesn't modify the match
 542    data.  */
 543
 544 EMACS_INT
 545 fast_looking_at (Lisp_Object regexp, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT limit, EMACS_INT limit_byte, Lisp_Object string)
 546 {
 547   int multibyte;
 548   struct re_pattern_buffer *buf;
 549   unsigned char *p1, *p2;
 550   EMACS_INT s1, s2;
 551   EMACS_INT len;
 552
 553   if (STRINGP (string))
 554     {
 555       if (pos_byte < 0)
 556         pos_byte = string_char_to_byte (string, pos);
 557       if (limit_byte < 0)
 558         limit_byte = string_char_to_byte (string, limit);
 559       p1 = NULL;
 560       s1 = 0;
 561       p2 = SDATA (string);
 562       s2 = SBYTES (string);
 563       re_match_object = string;
 564       multibyte = STRING_MULTIBYTE (string);
 565     }
 566   else
 567     {
 568       if (pos_byte < 0)
 569         pos_byte = CHAR_TO_BYTE (pos);
 570       if (limit_byte < 0)
 571         limit_byte = CHAR_TO_BYTE (limit);
 572       pos_byte -= BEGV_BYTE;
 573       limit_byte -= BEGV_BYTE;
 574       p1 = BEGV_ADDR;
 575       s1 = GPT_BYTE - BEGV_BYTE;
 576       p2 = GAP_END_ADDR;
 577       s2 = ZV_BYTE - GPT_BYTE;
 578       if (s1 < 0)
 579         {
 580           p2 = p1;
 581           s2 = ZV_BYTE - BEGV_BYTE;
 582           s1 = 0;
 583         }
 584       if (s2 < 0)
 585         {
 586           s1 = ZV_BYTE - BEGV_BYTE;
 587           s2 = 0;
 588         }
 589       re_match_object = Qnil;
 590       multibyte = ! NILP (current_buffer->enable_multibyte_characters);
 591     }
 592
 593   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 594   immediate_quit = 1;
 595   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 596                     pos_byte, NULL, limit_byte);
 597   immediate_quit = 0;
 598
 599   return len;
 600 }
 601
 602 \f
 603 /* The newline cache: remembering which sections of text have no newlines.  */
 604
 605 /* If the user has requested newline caching, make sure it's on.
 606    Otherwise, make sure it's off.
 607    This is our cheezy way of associating an action with the change of
 608    state of a buffer-local variable.  */
 609 static void
 610 newline_cache_on_off (struct buffer *buf)
 611 {
 612   if (NILP (buf->cache_long_line_scans))
 613     {
 614       /* It should be off.  */
 615       if (buf->newline_cache)
 616         {
 617           free_region_cache (buf->newline_cache);
 618           buf->newline_cache = 0;
 619         }
 620     }
 621   else
 622     {
 623       /* It should be on.  */
 624       if (buf->newline_cache == 0)
 625         buf->newline_cache = new_region_cache ();
 626     }
 627 }
 628
 629 \f
 630 /* Search for COUNT instances of the character TARGET between START and END.
 631
 632    If COUNT is positive, search forwards; END must be >= START.
 633    If COUNT is negative, search backwards for the -COUNTth instance;
 634       END must be <= START.
 635    If COUNT is zero, do anything you please; run rogue, for all I care.
 636
 637    If END is zero, use BEGV or ZV instead, as appropriate for the
 638    direction indicated by COUNT.
 639
 640    If we find COUNT instances, set *SHORTAGE to zero, and return the
 641    position past the COUNTth match.  Note that for reverse motion
 642    this is not the same as the usual convention for Emacs motion commands.
 643
 644    If we don't find COUNT instances before reaching END, set *SHORTAGE
 645    to the number of TARGETs left unfound, and return END.
 646
 647    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 648    except when inside redisplay.  */
 649
 650 EMACS_INT
 651 scan_buffer (register int target, EMACS_INT start, EMACS_INT end,
 652              EMACS_INT count, int *shortage, int allow_quit)
 653 {
 654   struct region_cache *newline_cache;
 655   int direction;
 656
 657   if (count > 0)
 658     {
 659       direction = 1;
 660       if (! end) end = ZV;
 661     }
 662   else
 663     {
 664       direction = -1;
 665       if (! end) end = BEGV;
 666     }
 667
 668   newline_cache_on_off (current_buffer);
 669   newline_cache = current_buffer->newline_cache;
 670
 671   if (shortage != 0)
 672     *shortage = 0;
 673
 674   immediate_quit = allow_quit;
 675
 676   if (count > 0)
 677     while (start != end)
 678       {
 679         /* Our innermost scanning loop is very simple; it doesn't know
 680            about gaps, buffer ends, or the newline cache.  ceiling is
 681            the position of the last character before the next such
 682            obstacle --- the last character the dumb search loop should
 683            examine.  */
 684         EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
 685         EMACS_INT start_byte = CHAR_TO_BYTE (start);
 686         EMACS_INT tem;
 687
 688         /* If we're looking for a newline, consult the newline cache
 689            to see where we can avoid some scanning.  */
 690         if (target == '\n' && newline_cache)
 691           {
 692             EMACS_INT next_change;
 693             immediate_quit = 0;
 694             while (region_cache_forward
 695                    (current_buffer, newline_cache, start_byte, &next_change))
 696               start_byte = next_change;
 697             immediate_quit = allow_quit;
 698
 699             /* START should never be after END.  */
 700             if (start_byte > ceiling_byte)
 701               start_byte = ceiling_byte;
 702
 703             /* Now the text after start is an unknown region, and
 704                next_change is the position of the next known region. */
 705             ceiling_byte = min (next_change - 1, ceiling_byte);
 706           }
 707
 708         /* The dumb loop can only scan text stored in contiguous
 709            bytes. BUFFER_CEILING_OF returns the last character
 710            position that is contiguous, so the ceiling is the
 711            position after that.  */
 712         tem = BUFFER_CEILING_OF (start_byte);
 713         ceiling_byte = min (tem, ceiling_byte);
 714
 715         {
 716           /* The termination address of the dumb loop.  */
 717           register unsigned char *ceiling_addr
 718             = BYTE_POS_ADDR (ceiling_byte) + 1;
 719           register unsigned char *cursor
 720             = BYTE_POS_ADDR (start_byte);
 721           unsigned char *base = cursor;
 722
 723           while (cursor < ceiling_addr)
 724             {
 725               unsigned char *scan_start = cursor;
 726
 727               /* The dumb loop.  */
 728               while (*cursor != target && ++cursor < ceiling_addr)
 729                 ;
 730
 731               /* If we're looking for newlines, cache the fact that
 732                  the region from start to cursor is free of them. */
 733               if (target == '\n' && newline_cache)
 734                 know_region_cache (current_buffer, newline_cache,
 735                                    start_byte + scan_start - base,
 736                                    start_byte + cursor - base);
 737
 738               /* Did we find the target character?  */
 739               if (cursor < ceiling_addr)
 740                 {
 741                   if (--count == 0)
 742                     {
 743                       immediate_quit = 0;
 744                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 745                     }
 746                   cursor++;
 747                 }
 748             }
 749
 750           start = BYTE_TO_CHAR (start_byte + cursor - base);
 751         }
 752       }
 753   else
 754     while (start > end)
 755       {
 756         /* The last character to check before the next obstacle.  */
 757         EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
 758         EMACS_INT start_byte = CHAR_TO_BYTE (start);
 759         EMACS_INT tem;
 760
 761         /* Consult the newline cache, if appropriate.  */
 762         if (target == '\n' && newline_cache)
 763           {
 764             EMACS_INT next_change;
 765             immediate_quit = 0;
 766             while (region_cache_backward
 767                    (current_buffer, newline_cache, start_byte, &next_change))
 768               start_byte = next_change;
 769             immediate_quit = allow_quit;
 770
 771             /* Start should never be at or before end.  */
 772             if (start_byte <= ceiling_byte)
 773               start_byte = ceiling_byte + 1;
 774
 775             /* Now the text before start is an unknown region, and
 776                next_change is the position of the next known region. */
 777             ceiling_byte = max (next_change, ceiling_byte);
 778           }
 779
 780         /* Stop scanning before the gap.  */
 781         tem = BUFFER_FLOOR_OF (start_byte - 1);
 782         ceiling_byte = max (tem, ceiling_byte);
 783
 784         {
 785           /* The termination address of the dumb loop.  */
 786           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 787           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 788           unsigned char *base = cursor;
 789
 790           while (cursor >= ceiling_addr)
 791             {
 792               unsigned char *scan_start = cursor;
 793
 794               while (*cursor != target && --cursor >= ceiling_addr)
 795                 ;
 796
 797               /* If we're looking for newlines, cache the fact that
 798                  the region from after the cursor to start is free of them.  */
 799               if (target == '\n' && newline_cache)
 800                 know_region_cache (current_buffer, newline_cache,
 801                                    start_byte + cursor - base,
 802                                    start_byte + scan_start - base);
 803
 804               /* Did we find the target character?  */
 805               if (cursor >= ceiling_addr)
 806                 {
 807                   if (++count >= 0)
 808                     {
 809                       immediate_quit = 0;
 810                       return BYTE_TO_CHAR (start_byte + cursor - base);
 811                     }
 812                   cursor--;
 813                 }
 814             }
 815
 816           start = BYTE_TO_CHAR (start_byte + cursor - base);
 817         }
 818       }
 819
 820   immediate_quit = 0;
 821   if (shortage != 0)
 822     *shortage = count * direction;
 823   return start;
 824 }
 825 \f
 826 /* Search for COUNT instances of a line boundary, which means either a
 827    newline or (if selective display enabled) a carriage return.
 828    Start at START.  If COUNT is negative, search backwards.
 829
 830    We report the resulting position by calling TEMP_SET_PT_BOTH.
 831
 832    If we find COUNT instances. we position after (always after,
 833    even if scanning backwards) the COUNTth match, and return 0.
 834
 835    If we don't find COUNT instances before reaching the end of the
 836    buffer (or the beginning, if scanning backwards), we return
 837    the number of line boundaries left unfound, and position at
 838    the limit we bumped up against.
 839
 840    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 841    except in special cases.  */
 842
 843 EMACS_INT
 844 scan_newline (EMACS_INT start, EMACS_INT start_byte,
 845               EMACS_INT limit, EMACS_INT limit_byte,
 846               register EMACS_INT count, int allow_quit)
 847 {
 848   int direction = ((count > 0) ? 1 : -1);
 849
 850   register unsigned char *cursor;
 851   unsigned char *base;
 852
 853   EMACS_INT ceiling;
 854   register unsigned char *ceiling_addr;
 855
 856   int old_immediate_quit = immediate_quit;
 857
 858   /* The code that follows is like scan_buffer
 859      but checks for either newline or carriage return.  */
 860
 861   if (allow_quit)
 862     immediate_quit++;
 863
 864   start_byte = CHAR_TO_BYTE (start);
 865
 866   if (count > 0)
 867     {
 868       while (start_byte < limit_byte)
 869         {
 870           ceiling =  BUFFER_CEILING_OF (start_byte);
 871           ceiling = min (limit_byte - 1, ceiling);
 872           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 873           base = (cursor = BYTE_POS_ADDR (start_byte));
 874           while (1)
 875             {
 876               while (*cursor != '\n' && ++cursor != ceiling_addr)
 877                 ;
 878
 879               if (cursor != ceiling_addr)
 880                 {
 881                   if (--count == 0)
 882                     {
 883                       immediate_quit = old_immediate_quit;
 884                       start_byte = start_byte + cursor - base + 1;
 885                       start = BYTE_TO_CHAR (start_byte);
 886                       TEMP_SET_PT_BOTH (start, start_byte);
 887                       return 0;
 888                     }
 889                   else
 890                     if (++cursor == ceiling_addr)
 891                       break;
 892                 }
 893               else
 894                 break;
 895             }
 896           start_byte += cursor - base;
 897         }
 898     }
 899   else
 900     {
 901       while (start_byte > limit_byte)
 902         {
 903           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 904           ceiling = max (limit_byte, ceiling);
 905           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 906           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 907           while (1)
 908             {
 909               while (--cursor != ceiling_addr && *cursor != '\n')
 910                 ;
 911
 912               if (cursor != ceiling_addr)
 913                 {
 914                   if (++count == 0)
 915                     {
 916                       immediate_quit = old_immediate_quit;
 917                       /* Return the position AFTER the match we found.  */
 918                       start_byte = start_byte + cursor - base + 1;
 919                       start = BYTE_TO_CHAR (start_byte);
 920                       TEMP_SET_PT_BOTH (start, start_byte);
 921                       return 0;
 922                     }
 923                 }
 924               else
 925                 break;
 926             }
 927           /* Here we add 1 to compensate for the last decrement
 928              of CURSOR, which took it past the valid range.  */
 929           start_byte += cursor - base + 1;
 930         }
 931     }
 932
 933   TEMP_SET_PT_BOTH (limit, limit_byte);
 934   immediate_quit = old_immediate_quit;
 935
 936   return count * direction;
 937 }
 938
 939 EMACS_INT
 940 find_next_newline_no_quit (EMACS_INT from, EMACS_INT cnt)
 941 {
 942   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 943 }
 944
 945 /* Like find_next_newline, but returns position before the newline,
 946    not after, and only search up to TO.  This isn't just
 947    find_next_newline (...)-1, because you might hit TO.  */
 948
 949 EMACS_INT
 950 find_before_next_newline (EMACS_INT from, EMACS_INT to, EMACS_INT cnt)
 951 {
 952   int shortage;
 953   EMACS_INT pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 954
 955   if (shortage == 0)
 956     pos--;
 957
 958   return pos;
 959 }
 960 \f
 961 /* Subroutines of Lisp buffer search functions. */
 962
 963 static Lisp_Object
 964 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
 965                 Lisp_Object count, int direction, int RE, int posix)
 966 {
 967   register int np;
 968   EMACS_INT lim, lim_byte;
 969   int n = direction;
 970
 971   if (!NILP (count))
 972     {
 973       CHECK_NUMBER (count);
 974       n *= XINT (count);
 975     }
 976
 977   CHECK_STRING (string);
 978   if (NILP (bound))
 979     {
 980       if (n > 0)
 981         lim = ZV, lim_byte = ZV_BYTE;
 982       else
 983         lim = BEGV, lim_byte = BEGV_BYTE;
 984     }
 985   else
 986     {
 987       CHECK_NUMBER_COERCE_MARKER (bound);
 988       lim = XINT (bound);
 989       if (n > 0 ? lim < PT : lim > PT)
 990         error ("Invalid search bound (wrong side of point)");
 991       if (lim > ZV)
 992         lim = ZV, lim_byte = ZV_BYTE;
 993       else if (lim < BEGV)
 994         lim = BEGV, lim_byte = BEGV_BYTE;
 995       else
 996         lim_byte = CHAR_TO_BYTE (lim);
 997     }
 998
 999   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1000   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1001     = current_buffer->case_eqv_table;
1002
1003   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1004                       (!NILP (current_buffer->case_fold_search)
1005                        ? current_buffer->case_canon_table
1006                        : Qnil),
1007                       (!NILP (current_buffer->case_fold_search)
1008                        ? current_buffer->case_eqv_table
1009                        : Qnil),
1010                       posix);
1011   if (np <= 0)
1012     {
1013       if (NILP (noerror))
1014         xsignal1 (Qsearch_failed, string);
1015
1016       if (!EQ (noerror, Qt))
1017         {
1018           if (lim < BEGV || lim > ZV)
1019             abort ();
1020           SET_PT_BOTH (lim, lim_byte);
1021           return Qnil;
1022 #if 0 /* This would be clean, but maybe programs depend on
1023          a value of nil here.  */
1024           np = lim;
1025 #endif
1026         }
1027       else
1028         return Qnil;
1029     }
1030
1031   if (np < BEGV || np > ZV)
1032     abort ();
1033
1034   SET_PT (np);
1035
1036   return make_number (np);
1037 }
1038 \f
1039 /* Return 1 if REGEXP it matches just one constant string.  */
1040
1041 static int
1042 trivial_regexp_p (Lisp_Object regexp)
1043 {
1044   EMACS_INT len = SBYTES (regexp);
1045   unsigned char *s = SDATA (regexp);
1046   while (--len >= 0)
1047     {
1048       switch (*s++)
1049         {
1050         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1051           return 0;
1052         case '\\':
1053           if (--len < 0)
1054             return 0;
1055           switch (*s++)
1056             {
1057             case '|': case '(': case ')': case '`': case '\'': case 'b':
1058             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1059             case 'S': case '=': case '{': case '}': case '_':
1060             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1061             case '1': case '2': case '3': case '4': case '5':
1062             case '6': case '7': case '8': case '9':
1063               return 0;
1064             }
1065         }
1066     }
1067   return 1;
1068 }
1069
1070 /* Search for the n'th occurrence of STRING in the current buffer,
1071    starting at position POS and stopping at position LIM,
1072    treating STRING as a literal string if RE is false or as
1073    a regular expression if RE is true.
1074
1075    If N is positive, searching is forward and LIM must be greater than POS.
1076    If N is negative, searching is backward and LIM must be less than POS.
1077
1078    Returns -x if x occurrences remain to be found (x > 0),
1079    or else the position at the beginning of the Nth occurrence
1080    (if searching backward) or the end (if searching forward).
1081
1082    POSIX is nonzero if we want full backtracking (POSIX style)
1083    for this pattern.  0 means backtrack only enough to get a valid match.  */
1084
1085 #define TRANSLATE(out, trt, d)                  \
1086 do                                              \
1087   {                                             \
1088     if (! NILP (trt))                           \
1089       {                                         \
1090         Lisp_Object temp;                       \
1091         temp = Faref (trt, make_number (d));    \
1092         if (INTEGERP (temp))                    \
1093           out = XINT (temp);                    \
1094         else                                    \
1095           out = d;                              \
1096       }                                         \
1097     else                                        \
1098       out = d;                                  \
1099   }                                             \
1100 while (0)
1101
1102 /* Only used in search_buffer, to record the end position of the match
1103    when searching regexps and SEARCH_REGS should not be changed
1104    (i.e. Vinhibit_changing_match_data is non-nil).  */
1105 static struct re_registers search_regs_1;
1106
1107 static EMACS_INT
1108 search_buffer (Lisp_Object string, EMACS_INT pos, EMACS_INT pos_byte,
1109                EMACS_INT lim, EMACS_INT lim_byte, EMACS_INT n,
1110                int RE, Lisp_Object trt, Lisp_Object inverse_trt, int posix)
1111 {
1112   EMACS_INT len = SCHARS (string);
1113   EMACS_INT len_byte = SBYTES (string);
1114   register int i;
1115
1116   if (running_asynch_code)
1117     save_search_regs ();
1118
1119   /* Searching 0 times means don't move.  */
1120   /* Null string is found at starting position.  */
1121   if (len == 0 || n == 0)
1122     {
1123       set_search_regs (pos_byte, 0);
1124       return pos;
1125     }
1126
1127   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1128     {
1129       unsigned char *p1, *p2;
1130       EMACS_INT s1, s2;
1131       struct re_pattern_buffer *bufp;
1132
1133       bufp = compile_pattern (string,
1134                               (NILP (Vinhibit_changing_match_data)
1135                                ? &search_regs : &search_regs_1),
1136                               trt, posix,
1137                               !NILP (current_buffer->enable_multibyte_characters));
1138
1139       immediate_quit = 1;       /* Quit immediately if user types ^G,
1140                                    because letting this function finish
1141                                    can take too long. */
1142       QUIT;                     /* Do a pending quit right away,
1143                                    to avoid paradoxical behavior */
1144       /* Get pointers and sizes of the two strings
1145          that make up the visible portion of the buffer. */
1146
1147       p1 = BEGV_ADDR;
1148       s1 = GPT_BYTE - BEGV_BYTE;
1149       p2 = GAP_END_ADDR;
1150       s2 = ZV_BYTE - GPT_BYTE;
1151       if (s1 < 0)
1152         {
1153           p2 = p1;
1154           s2 = ZV_BYTE - BEGV_BYTE;
1155           s1 = 0;
1156         }
1157       if (s2 < 0)
1158         {
1159           s1 = ZV_BYTE - BEGV_BYTE;
1160           s2 = 0;
1161         }
1162       re_match_object = Qnil;
1163
1164       while (n < 0)
1165         {
1166           EMACS_INT val;
1167           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1168                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1169                              (NILP (Vinhibit_changing_match_data)
1170                               ? &search_regs : &search_regs_1),
1171                              /* Don't allow match past current point */
1172                              pos_byte - BEGV_BYTE);
1173           if (val == -2)
1174             {
1175               matcher_overflow ();
1176             }
1177           if (val >= 0)
1178             {
1179               if (NILP (Vinhibit_changing_match_data))
1180                 {
1181                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1182                   for (i = 0; i < search_regs.num_regs; i++)
1183                     if (search_regs.start[i] >= 0)
1184                       {
1185                         search_regs.start[i]
1186                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1187                         search_regs.end[i]
1188                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1189                       }
1190                   XSETBUFFER (last_thing_searched, current_buffer);
1191                   /* Set pos to the new position. */
1192                   pos = search_regs.start[0];
1193                 }
1194               else
1195                 {
1196                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1197                   /* Set pos to the new position.  */
1198                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1199                 }
1200             }
1201           else
1202             {
1203               immediate_quit = 0;
1204               return (n);
1205             }
1206           n++;
1207         }
1208       while (n > 0)
1209         {
1210           EMACS_INT val;
1211           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1212                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1213                              (NILP (Vinhibit_changing_match_data)
1214                               ? &search_regs : &search_regs_1),
1215                              lim_byte - BEGV_BYTE);
1216           if (val == -2)
1217             {
1218               matcher_overflow ();
1219             }
1220           if (val >= 0)
1221             {
1222               if (NILP (Vinhibit_changing_match_data))
1223                 {
1224                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1225                   for (i = 0; i < search_regs.num_regs; i++)
1226                     if (search_regs.start[i] >= 0)
1227                       {
1228                         search_regs.start[i]
1229                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1230                         search_regs.end[i]
1231                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1232                       }
1233                   XSETBUFFER (last_thing_searched, current_buffer);
1234                   pos = search_regs.end[0];
1235                 }
1236               else
1237                 {
1238                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1239                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1240                 }
1241             }
1242           else
1243             {
1244               immediate_quit = 0;
1245               return (0 - n);
1246             }
1247           n--;
1248         }
1249       immediate_quit = 0;
1250       return (pos);
1251     }
1252   else                          /* non-RE case */
1253     {
1254       unsigned char *raw_pattern, *pat;
1255       EMACS_INT raw_pattern_size;
1256       EMACS_INT raw_pattern_size_byte;
1257       unsigned char *patbuf;
1258       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1259       unsigned char *base_pat;
1260       /* Set to positive if we find a non-ASCII char that need
1261          translation.  Otherwise set to zero later.  */
1262       int char_base = -1;
1263       int boyer_moore_ok = 1;
1264
1265       /* MULTIBYTE says whether the text to be searched is multibyte.
1266          We must convert PATTERN to match that, or we will not really
1267          find things right.  */
1268
1269       if (multibyte == STRING_MULTIBYTE (string))
1270         {
1271           raw_pattern = SDATA (string);
1272           raw_pattern_size = SCHARS (string);
1273           raw_pattern_size_byte = SBYTES (string);
1274         }
1275       else if (multibyte)
1276         {
1277           raw_pattern_size = SCHARS (string);
1278           raw_pattern_size_byte
1279             = count_size_as_multibyte (SDATA (string),
1280                                        raw_pattern_size);
1281           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1282           copy_text (SDATA (string), raw_pattern,
1283                      SCHARS (string), 0, 1);
1284         }
1285       else
1286         {
1287           /* Converting multibyte to single-byte.
1288
1289              ??? Perhaps this conversion should be done in a special way
1290              by subtracting nonascii-insert-offset from each non-ASCII char,
1291              so that only the multibyte chars which really correspond to
1292              the chosen single-byte character set can possibly match.  */
1293           raw_pattern_size = SCHARS (string);
1294           raw_pattern_size_byte = SCHARS (string);
1295           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1296           copy_text (SDATA (string), raw_pattern,
1297                      SBYTES (string), 1, 0);
1298         }
1299
1300       /* Copy and optionally translate the pattern.  */
1301       len = raw_pattern_size;
1302       len_byte = raw_pattern_size_byte;
1303       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1304       pat = patbuf;
1305       base_pat = raw_pattern;
1306       if (multibyte)
1307         {
1308           /* Fill patbuf by translated characters in STRING while
1309              checking if we can use boyer-moore search.  If TRT is
1310              non-nil, we can use boyer-moore search only if TRT can be
1311              represented by the byte array of 256 elements.  For that,
1312              all non-ASCII case-equivalents of all case-senstive
1313              characters in STRING must belong to the same charset and
1314              row.  */
1315
1316           while (--len >= 0)
1317             {
1318               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1319               int c, translated, inverse;
1320               int in_charlen, charlen;
1321
1322               /* If we got here and the RE flag is set, it's because we're
1323                  dealing with a regexp known to be trivial, so the backslash
1324                  just quotes the next character.  */
1325               if (RE && *base_pat == '\\')
1326                 {
1327                   len--;
1328                   raw_pattern_size--;
1329                   len_byte--;
1330                   base_pat++;
1331                 }
1332
1333               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1334
1335               if (NILP (trt))
1336                 {
1337                   str = base_pat;
1338                   charlen = in_charlen;
1339                 }
1340               else
1341                 {
1342                   /* Translate the character.  */
1343                   TRANSLATE (translated, trt, c);
1344                   charlen = CHAR_STRING (translated, str_base);
1345                   str = str_base;
1346
1347                   /* Check if C has any other case-equivalents.  */
1348                   TRANSLATE (inverse, inverse_trt, c);
1349                   /* If so, check if we can use boyer-moore.  */
1350                   if (c != inverse && boyer_moore_ok)
1351                     {
1352                       /* Check if all equivalents belong to the same
1353                          group of characters.  Note that the check of C
1354                          itself is done by the last iteration.  */
1355                       int this_char_base = -1;
1356
1357                       while (boyer_moore_ok)
1358                         {
1359                           if (ASCII_BYTE_P (inverse))
1360                             {
1361                               if (this_char_base > 0)
1362                                 boyer_moore_ok = 0;
1363                               else
1364                                 this_char_base = 0;
1365                             }
1366                           else if (CHAR_BYTE8_P (inverse))
1367                             /* Boyer-moore search can't handle a
1368                                translation of an eight-bit
1369                                character.  */
1370                             boyer_moore_ok = 0;
1371                           else if (this_char_base < 0)
1372                             {
1373                               this_char_base = inverse & ~0x3F;
1374                               if (char_base < 0)
1375                                 char_base = this_char_base;
1376                               else if (this_char_base != char_base)
1377                                 boyer_moore_ok = 0;
1378                             }
1379                           else if ((inverse & ~0x3F) != this_char_base)
1380                             boyer_moore_ok = 0;
1381                           if (c == inverse)
1382                             break;
1383                           TRANSLATE (inverse, inverse_trt, inverse);
1384                         }
1385                     }
1386                 }
1387
1388               /* Store this character into the translated pattern.  */
1389               memcpy (pat, str, charlen);
1390               pat += charlen;
1391               base_pat += in_charlen;
1392               len_byte -= in_charlen;
1393             }
1394
1395           /* If char_base is still negative we didn't find any translated
1396              non-ASCII characters.  */
1397           if (char_base < 0)
1398             char_base = 0;
1399         }
1400       else
1401         {
1402           /* Unibyte buffer.  */
1403           char_base = 0;
1404           while (--len >= 0)
1405             {
1406               int c, translated;
1407
1408               /* If we got here and the RE flag is set, it's because we're
1409                  dealing with a regexp known to be trivial, so the backslash
1410                  just quotes the next character.  */
1411               if (RE && *base_pat == '\\')
1412                 {
1413                   len--;
1414                   raw_pattern_size--;
1415                   base_pat++;
1416                 }
1417               c = *base_pat++;
1418               TRANSLATE (translated, trt, c);
1419               *pat++ = translated;
1420             }
1421         }
1422
1423       len_byte = pat - patbuf;
1424       len = raw_pattern_size;
1425       pat = base_pat = patbuf;
1426
1427       if (boyer_moore_ok)
1428         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1429                             pos, pos_byte, lim, lim_byte,
1430                             char_base);
1431       else
1432         return simple_search (n, pat, len, len_byte, trt,
1433                               pos, pos_byte, lim, lim_byte);
1434     }
1435 }
1436 \f
1437 /* Do a simple string search N times for the string PAT,
1438    whose length is LEN/LEN_BYTE,
1439    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1440    TRT is the translation table.
1441
1442    Return the character position where the match is found.
1443    Otherwise, if M matches remained to be found, return -M.
1444
1445    This kind of search works regardless of what is in PAT and
1446    regardless of what is in TRT.  It is used in cases where
1447    boyer_moore cannot work.  */
1448
1449 static EMACS_INT
1450 simple_search (EMACS_INT n, unsigned char *pat,
1451                EMACS_INT len, EMACS_INT len_byte, Lisp_Object trt,
1452                EMACS_INT pos, EMACS_INT pos_byte,
1453                EMACS_INT lim, EMACS_INT lim_byte)
1454 {
1455   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1456   int forward = n > 0;
1457   /* Number of buffer bytes matched.  Note that this may be different
1458      from len_byte in a multibyte buffer.  */
1459   EMACS_INT match_byte;
1460
1461   if (lim > pos && multibyte)
1462     while (n > 0)
1463       {
1464         while (1)
1465           {
1466             /* Try matching at position POS.  */
1467             EMACS_INT this_pos = pos;
1468             EMACS_INT this_pos_byte = pos_byte;
1469             EMACS_INT this_len = len;
1470             unsigned char *p = pat;
1471             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1472               goto stop;
1473
1474             while (this_len > 0)
1475               {
1476                 int charlen, buf_charlen;
1477                 int pat_ch, buf_ch;
1478
1479                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1480                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1481                                                  buf_charlen);
1482                 TRANSLATE (buf_ch, trt, buf_ch);
1483
1484                 if (buf_ch != pat_ch)
1485                   break;
1486
1487                 this_len--;
1488                 p += charlen;
1489
1490                 this_pos_byte += buf_charlen;
1491                 this_pos++;
1492               }
1493
1494             if (this_len == 0)
1495               {
1496                 match_byte = this_pos_byte - pos_byte;
1497                 pos += len;
1498                 pos_byte += match_byte;
1499                 break;
1500               }
1501
1502             INC_BOTH (pos, pos_byte);
1503           }
1504
1505         n--;
1506       }
1507   else if (lim > pos)
1508     while (n > 0)
1509       {
1510         while (1)
1511           {
1512             /* Try matching at position POS.  */
1513             EMACS_INT this_pos = pos;
1514             EMACS_INT this_len = len;
1515             unsigned char *p = pat;
1516
1517             if (pos + len > lim)
1518               goto stop;
1519
1520             while (this_len > 0)
1521               {
1522                 int pat_ch = *p++;
1523                 int buf_ch = FETCH_BYTE (this_pos);
1524                 TRANSLATE (buf_ch, trt, buf_ch);
1525
1526                 if (buf_ch != pat_ch)
1527                   break;
1528
1529                 this_len--;
1530                 this_pos++;
1531               }
1532
1533             if (this_len == 0)
1534               {
1535                 match_byte = len;
1536                 pos += len;
1537                 break;
1538               }
1539
1540             pos++;
1541           }
1542
1543         n--;
1544       }
1545   /* Backwards search.  */
1546   else if (lim < pos && multibyte)
1547     while (n < 0)
1548       {
1549         while (1)
1550           {
1551             /* Try matching at position POS.  */
1552             EMACS_INT this_pos = pos;
1553             EMACS_INT this_pos_byte = pos_byte;
1554             EMACS_INT this_len = len;
1555             const unsigned char *p = pat + len_byte;
1556
1557             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1558               goto stop;
1559
1560             while (this_len > 0)
1561               {
1562                 int charlen;
1563                 int pat_ch, buf_ch;
1564
1565                 DEC_BOTH (this_pos, this_pos_byte);
1566                 PREV_CHAR_BOUNDARY (p, pat);
1567                 pat_ch = STRING_CHAR (p);
1568                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1569                 TRANSLATE (buf_ch, trt, buf_ch);
1570
1571                 if (buf_ch != pat_ch)
1572                   break;
1573
1574                 this_len--;
1575               }
1576
1577             if (this_len == 0)
1578               {
1579                 match_byte = pos_byte - this_pos_byte;
1580                 pos = this_pos;
1581                 pos_byte = this_pos_byte;
1582                 break;
1583               }
1584
1585             DEC_BOTH (pos, pos_byte);
1586           }
1587
1588         n++;
1589       }
1590   else if (lim < pos)
1591     while (n < 0)
1592       {
1593         while (1)
1594           {
1595             /* Try matching at position POS.  */
1596             EMACS_INT this_pos = pos - len;
1597             EMACS_INT this_len = len;
1598             unsigned char *p = pat;
1599
1600             if (this_pos < lim)
1601               goto stop;
1602
1603             while (this_len > 0)
1604               {
1605                 int pat_ch = *p++;
1606                 int buf_ch = FETCH_BYTE (this_pos);
1607                 TRANSLATE (buf_ch, trt, buf_ch);
1608
1609                 if (buf_ch != pat_ch)
1610                   break;
1611                 this_len--;
1612                 this_pos++;
1613               }
1614
1615             if (this_len == 0)
1616               {
1617                 match_byte = len;
1618                 pos -= len;
1619                 break;
1620               }
1621
1622             pos--;
1623           }
1624
1625         n++;
1626       }
1627
1628  stop:
1629   if (n == 0)
1630     {
1631       if (forward)
1632         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1633       else
1634         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1635
1636       return pos;
1637     }
1638   else if (n > 0)
1639     return -n;
1640   else
1641     return n;
1642 }
1643 \f
1644 /* Do Boyer-Moore search N times for the string BASE_PAT,
1645    whose length is LEN/LEN_BYTE,
1646    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1647    DIRECTION says which direction we search in.
1648    TRT and INVERSE_TRT are translation tables.
1649    Characters in PAT are already translated by TRT.
1650
1651    This kind of search works if all the characters in BASE_PAT that
1652    have nontrivial translation are the same aside from the last byte.
1653    This makes it possible to translate just the last byte of a
1654    character, and do so after just a simple test of the context.
1655    CHAR_BASE is nonzero if there is such a non-ASCII character.
1656
1657    If that criterion is not satisfied, do not call this function.  */
1658
1659 static EMACS_INT
1660 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1661              EMACS_INT len, EMACS_INT len_byte,
1662              Lisp_Object trt, Lisp_Object inverse_trt,
1663              EMACS_INT pos, EMACS_INT pos_byte,
1664              EMACS_INT lim, EMACS_INT lim_byte, int char_base)
1665 {
1666   int direction = ((n > 0) ? 1 : -1);
1667   register EMACS_INT dirlen;
1668   EMACS_INT limit;
1669   int stride_for_teases = 0;
1670   int BM_tab[0400];
1671   register unsigned char *cursor, *p_limit;
1672   register EMACS_INT i;
1673   register int j;
1674   unsigned char *pat, *pat_end;
1675   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1676
1677   unsigned char simple_translate[0400];
1678   /* These are set to the preceding bytes of a byte to be translated
1679      if char_base is nonzero.  As the maximum byte length of a
1680      multibyte character is 5, we have to check at most four previous
1681      bytes.  */
1682   int translate_prev_byte1 = 0;
1683   int translate_prev_byte2 = 0;
1684   int translate_prev_byte3 = 0;
1685   int translate_prev_byte4 = 0;
1686
1687   /* The general approach is that we are going to maintain that we know
1688      the first (closest to the present position, in whatever direction
1689      we're searching) character that could possibly be the last
1690      (furthest from present position) character of a valid match.  We
1691      advance the state of our knowledge by looking at that character
1692      and seeing whether it indeed matches the last character of the
1693      pattern.  If it does, we take a closer look.  If it does not, we
1694      move our pointer (to putative last characters) as far as is
1695      logically possible.  This amount of movement, which I call a
1696      stride, will be the length of the pattern if the actual character
1697      appears nowhere in the pattern, otherwise it will be the distance
1698      from the last occurrence of that character to the end of the
1699      pattern.  If the amount is zero we have a possible match.  */
1700
1701   /* Here we make a "mickey mouse" BM table.  The stride of the search
1702      is determined only by the last character of the putative match.
1703      If that character does not match, we will stride the proper
1704      distance to propose a match that superimposes it on the last
1705      instance of a character that matches it (per trt), or misses
1706      it entirely if there is none. */
1707
1708   dirlen = len_byte * direction;
1709
1710   /* Record position after the end of the pattern.  */
1711   pat_end = base_pat + len_byte;
1712   /* BASE_PAT points to a character that we start scanning from.
1713      It is the first character in a forward search,
1714      the last character in a backward search.  */
1715   if (direction < 0)
1716     base_pat = pat_end - 1;
1717
1718   /* A character that does not appear in the pattern induces a
1719      stride equal to the pattern length.  */
1720   for (i = 0; i < 0400; i++)
1721     BM_tab[i] = dirlen;
1722
1723   /* We use this for translation, instead of TRT itself.
1724      We fill this in to handle the characters that actually
1725      occur in the pattern.  Others don't matter anyway!  */
1726   for (i = 0; i < 0400; i++)
1727     simple_translate[i] = i;
1728
1729   if (char_base)
1730     {
1731       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1732          byte following them are the target of translation.  */
1733       unsigned char str[MAX_MULTIBYTE_LENGTH];
1734       int len = CHAR_STRING (char_base, str);
1735
1736       translate_prev_byte1 = str[len - 2];
1737       if (len > 2)
1738         {
1739           translate_prev_byte2 = str[len - 3];
1740           if (len > 3)
1741             {
1742               translate_prev_byte3 = str[len - 4];
1743               if (len > 4)
1744                 translate_prev_byte4 = str[len - 5];
1745             }
1746         }
1747     }
1748
1749   i = 0;
1750   while (i != dirlen)
1751     {
1752       unsigned char *ptr = base_pat + i;
1753       i += direction;
1754       if (! NILP (trt))
1755         {
1756           /* If the byte currently looking at is the last of a
1757              character to check case-equivalents, set CH to that
1758              character.  An ASCII character and a non-ASCII character
1759              matching with CHAR_BASE are to be checked.  */
1760           int ch = -1;
1761
1762           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1763             ch = *ptr;
1764           else if (char_base
1765                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1766             {
1767               unsigned char *charstart = ptr - 1;
1768
1769               while (! (CHAR_HEAD_P (*charstart)))
1770                 charstart--;
1771               ch = STRING_CHAR (charstart);
1772               if (char_base != (ch & ~0x3F))
1773                 ch = -1;
1774             }
1775
1776           if (ch >= 0200)
1777             j = (ch & 0x3F) | 0200;
1778           else
1779             j = *ptr;
1780
1781           if (i == dirlen)
1782             stride_for_teases = BM_tab[j];
1783
1784           BM_tab[j] = dirlen - i;
1785           /* A translation table is accompanied by its inverse -- see */
1786           /* comment following downcase_table for details */
1787           if (ch >= 0)
1788             {
1789               int starting_ch = ch;
1790               int starting_j = j;
1791
1792               while (1)
1793                 {
1794                   TRANSLATE (ch, inverse_trt, ch);
1795                   if (ch >= 0200)
1796                     j = (ch & 0x3F) | 0200;
1797                   else
1798                     j = ch;
1799
1800                   /* For all the characters that map into CH,
1801                      set up simple_translate to map the last byte
1802                      into STARTING_J.  */
1803                   simple_translate[j] = starting_j;
1804                   if (ch == starting_ch)
1805                     break;
1806                   BM_tab[j] = dirlen - i;
1807                 }
1808             }
1809         }
1810       else
1811         {
1812           j = *ptr;
1813
1814           if (i == dirlen)
1815             stride_for_teases = BM_tab[j];
1816           BM_tab[j] = dirlen - i;
1817         }
1818       /* stride_for_teases tells how much to stride if we get a
1819          match on the far character but are subsequently
1820          disappointed, by recording what the stride would have been
1821          for that character if the last character had been
1822          different.  */
1823     }
1824   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1825   /* loop invariant - POS_BYTE points at where last char (first
1826      char if reverse) of pattern would align in a possible match.  */
1827   while (n != 0)
1828     {
1829       EMACS_INT tail_end;
1830       unsigned char *tail_end_ptr;
1831
1832       /* It's been reported that some (broken) compiler thinks that
1833          Boolean expressions in an arithmetic context are unsigned.
1834          Using an explicit ?1:0 prevents this.  */
1835       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1836           < 0)
1837         return (n * (0 - direction));
1838       /* First we do the part we can by pointers (maybe nothing) */
1839       QUIT;
1840       pat = base_pat;
1841       limit = pos_byte - dirlen + direction;
1842       if (direction > 0)
1843         {
1844           limit = BUFFER_CEILING_OF (limit);
1845           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1846              can take on without hitting edge of buffer or the gap.  */
1847           limit = min (limit, pos_byte + 20000);
1848           limit = min (limit, lim_byte - 1);
1849         }
1850       else
1851         {
1852           limit = BUFFER_FLOOR_OF (limit);
1853           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1854              can take on without hitting edge of buffer or the gap.  */
1855           limit = max (limit, pos_byte - 20000);
1856           limit = max (limit, lim_byte);
1857         }
1858       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1859       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1860
1861       if ((limit - pos_byte) * direction > 20)
1862         {
1863           unsigned char *p2;
1864
1865           p_limit = BYTE_POS_ADDR (limit);
1866           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1867           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1868           while (1)             /* use one cursor setting as long as i can */
1869             {
1870               if (direction > 0) /* worth duplicating */
1871                 {
1872                   while (cursor <= p_limit)
1873                     {
1874                       if (BM_tab[*cursor] == 0)
1875                         goto hit;
1876                       cursor += BM_tab[*cursor];
1877                     }
1878                 }
1879               else
1880                 {
1881                   while (cursor >= p_limit)
1882                     {
1883                       if (BM_tab[*cursor] == 0)
1884                         goto hit;
1885                       cursor += BM_tab[*cursor];
1886                     }
1887                 }
1888               /* If you are here, cursor is beyond the end of the
1889                  searched region.  You fail to match within the
1890                  permitted region and would otherwise try a character
1891                  beyond that region.  */
1892               break;
1893
1894             hit:
1895               i = dirlen - direction;
1896               if (! NILP (trt))
1897                 {
1898                   while ((i -= direction) + direction != 0)
1899                     {
1900                       int ch;
1901                       cursor -= direction;
1902                       /* Translate only the last byte of a character.  */
1903                       if (! multibyte
1904                           || ((cursor == tail_end_ptr
1905                                || CHAR_HEAD_P (cursor[1]))
1906                               && (CHAR_HEAD_P (cursor[0])
1907                                   /* Check if this is the last byte of
1908                                      a translable character.  */
1909                                   || (translate_prev_byte1 == cursor[-1]
1910                                       && (CHAR_HEAD_P (translate_prev_byte1)
1911                                           || (translate_prev_byte2 == cursor[-2]
1912                                               && (CHAR_HEAD_P (translate_prev_byte2)
1913                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1914                         ch = simple_translate[*cursor];
1915                       else
1916                         ch = *cursor;
1917                       if (pat[i] != ch)
1918                         break;
1919                     }
1920                 }
1921               else
1922                 {
1923                   while ((i -= direction) + direction != 0)
1924                     {
1925                       cursor -= direction;
1926                       if (pat[i] != *cursor)
1927                         break;
1928                     }
1929                 }
1930               cursor += dirlen - i - direction; /* fix cursor */
1931               if (i + direction == 0)
1932                 {
1933                   EMACS_INT position, start, end;
1934
1935                   cursor -= direction;
1936
1937                   position = pos_byte + cursor - p2 + ((direction > 0)
1938                                                        ? 1 - len_byte : 0);
1939                   set_search_regs (position, len_byte);
1940
1941                   if (NILP (Vinhibit_changing_match_data))
1942                     {
1943                       start = search_regs.start[0];
1944                       end = search_regs.end[0];
1945                     }
1946                   else
1947                     /* If Vinhibit_changing_match_data is non-nil,
1948                        search_regs will not be changed.  So let's
1949                        compute start and end here.  */
1950                     {
1951                       start = BYTE_TO_CHAR (position);
1952                       end = BYTE_TO_CHAR (position + len_byte);
1953                     }
1954
1955                   if ((n -= direction) != 0)
1956                     cursor += dirlen; /* to resume search */
1957                   else
1958                     return direction > 0 ? end : start;
1959                 }
1960               else
1961                 cursor += stride_for_teases; /* <sigh> we lose -  */
1962             }
1963           pos_byte += cursor - p2;
1964         }
1965       else
1966         /* Now we'll pick up a clump that has to be done the hard
1967            way because it covers a discontinuity.  */
1968         {
1969           limit = ((direction > 0)
1970                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1971                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1972           limit = ((direction > 0)
1973                    ? min (limit + len_byte, lim_byte - 1)
1974                    : max (limit - len_byte, lim_byte));
1975           /* LIMIT is now the last value POS_BYTE can have
1976              and still be valid for a possible match.  */
1977           while (1)
1978             {
1979               /* This loop can be coded for space rather than
1980                  speed because it will usually run only once.
1981                  (the reach is at most len + 21, and typically
1982                  does not exceed len).  */
1983               while ((limit - pos_byte) * direction >= 0)
1984                 {
1985                   int ch = FETCH_BYTE (pos_byte);
1986                   if (BM_tab[ch] == 0)
1987                     goto hit2;
1988                   pos_byte += BM_tab[ch];
1989                 }
1990               break;    /* ran off the end */
1991
1992             hit2:
1993               /* Found what might be a match.  */
1994               i = dirlen - direction;
1995               while ((i -= direction) + direction != 0)
1996                 {
1997                   int ch;
1998                   unsigned char *ptr;
1999                   pos_byte -= direction;
2000                   ptr = BYTE_POS_ADDR (pos_byte);
2001                   /* Translate only the last byte of a character.  */
2002                   if (! multibyte
2003                       || ((ptr == tail_end_ptr
2004                            || CHAR_HEAD_P (ptr[1]))
2005                           && (CHAR_HEAD_P (ptr[0])
2006                               /* Check if this is the last byte of a
2007                                  translable character.  */
2008                               || (translate_prev_byte1 == ptr[-1]
2009                                   && (CHAR_HEAD_P (translate_prev_byte1)
2010                                       || (translate_prev_byte2 == ptr[-2]
2011                                           && (CHAR_HEAD_P (translate_prev_byte2)
2012                                               || translate_prev_byte3 == ptr[-3])))))))
2013                     ch = simple_translate[*ptr];
2014                   else
2015                     ch = *ptr;
2016                   if (pat[i] != ch)
2017                     break;
2018                 }
2019               /* Above loop has moved POS_BYTE part or all the way
2020                  back to the first pos (last pos if reverse).
2021                  Set it once again at the last (first if reverse) char.  */
2022               pos_byte += dirlen - i - direction;
2023               if (i + direction == 0)
2024                 {
2025                   EMACS_INT position, start, end;
2026                   pos_byte -= direction;
2027
2028                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2029                   set_search_regs (position, len_byte);
2030
2031                   if (NILP (Vinhibit_changing_match_data))
2032                     {
2033                       start = search_regs.start[0];
2034                       end = search_regs.end[0];
2035                     }
2036                   else
2037                     /* If Vinhibit_changing_match_data is non-nil,
2038                        search_regs will not be changed.  So let's
2039                        compute start and end here.  */
2040                     {
2041                       start = BYTE_TO_CHAR (position);
2042                       end = BYTE_TO_CHAR (position + len_byte);
2043                     }
2044
2045                   if ((n -= direction) != 0)
2046                     pos_byte += dirlen; /* to resume search */
2047                   else
2048                     return direction > 0 ? end : start;
2049                 }
2050               else
2051                 pos_byte += stride_for_teases;
2052             }
2053           }
2054       /* We have done one clump.  Can we continue? */
2055       if ((lim_byte - pos_byte) * direction < 0)
2056         return ((0 - n) * direction);
2057     }
2058   return BYTE_TO_CHAR (pos_byte);
2059 }
2060
2061 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2062    for the overall match just found in the current buffer.
2063    Also clear out the match data for registers 1 and up.  */
2064
2065 static void
2066 set_search_regs (EMACS_INT beg_byte, EMACS_INT nbytes)
2067 {
2068   int i;
2069
2070   if (!NILP (Vinhibit_changing_match_data))
2071     return;
2072
2073   /* Make sure we have registers in which to store
2074      the match position.  */
2075   if (search_regs.num_regs == 0)
2076     {
2077       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2078       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2079       search_regs.num_regs = 2;
2080     }
2081
2082   /* Clear out the other registers.  */
2083   for (i = 1; i < search_regs.num_regs; i++)
2084     {
2085       search_regs.start[i] = -1;
2086       search_regs.end[i] = -1;
2087     }
2088
2089   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2090   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2091   XSETBUFFER (last_thing_searched, current_buffer);
2092 }
2093 \f
2094 /* Given STRING, a string of words separated by word delimiters,
2095    compute a regexp that matches those exact words separated by
2096    arbitrary punctuation.  If LAX is nonzero, the end of the string
2097    need not match a word boundary unless it ends in whitespace.  */
2098
2099 static Lisp_Object
2100 wordify (Lisp_Object string, int lax)
2101 {
2102   register unsigned char *p, *o;
2103   register EMACS_INT i, i_byte, len, punct_count = 0, word_count = 0;
2104   Lisp_Object val;
2105   int prev_c = 0;
2106   EMACS_INT adjust;
2107   int whitespace_at_end;
2108
2109   CHECK_STRING (string);
2110   p = SDATA (string);
2111   len = SCHARS (string);
2112
2113   for (i = 0, i_byte = 0; i < len; )
2114     {
2115       int c;
2116
2117       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2118
2119       if (SYNTAX (c) != Sword)
2120         {
2121           punct_count++;
2122           if (i > 0 && SYNTAX (prev_c) == Sword)
2123             word_count++;
2124         }
2125
2126       prev_c = c;
2127     }
2128
2129   if (SYNTAX (prev_c) == Sword)
2130     {
2131       word_count++;
2132       whitespace_at_end = 0;
2133     }
2134   else
2135     whitespace_at_end = 1;
2136
2137   if (!word_count)
2138     return empty_unibyte_string;
2139
2140   adjust = - punct_count + 5 * (word_count - 1)
2141     + ((lax && !whitespace_at_end) ? 2 : 4);
2142   if (STRING_MULTIBYTE (string))
2143     val = make_uninit_multibyte_string (len + adjust,
2144                                         SBYTES (string)
2145                                         + adjust);
2146   else
2147     val = make_uninit_string (len + adjust);
2148
2149   o = SDATA (val);
2150   *o++ = '\\';
2151   *o++ = 'b';
2152   prev_c = 0;
2153
2154   for (i = 0, i_byte = 0; i < len; )
2155     {
2156       int c;
2157       EMACS_INT i_byte_orig = i_byte;
2158
2159       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2160
2161       if (SYNTAX (c) == Sword)
2162         {
2163           memcpy (o, SDATA (string) + i_byte_orig, i_byte - i_byte_orig);
2164           o += i_byte - i_byte_orig;
2165         }
2166       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2167         {
2168           *o++ = '\\';
2169           *o++ = 'W';
2170           *o++ = '\\';
2171           *o++ = 'W';
2172           *o++ = '*';
2173         }
2174
2175       prev_c = c;
2176     }
2177
2178   if (!lax || whitespace_at_end)
2179     {
2180       *o++ = '\\';
2181       *o++ = 'b';
2182     }
2183
2184   return val;
2185 }
2186 \f
2187 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2188        "MSearch backward: ",
2189        doc: /* Search backward from point for STRING.
2190 Set point to the beginning of the occurrence found, and return point.
2191 An optional second argument bounds the search; it is a buffer position.
2192 The match found must not extend before that position.
2193 Optional third argument, if t, means if fail just return nil (no error).
2194  If not nil and not t, position at limit of search and return nil.
2195 Optional fourth argument is repeat count--search for successive occurrences.
2196
2197 Search case-sensitivity is determined by the value of the variable
2198 `case-fold-search', which see.
2199
2200 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2201   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2202 {
2203   return search_command (string, bound, noerror, count, -1, 0, 0);
2204 }
2205
2206 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2207        doc: /* Search forward from point for STRING.
2208 Set point to the end of the occurrence found, and return point.
2209 An optional second argument bounds the search; it is a buffer position.
2210 The match found must not extend after that position.  A value of nil is
2211   equivalent to (point-max).
2212 Optional third argument, if t, means if fail just return nil (no error).
2213   If not nil and not t, move to limit of search and return nil.
2214 Optional fourth argument is repeat count--search for successive occurrences.
2215
2216 Search case-sensitivity is determined by the value of the variable
2217 `case-fold-search', which see.
2218
2219 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2220   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2221 {
2222   return search_command (string, bound, noerror, count, 1, 0, 0);
2223 }
2224
2225 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2226        "sWord search backward: ",
2227        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2228 Set point to the beginning of the occurrence found, and return point.
2229 An optional second argument bounds the search; it is a buffer position.
2230 The match found must not extend before that position.
2231 Optional third argument, if t, means if fail just return nil (no error).
2232   If not nil and not t, move to limit of search and return nil.
2233 Optional fourth argument is repeat count--search for successive occurrences.  */)
2234   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2235 {
2236   return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
2237 }
2238
2239 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2240        "sWord search: ",
2241        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2242 Set point to the end of the occurrence found, and return point.
2243 An optional second argument bounds the search; it is a buffer position.
2244 The match found must not extend after that position.
2245 Optional third argument, if t, means if fail just return nil (no error).
2246   If not nil and not t, move to limit of search and return nil.
2247 Optional fourth argument is repeat count--search for successive occurrences.  */)
2248   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2249 {
2250   return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2251 }
2252
2253 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2254        "sWord search backward: ",
2255        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2256 Set point to the beginning of the occurrence found, and return point.
2257
2258 Unlike `word-search-backward', the end of STRING need not match a word
2259 boundary unless it ends in whitespace.
2260
2261 An optional second argument bounds the search; it is a buffer position.
2262 The match found must not extend before that position.
2263 Optional third argument, if t, means if fail just return nil (no error).
2264   If not nil and not t, move to limit of search and return nil.
2265 Optional fourth argument is repeat count--search for successive occurrences.  */)
2266   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2267 {
2268   return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2269 }
2270
2271 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2272        "sWord search: ",
2273        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2274 Set point to the end of the occurrence found, and return point.
2275
2276 Unlike `word-search-forward', the end of STRING need not match a word
2277 boundary unless it ends in whitespace.
2278
2279 An optional second argument bounds the search; it is a buffer position.
2280 The match found must not extend after that position.
2281 Optional third argument, if t, means if fail just return nil (no error).
2282   If not nil and not t, move to limit of search and return nil.
2283 Optional fourth argument is repeat count--search for successive occurrences.  */)
2284   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2285 {
2286   return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
2287 }
2288
2289 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2290        "sRE search backward: ",
2291        doc: /* Search backward from point for match for regular expression REGEXP.
2292 Set point to the beginning of the match, and return point.
2293 The match found is the one starting last in the buffer
2294 and yet ending before the origin of the search.
2295 An optional second argument bounds the search; it is a buffer position.
2296 The match found must start at or after that position.
2297 Optional third argument, if t, means if fail just return nil (no error).
2298   If not nil and not t, move to limit of search and return nil.
2299 Optional fourth argument is repeat count--search for successive occurrences.
2300 See also the functions `match-beginning', `match-end', `match-string',
2301 and `replace-match'.  */)
2302   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2303 {
2304   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2305 }
2306
2307 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2308        "sRE search: ",
2309        doc: /* Search forward from point for regular expression REGEXP.
2310 Set point to the end of the occurrence found, and return point.
2311 An optional second argument bounds the search; it is a buffer position.
2312 The match found must not extend after that position.
2313 Optional third argument, if t, means if fail just return nil (no error).
2314   If not nil and not t, move to limit of search and return nil.
2315 Optional fourth argument is repeat count--search for successive occurrences.
2316 See also the functions `match-beginning', `match-end', `match-string',
2317 and `replace-match'.  */)
2318   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2319 {
2320   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2321 }
2322
2323 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2324        "sPosix search backward: ",
2325        doc: /* Search backward from point for match for regular expression REGEXP.
2326 Find the longest match in accord with Posix regular expression rules.
2327 Set point to the beginning of the match, and return point.
2328 The match found is the one starting last in the buffer
2329 and yet ending before the origin of the search.
2330 An optional second argument bounds the search; it is a buffer position.
2331 The match found must start at or after that position.
2332 Optional third argument, if t, means if fail just return nil (no error).
2333   If not nil and not t, move to limit of search and return nil.
2334 Optional fourth argument is repeat count--search for successive occurrences.
2335 See also the functions `match-beginning', `match-end', `match-string',
2336 and `replace-match'.  */)
2337   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2338 {
2339   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2340 }
2341
2342 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2343        "sPosix search: ",
2344        doc: /* Search forward from point for regular expression REGEXP.
2345 Find the longest match in accord with Posix regular expression rules.
2346 Set point to the end of the occurrence found, and return point.
2347 An optional second argument bounds the search; it is a buffer position.
2348 The match found must not extend after that position.
2349 Optional third argument, if t, means if fail just return nil (no error).
2350   If not nil and not t, move to limit of search and return nil.
2351 Optional fourth argument is repeat count--search for successive occurrences.
2352 See also the functions `match-beginning', `match-end', `match-string',
2353 and `replace-match'.  */)
2354   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2355 {
2356   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2357 }
2358 \f
2359 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2360        doc: /* Replace text matched by last search with NEWTEXT.
2361 Leave point at the end of the replacement text.
2362
2363 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2364 Otherwise maybe capitalize the whole text, or maybe just word initials,
2365 based on the replaced text.
2366 If the replaced text has only capital letters
2367 and has at least one multiletter word, convert NEWTEXT to all caps.
2368 Otherwise if all words are capitalized in the replaced text,
2369 capitalize each word in NEWTEXT.
2370
2371 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2372 Otherwise treat `\\' as special:
2373   `\\&' in NEWTEXT means substitute original matched text.
2374   `\\N' means substitute what matched the Nth `\\(...\\)'.
2375        If Nth parens didn't match, substitute nothing.
2376   `\\\\' means insert one `\\'.
2377 Case conversion does not apply to these substitutions.
2378
2379 FIXEDCASE and LITERAL are optional arguments.
2380
2381 The optional fourth argument STRING can be a string to modify.
2382 This is meaningful when the previous match was done against STRING,
2383 using `string-match'.  When used this way, `replace-match'
2384 creates and returns a new string made by copying STRING and replacing
2385 the part of STRING that was matched.
2386
2387 The optional fifth argument SUBEXP specifies a subexpression;
2388 it says to replace just that subexpression with NEWTEXT,
2389 rather than replacing the entire matched text.
2390 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2391 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2392 NEWTEXT in place of subexp N.
2393 This is useful only after a regular expression search or match,
2394 since only regular expressions have distinguished subexpressions.  */)
2395   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2396 {
2397   enum { nochange, all_caps, cap_initial } case_action;
2398   register EMACS_INT pos, pos_byte;
2399   int some_multiletter_word;
2400   int some_lowercase;
2401   int some_uppercase;
2402   int some_nonuppercase_initial;
2403   register int c, prevc;
2404   int sub;
2405   EMACS_INT opoint, newpoint;
2406
2407   CHECK_STRING (newtext);
2408
2409   if (! NILP (string))
2410     CHECK_STRING (string);
2411
2412   case_action = nochange;       /* We tried an initialization */
2413                                 /* but some C compilers blew it */
2414
2415   if (search_regs.num_regs <= 0)
2416     error ("`replace-match' called before any match found");
2417
2418   if (NILP (subexp))
2419     sub = 0;
2420   else
2421     {
2422       CHECK_NUMBER (subexp);
2423       sub = XINT (subexp);
2424       if (sub < 0 || sub >= search_regs.num_regs)
2425         args_out_of_range (subexp, make_number (search_regs.num_regs));
2426     }
2427
2428   if (NILP (string))
2429     {
2430       if (search_regs.start[sub] < BEGV
2431           || search_regs.start[sub] > search_regs.end[sub]
2432           || search_regs.end[sub] > ZV)
2433         args_out_of_range (make_number (search_regs.start[sub]),
2434                            make_number (search_regs.end[sub]));
2435     }
2436   else
2437     {
2438       if (search_regs.start[sub] < 0
2439           || search_regs.start[sub] > search_regs.end[sub]
2440           || search_regs.end[sub] > SCHARS (string))
2441         args_out_of_range (make_number (search_regs.start[sub]),
2442                            make_number (search_regs.end[sub]));
2443     }
2444
2445   if (NILP (fixedcase))
2446     {
2447       /* Decide how to casify by examining the matched text. */
2448       EMACS_INT last;
2449
2450       pos = search_regs.start[sub];
2451       last = search_regs.end[sub];
2452
2453       if (NILP (string))
2454         pos_byte = CHAR_TO_BYTE (pos);
2455       else
2456         pos_byte = string_char_to_byte (string, pos);
2457
2458       prevc = '\n';
2459       case_action = all_caps;
2460
2461       /* some_multiletter_word is set nonzero if any original word
2462          is more than one letter long. */
2463       some_multiletter_word = 0;
2464       some_lowercase = 0;
2465       some_nonuppercase_initial = 0;
2466       some_uppercase = 0;
2467
2468       while (pos < last)
2469         {
2470           if (NILP (string))
2471             {
2472               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2473               INC_BOTH (pos, pos_byte);
2474             }
2475           else
2476             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2477
2478           if (LOWERCASEP (c))
2479             {
2480               /* Cannot be all caps if any original char is lower case */
2481
2482               some_lowercase = 1;
2483               if (SYNTAX (prevc) != Sword)
2484                 some_nonuppercase_initial = 1;
2485               else
2486                 some_multiletter_word = 1;
2487             }
2488           else if (UPPERCASEP (c))
2489             {
2490               some_uppercase = 1;
2491               if (SYNTAX (prevc) != Sword)
2492                 ;
2493               else
2494                 some_multiletter_word = 1;
2495             }
2496           else
2497             {
2498               /* If the initial is a caseless word constituent,
2499                  treat that like a lowercase initial.  */
2500               if (SYNTAX (prevc) != Sword)
2501                 some_nonuppercase_initial = 1;
2502             }
2503
2504           prevc = c;
2505         }
2506
2507       /* Convert to all caps if the old text is all caps
2508          and has at least one multiletter word.  */
2509       if (! some_lowercase && some_multiletter_word)
2510         case_action = all_caps;
2511       /* Capitalize each word, if the old text has all capitalized words.  */
2512       else if (!some_nonuppercase_initial && some_multiletter_word)
2513         case_action = cap_initial;
2514       else if (!some_nonuppercase_initial && some_uppercase)
2515         /* Should x -> yz, operating on X, give Yz or YZ?
2516            We'll assume the latter.  */
2517         case_action = all_caps;
2518       else
2519         case_action = nochange;
2520     }
2521
2522   /* Do replacement in a string.  */
2523   if (!NILP (string))
2524     {
2525       Lisp_Object before, after;
2526
2527       before = Fsubstring (string, make_number (0),
2528                            make_number (search_regs.start[sub]));
2529       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2530
2531       /* Substitute parts of the match into NEWTEXT
2532          if desired.  */
2533       if (NILP (literal))
2534         {
2535           EMACS_INT lastpos = 0;
2536           EMACS_INT lastpos_byte = 0;
2537           /* We build up the substituted string in ACCUM.  */
2538           Lisp_Object accum;
2539           Lisp_Object middle;
2540           int length = SBYTES (newtext);
2541
2542           accum = Qnil;
2543
2544           for (pos_byte = 0, pos = 0; pos_byte < length;)
2545             {
2546               EMACS_INT substart = -1;
2547               EMACS_INT subend = 0;
2548               int delbackslash = 0;
2549
2550               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2551
2552               if (c == '\\')
2553                 {
2554                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2555
2556                   if (c == '&')
2557                     {
2558                       substart = search_regs.start[sub];
2559                       subend = search_regs.end[sub];
2560                     }
2561                   else if (c >= '1' && c <= '9')
2562                     {
2563                       if (search_regs.start[c - '0'] >= 0
2564                           && c <= search_regs.num_regs + '0')
2565                         {
2566                           substart = search_regs.start[c - '0'];
2567                           subend = search_regs.end[c - '0'];
2568                         }
2569                       else
2570                         {
2571                           /* If that subexp did not match,
2572                              replace \\N with nothing.  */
2573                           substart = 0;
2574                           subend = 0;
2575                         }
2576                     }
2577                   else if (c == '\\')
2578                     delbackslash = 1;
2579                   else
2580                     error ("Invalid use of `\\' in replacement text");
2581                 }
2582               if (substart >= 0)
2583                 {
2584                   if (pos - 2 != lastpos)
2585                     middle = substring_both (newtext, lastpos,
2586                                              lastpos_byte,
2587                                              pos - 2, pos_byte - 2);
2588                   else
2589                     middle = Qnil;
2590                   accum = concat3 (accum, middle,
2591                                    Fsubstring (string,
2592                                                make_number (substart),
2593                                                make_number (subend)));
2594                   lastpos = pos;
2595                   lastpos_byte = pos_byte;
2596                 }
2597               else if (delbackslash)
2598                 {
2599                   middle = substring_both (newtext, lastpos,
2600                                            lastpos_byte,
2601                                            pos - 1, pos_byte - 1);
2602
2603                   accum = concat2 (accum, middle);
2604                   lastpos = pos;
2605                   lastpos_byte = pos_byte;
2606                 }
2607             }
2608
2609           if (pos != lastpos)
2610             middle = substring_both (newtext, lastpos,
2611                                      lastpos_byte,
2612                                      pos, pos_byte);
2613           else
2614             middle = Qnil;
2615
2616           newtext = concat2 (accum, middle);
2617         }
2618
2619       /* Do case substitution in NEWTEXT if desired.  */
2620       if (case_action == all_caps)
2621         newtext = Fupcase (newtext);
2622       else if (case_action == cap_initial)
2623         newtext = Fupcase_initials (newtext);
2624
2625       return concat3 (before, newtext, after);
2626     }
2627
2628   /* Record point, then move (quietly) to the start of the match.  */
2629   if (PT >= search_regs.end[sub])
2630     opoint = PT - ZV;
2631   else if (PT > search_regs.start[sub])
2632     opoint = search_regs.end[sub] - ZV;
2633   else
2634     opoint = PT;
2635
2636   /* If we want non-literal replacement,
2637      perform substitution on the replacement string.  */
2638   if (NILP (literal))
2639     {
2640       EMACS_INT length = SBYTES (newtext);
2641       unsigned char *substed;
2642       EMACS_INT substed_alloc_size, substed_len;
2643       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2644       int str_multibyte = STRING_MULTIBYTE (newtext);
2645       Lisp_Object rev_tbl;
2646       int really_changed = 0;
2647
2648       rev_tbl = Qnil;
2649
2650       substed_alloc_size = length * 2 + 100;
2651       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2652       substed_len = 0;
2653
2654       /* Go thru NEWTEXT, producing the actual text to insert in
2655          SUBSTED while adjusting multibyteness to that of the current
2656          buffer.  */
2657
2658       for (pos_byte = 0, pos = 0; pos_byte < length;)
2659         {
2660           unsigned char str[MAX_MULTIBYTE_LENGTH];
2661           const unsigned char *add_stuff = NULL;
2662           EMACS_INT add_len = 0;
2663           int idx = -1;
2664
2665           if (str_multibyte)
2666             {
2667               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2668               if (!buf_multibyte)
2669                 c = multibyte_char_to_unibyte (c, rev_tbl);
2670             }
2671           else
2672             {
2673               /* Note that we don't have to increment POS.  */
2674               c = SREF (newtext, pos_byte++);
2675               if (buf_multibyte)
2676                 MAKE_CHAR_MULTIBYTE (c);
2677             }
2678
2679           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2680              or set IDX to a match index, which means put that part
2681              of the buffer text into SUBSTED.  */
2682
2683           if (c == '\\')
2684             {
2685               really_changed = 1;
2686
2687               if (str_multibyte)
2688                 {
2689                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2690                                                       pos, pos_byte);
2691                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2692                     c = multibyte_char_to_unibyte (c, rev_tbl);
2693                 }
2694               else
2695                 {
2696                   c = SREF (newtext, pos_byte++);
2697                   if (buf_multibyte)
2698                     MAKE_CHAR_MULTIBYTE (c);
2699                 }
2700
2701               if (c == '&')
2702                 idx = sub;
2703               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2704                 {
2705                   if (search_regs.start[c - '0'] >= 1)
2706                     idx = c - '0';
2707                 }
2708               else if (c == '\\')
2709                 add_len = 1, add_stuff = "\\";
2710               else
2711                 {
2712                   xfree (substed);
2713                   error ("Invalid use of `\\' in replacement text");
2714                 }
2715             }
2716           else
2717             {
2718               add_len = CHAR_STRING (c, str);
2719               add_stuff = str;
2720             }
2721
2722           /* If we want to copy part of a previous match,
2723              set up ADD_STUFF and ADD_LEN to point to it.  */
2724           if (idx >= 0)
2725             {
2726               EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2727               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2728               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2729                 move_gap (search_regs.start[idx]);
2730               add_stuff = BYTE_POS_ADDR (begbyte);
2731             }
2732
2733           /* Now the stuff we want to add to SUBSTED
2734              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2735
2736           /* Make sure SUBSTED is big enough.  */
2737           if (substed_len + add_len >= substed_alloc_size)
2738             {
2739               substed_alloc_size = substed_len + add_len + 500;
2740               substed = (unsigned char *) xrealloc (substed,
2741                                                     substed_alloc_size + 1);
2742             }
2743
2744           /* Now add to the end of SUBSTED.  */
2745           if (add_stuff)
2746             {
2747               memcpy (substed + substed_len, add_stuff, add_len);
2748               substed_len += add_len;
2749             }
2750         }
2751
2752       if (really_changed)
2753         {
2754           if (buf_multibyte)
2755             {
2756               EMACS_INT nchars =
2757                 multibyte_chars_in_text (substed, substed_len);
2758
2759               newtext = make_multibyte_string (substed, nchars, substed_len);
2760             }
2761           else
2762             newtext = make_unibyte_string (substed, substed_len);
2763         }
2764       xfree (substed);
2765     }
2766
2767   /* Replace the old text with the new in the cleanest possible way.  */
2768   replace_range (search_regs.start[sub], search_regs.end[sub],
2769                  newtext, 1, 0, 1);
2770   newpoint = search_regs.start[sub] + SCHARS (newtext);
2771
2772   if (case_action == all_caps)
2773     Fupcase_region (make_number (search_regs.start[sub]),
2774                     make_number (newpoint));
2775   else if (case_action == cap_initial)
2776     Fupcase_initials_region (make_number (search_regs.start[sub]),
2777                              make_number (newpoint));
2778
2779   /* Adjust search data for this change.  */
2780   {
2781     EMACS_INT oldend = search_regs.end[sub];
2782     EMACS_INT oldstart = search_regs.start[sub];
2783     EMACS_INT change = newpoint - search_regs.end[sub];
2784     int i;
2785
2786     for (i = 0; i < search_regs.num_regs; i++)
2787       {
2788         if (search_regs.start[i] >= oldend)
2789           search_regs.start[i] += change;
2790         else if (search_regs.start[i] > oldstart)
2791           search_regs.start[i] = oldstart;
2792         if (search_regs.end[i] >= oldend)
2793           search_regs.end[i] += change;
2794         else if (search_regs.end[i] > oldstart)
2795           search_regs.end[i] = oldstart;
2796       }
2797   }
2798
2799   /* Put point back where it was in the text.  */
2800   if (opoint <= 0)
2801     TEMP_SET_PT (opoint + ZV);
2802   else
2803     TEMP_SET_PT (opoint);
2804
2805   /* Now move point "officially" to the start of the inserted replacement.  */
2806   move_if_not_intangible (newpoint);
2807
2808   return Qnil;
2809 }
2810 \f
2811 static Lisp_Object
2812 match_limit (Lisp_Object num, int beginningp)
2813 {
2814   register int n;
2815
2816   CHECK_NUMBER (num);
2817   n = XINT (num);
2818   if (n < 0)
2819     args_out_of_range (num, make_number (0));
2820   if (search_regs.num_regs <= 0)
2821     error ("No match data, because no search succeeded");
2822   if (n >= search_regs.num_regs
2823       || search_regs.start[n] < 0)
2824     return Qnil;
2825   return (make_number ((beginningp) ? search_regs.start[n]
2826                                     : search_regs.end[n]));
2827 }
2828
2829 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2830        doc: /* Return position of start of text matched by last search.
2831 SUBEXP, a number, specifies which parenthesized expression in the last
2832   regexp.
2833 Value is nil if SUBEXPth pair didn't match, or there were less than
2834   SUBEXP pairs.
2835 Zero means the entire text matched by the whole regexp or whole string.  */)
2836   (Lisp_Object subexp)
2837 {
2838   return match_limit (subexp, 1);
2839 }
2840
2841 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2842        doc: /* Return position of end of text matched by last search.
2843 SUBEXP, a number, specifies which parenthesized expression in the last
2844   regexp.
2845 Value is nil if SUBEXPth pair didn't match, or there were less than
2846   SUBEXP pairs.
2847 Zero means the entire text matched by the whole regexp or whole string.  */)
2848   (Lisp_Object subexp)
2849 {
2850   return match_limit (subexp, 0);
2851 }
2852
2853 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2854        doc: /* Return a list containing all info on what the last search matched.
2855 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2856 All the elements are markers or nil (nil if the Nth pair didn't match)
2857 if the last match was on a buffer; integers or nil if a string was matched.
2858 Use `set-match-data' to reinstate the data in this list.
2859
2860 If INTEGERS (the optional first argument) is non-nil, always use
2861 integers \(rather than markers) to represent buffer positions.  In
2862 this case, and if the last match was in a buffer, the buffer will get
2863 stored as one additional element at the end of the list.
2864
2865 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2866 enough to hold all the values, and if INTEGERS is non-nil, no consing
2867 is done.
2868
2869 If optional third arg RESEAT is non-nil, any previous markers on the
2870 REUSE list will be modified to point to nowhere.
2871
2872 Return value is undefined if the last search failed.  */)
2873   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2874 {
2875   Lisp_Object tail, prev;
2876   Lisp_Object *data;
2877   int i, len;
2878
2879   if (!NILP (reseat))
2880     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2881       if (MARKERP (XCAR (tail)))
2882         {
2883           unchain_marker (XMARKER (XCAR (tail)));
2884           XSETCAR (tail, Qnil);
2885         }
2886
2887   if (NILP (last_thing_searched))
2888     return Qnil;
2889
2890   prev = Qnil;
2891
2892   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2893                                  * sizeof (Lisp_Object));
2894
2895   len = 0;
2896   for (i = 0; i < search_regs.num_regs; i++)
2897     {
2898       int start = search_regs.start[i];
2899       if (start >= 0)
2900         {
2901           if (EQ (last_thing_searched, Qt)
2902               || ! NILP (integers))
2903             {
2904               XSETFASTINT (data[2 * i], start);
2905               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2906             }
2907           else if (BUFFERP (last_thing_searched))
2908             {
2909               data[2 * i] = Fmake_marker ();
2910               Fset_marker (data[2 * i],
2911                            make_number (start),
2912                            last_thing_searched);
2913               data[2 * i + 1] = Fmake_marker ();
2914               Fset_marker (data[2 * i + 1],
2915                            make_number (search_regs.end[i]),
2916                            last_thing_searched);
2917             }
2918           else
2919             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2920             abort ();
2921
2922           len = 2 * i + 2;
2923         }
2924       else
2925         data[2 * i] = data[2 * i + 1] = Qnil;
2926     }
2927
2928   if (BUFFERP (last_thing_searched) && !NILP (integers))
2929     {
2930       data[len] = last_thing_searched;
2931       len++;
2932     }
2933
2934   /* If REUSE is not usable, cons up the values and return them.  */
2935   if (! CONSP (reuse))
2936     return Flist (len, data);
2937
2938   /* If REUSE is a list, store as many value elements as will fit
2939      into the elements of REUSE.  */
2940   for (i = 0, tail = reuse; CONSP (tail);
2941        i++, tail = XCDR (tail))
2942     {
2943       if (i < len)
2944         XSETCAR (tail, data[i]);
2945       else
2946         XSETCAR (tail, Qnil);
2947       prev = tail;
2948     }
2949
2950   /* If we couldn't fit all value elements into REUSE,
2951      cons up the rest of them and add them to the end of REUSE.  */
2952   if (i < len)
2953     XSETCDR (prev, Flist (len - i, data + i));
2954
2955   return reuse;
2956 }
2957
2958 /* We used to have an internal use variant of `reseat' described as:
2959
2960       If RESEAT is `evaporate', put the markers back on the free list
2961       immediately.  No other references to the markers must exist in this
2962       case, so it is used only internally on the unwind stack and
2963       save-match-data from Lisp.
2964
2965    But it was ill-conceived: those supposedly-internal markers get exposed via
2966    the undo-list, so freeing them here is unsafe.  */
2967
2968 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2969        doc: /* Set internal data on last search match from elements of LIST.
2970 LIST should have been created by calling `match-data' previously.
2971
2972 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2973   (register Lisp_Object list, Lisp_Object reseat)
2974 {
2975   register int i;
2976   register Lisp_Object marker;
2977
2978   if (running_asynch_code)
2979     save_search_regs ();
2980
2981   CHECK_LIST (list);
2982
2983   /* Unless we find a marker with a buffer or an explicit buffer
2984      in LIST, assume that this match data came from a string.  */
2985   last_thing_searched = Qt;
2986
2987   /* Allocate registers if they don't already exist.  */
2988   {
2989     int length = XFASTINT (Flength (list)) / 2;
2990
2991     if (length > search_regs.num_regs)
2992       {
2993         if (search_regs.num_regs == 0)
2994           {
2995             search_regs.start
2996               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2997             search_regs.end
2998               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2999           }
3000         else
3001           {
3002             search_regs.start
3003               = (regoff_t *) xrealloc (search_regs.start,
3004                                        length * sizeof (regoff_t));
3005             search_regs.end
3006               = (regoff_t *) xrealloc (search_regs.end,
3007                                        length * sizeof (regoff_t));
3008           }
3009
3010         for (i = search_regs.num_regs; i < length; i++)
3011           search_regs.start[i] = -1;
3012
3013         search_regs.num_regs = length;
3014       }
3015
3016     for (i = 0; CONSP (list); i++)
3017       {
3018         marker = XCAR (list);
3019         if (BUFFERP (marker))
3020           {
3021             last_thing_searched = marker;
3022             break;
3023           }
3024         if (i >= length)
3025           break;
3026         if (NILP (marker))
3027           {
3028             search_regs.start[i] = -1;
3029             list = XCDR (list);
3030           }
3031         else
3032           {
3033             EMACS_INT from;
3034             Lisp_Object m;
3035
3036             m = marker;
3037             if (MARKERP (marker))
3038               {
3039                 if (XMARKER (marker)->buffer == 0)
3040                   XSETFASTINT (marker, 0);
3041                 else
3042                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3043               }
3044
3045             CHECK_NUMBER_COERCE_MARKER (marker);
3046             from = XINT (marker);
3047
3048             if (!NILP (reseat) && MARKERP (m))
3049               {
3050                 unchain_marker (XMARKER (m));
3051                 XSETCAR (list, Qnil);
3052               }
3053
3054             if ((list = XCDR (list), !CONSP (list)))
3055               break;
3056
3057             m = marker = XCAR (list);
3058
3059             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3060               XSETFASTINT (marker, 0);
3061
3062             CHECK_NUMBER_COERCE_MARKER (marker);
3063             search_regs.start[i] = from;
3064             search_regs.end[i] = XINT (marker);
3065
3066             if (!NILP (reseat) && MARKERP (m))
3067               {
3068                 unchain_marker (XMARKER (m));
3069                 XSETCAR (list, Qnil);
3070               }
3071           }
3072         list = XCDR (list);
3073       }
3074
3075     for (; i < search_regs.num_regs; i++)
3076       search_regs.start[i] = -1;
3077   }
3078
3079   return Qnil;
3080 }
3081
3082 /* If non-zero the match data have been saved in saved_search_regs
3083    during the execution of a sentinel or filter. */
3084 static int search_regs_saved;
3085 static struct re_registers saved_search_regs;
3086 static Lisp_Object saved_last_thing_searched;
3087
3088 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3089    if asynchronous code (filter or sentinel) is running. */
3090 static void
3091 save_search_regs (void)
3092 {
3093   if (!search_regs_saved)
3094     {
3095       saved_search_regs.num_regs = search_regs.num_regs;
3096       saved_search_regs.start = search_regs.start;
3097       saved_search_regs.end = search_regs.end;
3098       saved_last_thing_searched = last_thing_searched;
3099       last_thing_searched = Qnil;
3100       search_regs.num_regs = 0;
3101       search_regs.start = 0;
3102       search_regs.end = 0;
3103
3104       search_regs_saved = 1;
3105     }
3106 }
3107
3108 /* Called upon exit from filters and sentinels. */
3109 void
3110 restore_search_regs (void)
3111 {
3112   if (search_regs_saved)
3113     {
3114       if (search_regs.num_regs > 0)
3115         {
3116           xfree (search_regs.start);
3117           xfree (search_regs.end);
3118         }
3119       search_regs.num_regs = saved_search_regs.num_regs;
3120       search_regs.start = saved_search_regs.start;
3121       search_regs.end = saved_search_regs.end;
3122       last_thing_searched = saved_last_thing_searched;
3123       saved_last_thing_searched = Qnil;
3124       search_regs_saved = 0;
3125     }
3126 }
3127
3128 static Lisp_Object
3129 unwind_set_match_data (Lisp_Object list)
3130 {
3131   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3132   return Fset_match_data (list, Qt);
3133 }
3134
3135 /* Called to unwind protect the match data.  */
3136 void
3137 record_unwind_save_match_data (void)
3138 {
3139   record_unwind_protect (unwind_set_match_data,
3140                          Fmatch_data (Qnil, Qnil, Qnil));
3141 }
3142
3143 /* Quote a string to inactivate reg-expr chars */
3144
3145 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3146        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3147   (Lisp_Object string)
3148 {
3149   register unsigned char *in, *out, *end;
3150   register unsigned char *temp;
3151   int backslashes_added = 0;
3152
3153   CHECK_STRING (string);
3154
3155   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3156
3157   /* Now copy the data into the new string, inserting escapes. */
3158
3159   in = SDATA (string);
3160   end = in + SBYTES (string);
3161   out = temp;
3162
3163   for (; in != end; in++)
3164     {
3165       if (*in == '['
3166           || *in == '*' || *in == '.' || *in == '\\'
3167           || *in == '?' || *in == '+'
3168           || *in == '^' || *in == '$')
3169         *out++ = '\\', backslashes_added++;
3170       *out++ = *in;
3171     }
3172
3173   return make_specified_string (temp,
3174                                 SCHARS (string) + backslashes_added,
3175                                 out - temp,
3176                                 STRING_MULTIBYTE (string));
3177 }
3178 \f
3179 void
3180 syms_of_search (void)
3181 {
3182   register int i;
3183
3184   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3185     {
3186       searchbufs[i].buf.allocated = 100;
3187       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3188       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3189       searchbufs[i].regexp = Qnil;
3190       searchbufs[i].whitespace_regexp = Qnil;
3191       searchbufs[i].syntax_table = Qnil;
3192       staticpro (&searchbufs[i].regexp);
3193       staticpro (&searchbufs[i].whitespace_regexp);
3194       staticpro (&searchbufs[i].syntax_table);
3195       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3196     }
3197   searchbuf_head = &searchbufs[0];
3198
3199   Qsearch_failed = intern_c_string ("search-failed");
3200   staticpro (&Qsearch_failed);
3201   Qinvalid_regexp = intern_c_string ("invalid-regexp");
3202   staticpro (&Qinvalid_regexp);
3203
3204   Fput (Qsearch_failed, Qerror_conditions,
3205         pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
3206   Fput (Qsearch_failed, Qerror_message,
3207         make_pure_c_string ("Search failed"));
3208
3209   Fput (Qinvalid_regexp, Qerror_conditions,
3210         pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
3211   Fput (Qinvalid_regexp, Qerror_message,
3212         make_pure_c_string ("Invalid regexp"));
3213
3214   last_thing_searched = Qnil;
3215   staticpro (&last_thing_searched);
3216
3217   saved_last_thing_searched = Qnil;
3218   staticpro (&saved_last_thing_searched);
3219
3220   DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3221       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3222 Some commands use this for user-specified regexps.
3223 Spaces that occur inside character classes or repetition operators
3224 or other such regexp constructs are not replaced with this.
3225 A value of nil (which is the normal value) means treat spaces literally.  */);
3226   Vsearch_spaces_regexp = Qnil;
3227
3228   DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3229       doc: /* Internal use only.
3230 If non-nil, the primitive searching and matching functions
3231 such as `looking-at', `string-match', `re-search-forward', etc.,
3232 do not set the match data.  The proper way to use this variable
3233 is to bind it with `let' around a small expression.  */);
3234   Vinhibit_changing_match_data = Qnil;
3235
3236   defsubr (&Slooking_at);
3237   defsubr (&Sposix_looking_at);
3238   defsubr (&Sstring_match);
3239   defsubr (&Sposix_string_match);
3240   defsubr (&Ssearch_forward);
3241   defsubr (&Ssearch_backward);
3242   defsubr (&Sword_search_forward);
3243   defsubr (&Sword_search_backward);
3244   defsubr (&Sword_search_forward_lax);
3245   defsubr (&Sword_search_backward_lax);
3246   defsubr (&Sre_search_forward);
3247   defsubr (&Sre_search_backward);
3248   defsubr (&Sposix_search_forward);
3249   defsubr (&Sposix_search_backward);
3250   defsubr (&Sreplace_match);
3251   defsubr (&Smatch_beginning);
3252   defsubr (&Smatch_end);
3253   defsubr (&Smatch_data);
3254   defsubr (&Sset_match_data);
3255   defsubr (&Sregexp_quote);
3256 }