code.delx.au - gnu-emacs/blob - src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
   3                  2004, 2005, 2006 Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  20 Boston, MA 02110-1301, USA.  */
  21
  22
  23 #include <config.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
   37 #define REGEXP_CACHE_SIZE 20
      /* REGEXP_CACHE_SIZE is the number of elements in `searchbufs' below.  */
   38
   39 /* If the regexp is non-nil, then the buffer contains the compiled form
   40    of that regexp, suitable for searching.  */
   41 struct regexp_cache
   42 {
        /* Link to the following entry in the list headed by searchbuf_head;
           the list is terminated by a null pointer.  */
   43   struct regexp_cache *next;
        /* REGEXP is a copy of the pattern this entry was compiled from, or
           nil when the entry holds no usable pattern.  WHITESPACE_REGEXP is
           the value Vsearch_spaces_regexp had when the entry was compiled;
           compile_pattern compares it so that entries built under a
           different setting are not reused.  */
   44   Lisp_Object regexp, whitespace_regexp;
   45   /* Syntax table for which the regexp applies.  We need this because
   46      of character classes.  If this is t, then the compiled pattern is valid
   47      for any syntax-table.  */
   48   Lisp_Object syntax_table;
        /* The compiled pattern itself, as filled in by re_compile_pattern.  */
   49   struct re_pattern_buffer buf;
        /* 0400 is octal for 256.  NOTE(review): presumably installed as
           buf.fastmap by initialization code that is not visible here --
           confirm.  */
   50   char fastmap[0400];
   51   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
   52   char posix;
   53 };
   54
   55 /* The instances of that struct.  */
   56 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
   57
   58 /* The head of the linked list; points to the most recently used buffer.  */
   59 struct regexp_cache *searchbuf_head;
  60
  61
  62 /* Every call to re_match, etc., must pass &search_regs as the regs
  63    argument unless you can show it is unnecessary (i.e., if re_match
  64    is certainly going to be called again before region-around-match
  65    can be called).
  66
  67    Since the registers are now dynamically allocated, we need to make
  68    sure not to refer to the Nth register before checking that it has
  69    been allocated by checking search_regs.num_regs.
  70
  71    The regex code keeps track of whether it has allocated the search
  72    buffer using bits in the re_pattern_buffer.  This means that whenever
  73    you compile a new pattern, it completely forgets whether it has
  74    allocated any registers, and will allocate new registers the next
  75    time you call a searching or matching function.  Therefore, we need
  76    to call re_set_registers after compiling a new pattern or after
  77    setting the match registers, so that the regex functions will be
  78    able to free or re-allocate it properly.  */
   79 static struct re_registers search_regs;
   80
   81 /* The buffer in which the last search was performed, or
   82    Qt if the last search was done in a string;
   83    Qnil if no searching has been done yet.  */
   84 static Lisp_Object last_thing_searched;
   85
   86 /* Error condition signaled when re_compile_pattern rejects a regexp.  */
   87
   88 Lisp_Object Qinvalid_regexp;
   89
   90 /* Error condition used for failing searches.  */
   91 Lisp_Object Qsearch_failed;
   92
      /* Consulted every time a pattern is compiled: its string data is
         handed to re_set_whitespace_regexp for the duration of the
         compilation, and the value is remembered in the cache entry.
         nil means no special whitespace handling.  */
   93 Lisp_Object Vsearch_spaces_regexp;
   94
      /* Forward declarations of file-local functions.  The empty
         parentheses are old-style declarations carrying no parameter
         information.  */
   95 static void set_search_regs ();
   96 static void save_search_regs ();
   97 static int simple_search ();
   98 static int boyer_moore ();
   99 static int search_buffer ();
  100 static void matcher_overflow () NO_RETURN;
 101
  102 static void
  103 matcher_overflow ()
  104 {
        /* Report that the regexp matcher ran out of stack (callers invoke
           this when a match/search function returns -2).  This function is
           declared NO_RETURN above, so `error' is relied on not to return.  */
  105   error ("Stack overflow in regexp matcher");
  106 }
 107
 108 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 109    PATTERN is the pattern to compile.
 110    CP is the place to put the result.
 111    TRANSLATE is a translation table for ignoring case, or nil for none.
 112    REGP is the structure that says where to store the "register"
 113    values that will result from matching this pattern.
 114    If it is 0, we should compile the pattern not to record any
 115    subexpression bounds.
 116    POSIX is nonzero if we want full backtracking (POSIX style)
 117    for this pattern.  0 means backtrack only enough to get a valid match.
 118    MULTIBYTE is nonzero if we want to handle multibyte characters in
 119    PATTERN.  0 means all multibyte characters are recognized just as
 120    sequences of binary data.
 121
 122    The behavior also depends on Vsearch_spaces_regexp.  */
 123
 124 static void
 125 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 126      struct regexp_cache *cp;
 127      Lisp_Object pattern;
 128      Lisp_Object translate;
 129      struct re_registers *regp;
 130      int posix;
 131      int multibyte;
 132 {
 133   unsigned char *raw_pattern;
 134   int raw_pattern_size;
 135   char *val;
 136   reg_syntax_t old;
 137
 138   /* MULTIBYTE says whether the text to be searched is multibyte.
 139      We must convert PATTERN to match that, or we will not really
 140      find things right.  */
 141
 142   if (multibyte == STRING_MULTIBYTE (pattern))
 143     {
 144       raw_pattern = (unsigned char *) SDATA (pattern);
 145       raw_pattern_size = SBYTES (pattern);
 146     }
 147   else if (multibyte)
 148     {
 149       raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
 150                                                   SCHARS (pattern));
 151       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 152       copy_text (SDATA (pattern), raw_pattern,
 153                  SCHARS (pattern), 0, 1);
 154     }
 155   else
 156     {
 157       /* Converting multibyte to single-byte.
 158
 159          ??? Perhaps this conversion should be done in a special way
 160          by subtracting nonascii-insert-offset from each non-ASCII char,
 161          so that only the multibyte chars which really correspond to
 162          the chosen single-byte character set can possibly match.  */
 163       raw_pattern_size = SCHARS (pattern);
 164       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 165       copy_text (SDATA (pattern), raw_pattern,
 166                  SBYTES (pattern), 1, 0);
 167     }
 168
 169   cp->regexp = Qnil;
 170   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 171   cp->posix = posix;
 172   cp->buf.multibyte = multibyte;
 173   cp->whitespace_regexp = Vsearch_spaces_regexp;
 174   /* rms: I think BLOCK_INPUT is not needed here any more,
 175      because regex.c defines malloc to call xmalloc.
 176      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 177      So let's turn it off.  */
 178   /*  BLOCK_INPUT;  */
 179   old = re_set_syntax (RE_SYNTAX_EMACS
 180                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 181
 182   re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
 183                             : SDATA (Vsearch_spaces_regexp));
 184
 185   val = (char *) re_compile_pattern ((char *)raw_pattern,
 186                                      raw_pattern_size, &cp->buf);
 187
 188   /* If the compiled pattern hard codes some of the contents of the
 189      syntax-table, it can only be reused with *this* syntax table.  */
 190   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 191
 192   re_set_whitespace_regexp (NULL);
 193
 194   re_set_syntax (old);
 195   /* UNBLOCK_INPUT;  */
 196   if (val)
 197     xsignal1 (Qinvalid_regexp, build_string (val));
 198
 199   cp->regexp = Fcopy_sequence (pattern);
 200 }
 201
 202 /* Shrink each compiled regexp buffer in the cache
 203    to the size actually used right now.
 204    This is called from garbage collection.  */
 205
 206 void
 207 shrink_regexp_cache ()
 208 {
 209   struct regexp_cache *cp;
 210
 211   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 212     {
 213       cp->buf.allocated = cp->buf.used;
 214       cp->buf.buffer
 215         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 216     }
 217 }
 218
 219 /* Clear the regexp cache w.r.t. a particular syntax table,
 220    because it was changed.
 221    There is no danger of memory leak here because re_compile_pattern
 222    automagically manages the memory in each re_pattern_buffer struct,
 223    based on its `allocated' and `buffer' values.  */
 224 void
 225 clear_regexp_cache ()
 226 {
 227   int i;
 228
 229   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 230     /* It's tempting to compare with the syntax-table we've actually changd,
 231        but it's not sufficient because char-table inheritance mewans that
 232        modifying one syntax-table can change others at the same time.  */
 233     if (!EQ (searchbufs[i].syntax_table, Qt))
 234       searchbufs[i].regexp = Qnil;
 235 }
 236
 237 /* Compile a regexp if necessary, but first check to see if there's one in
 238    the cache.
 239    PATTERN is the pattern to compile.
 240    TRANSLATE is a translation table for ignoring case, or nil for none.
 241    REGP is the structure that says where to store the "register"
 242    values that will result from matching this pattern.
 243    If it is 0, we should compile the pattern not to record any
 244    subexpression bounds.
 245    POSIX is nonzero if we want full backtracking (POSIX style)
 246    for this pattern.  0 means backtrack only enough to get a valid match.  */
 247
 248 struct re_pattern_buffer *
 249 compile_pattern (pattern, regp, translate, posix, multibyte)
 250      Lisp_Object pattern;
 251      struct re_registers *regp;
 252      Lisp_Object translate;
 253      int posix, multibyte;
 254 {
 255   struct regexp_cache *cp, **cpp;
 256
 257   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 258     {
 259       cp = *cpp;
 260       /* Entries are initialized to nil, and may be set to nil by
 261          compile_pattern_1 if the pattern isn't valid.  Don't apply
 262          string accessors in those cases.  However, compile_pattern_1
 263          is only applied to the cache entry we pick here to reuse.  So
 264          nil should never appear before a non-nil entry.  */
 265       if (NILP (cp->regexp))
 266         goto compile_it;
 267       if (SCHARS (cp->regexp) == SCHARS (pattern)
 268           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 269           && !NILP (Fstring_equal (cp->regexp, pattern))
 270           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 271           && cp->posix == posix
 272           && cp->buf.multibyte == multibyte
 273           && (EQ (cp->syntax_table, Qt)
 274               || EQ (cp->syntax_table, current_buffer->syntax_table))
 275           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
 276         break;
 277
 278       /* If we're at the end of the cache, compile into the nil cell
 279          we found, or the last (least recently used) cell with a
 280          string value.  */
 281       if (cp->next == 0)
 282         {
 283         compile_it:
 284           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 285           break;
 286         }
 287     }
 288
 289   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 290      either because we found it in the cache or because we just compiled it.
 291      Move it to the front of the queue to mark it as most recently used.  */
 292   *cpp = cp->next;
 293   cp->next = searchbuf_head;
 294   searchbuf_head = cp;
 295
 296   /* Advise the searching functions about the space we have allocated
 297      for register data.  */
 298   if (regp)
 299     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 300
 301   return &cp->buf;
 302 }
 303
 304 \f
 305 static Lisp_Object
 306 looking_at_1 (string, posix)
 307      Lisp_Object string;
 308      int posix;
 309 {
 310   Lisp_Object val;
 311   unsigned char *p1, *p2;
 312   int s1, s2;
 313   register int i;
 314   struct re_pattern_buffer *bufp;
 315
 316   if (running_asynch_code)
 317     save_search_regs ();
 318
 319   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 320   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 321     = current_buffer->case_eqv_table;
 322
 323   CHECK_STRING (string);
 324   bufp = compile_pattern (string, &search_regs,
 325                           (!NILP (current_buffer->case_fold_search)
 326                            ? current_buffer->case_canon_table : Qnil),
 327                           posix,
 328                           !NILP (current_buffer->enable_multibyte_characters));
 329
 330   immediate_quit = 1;
 331   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 332
 333   /* Get pointers and sizes of the two strings
 334      that make up the visible portion of the buffer. */
 335
 336   p1 = BEGV_ADDR;
 337   s1 = GPT_BYTE - BEGV_BYTE;
 338   p2 = GAP_END_ADDR;
 339   s2 = ZV_BYTE - GPT_BYTE;
 340   if (s1 < 0)
 341     {
 342       p2 = p1;
 343       s2 = ZV_BYTE - BEGV_BYTE;
 344       s1 = 0;
 345     }
 346   if (s2 < 0)
 347     {
 348       s1 = ZV_BYTE - BEGV_BYTE;
 349       s2 = 0;
 350     }
 351
 352   re_match_object = Qnil;
 353
 354   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 355                   PT_BYTE - BEGV_BYTE, &search_regs,
 356                   ZV_BYTE - BEGV_BYTE);
 357   immediate_quit = 0;
 358
 359   if (i == -2)
 360     matcher_overflow ();
 361
 362   val = (0 <= i ? Qt : Qnil);
 363   if (i >= 0)
 364     for (i = 0; i < search_regs.num_regs; i++)
 365       if (search_regs.start[i] >= 0)
 366         {
 367           search_regs.start[i]
 368             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 369           search_regs.end[i]
 370             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 371         }
 372   XSETBUFFER (last_thing_searched, current_buffer);
 373   return val;
 374 }
 375
 376 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 377        doc: /* Return t if text after point matches regular expression REGEXP.
 378 This function modifies the match data that `match-beginning',
 379 `match-end' and `match-data' access; save and restore the match
 380 data if you want to preserve them.  */)
 381      (regexp)
 382      Lisp_Object regexp;
 383 {
 384   return looking_at_1 (regexp, 0);
 385 }
 386
 387 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 388        doc: /* Return t if text after point matches regular expression REGEXP.
 389 Find the longest match, in accord with Posix regular expression rules.
 390 This function modifies the match data that `match-beginning',
 391 `match-end' and `match-data' access; save and restore the match
 392 data if you want to preserve them.  */)
 393      (regexp)
 394      Lisp_Object regexp;
 395 {
 396   return looking_at_1 (regexp, 1);
 397 }
 398 \f
 399 static Lisp_Object
 400 string_match_1 (regexp, string, start, posix)
 401      Lisp_Object regexp, string, start;
 402      int posix;
 403 {
 404   int val;
 405   struct re_pattern_buffer *bufp;
 406   int pos, pos_byte;
 407   int i;
 408
 409   if (running_asynch_code)
 410     save_search_regs ();
 411
 412   CHECK_STRING (regexp);
 413   CHECK_STRING (string);
 414
 415   if (NILP (start))
 416     pos = 0, pos_byte = 0;
 417   else
 418     {
 419       int len = SCHARS (string);
 420
 421       CHECK_NUMBER (start);
 422       pos = XINT (start);
 423       if (pos < 0 && -pos <= len)
 424         pos = len + pos;
 425       else if (0 > pos || pos > len)
 426         args_out_of_range (string, start);
 427       pos_byte = string_char_to_byte (string, pos);
 428     }
 429
 430   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 431   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 432     = current_buffer->case_eqv_table;
 433
 434   bufp = compile_pattern (regexp, &search_regs,
 435                           (!NILP (current_buffer->case_fold_search)
 436                            ? current_buffer->case_canon_table : Qnil),
 437                           posix,
 438                           STRING_MULTIBYTE (string));
 439   immediate_quit = 1;
 440   re_match_object = string;
 441
 442   val = re_search (bufp, (char *) SDATA (string),
 443                    SBYTES (string), pos_byte,
 444                    SBYTES (string) - pos_byte,
 445                    &search_regs);
 446   immediate_quit = 0;
 447   last_thing_searched = Qt;
 448   if (val == -2)
 449     matcher_overflow ();
 450   if (val < 0) return Qnil;
 451
 452   for (i = 0; i < search_regs.num_regs; i++)
 453     if (search_regs.start[i] >= 0)
 454       {
 455         search_regs.start[i]
 456           = string_byte_to_char (string, search_regs.start[i]);
 457         search_regs.end[i]
 458           = string_byte_to_char (string, search_regs.end[i]);
 459       }
 460
 461   return make_number (string_byte_to_char (string, val));
 462 }
 463
 464 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 465        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 466 Matching ignores case if `case-fold-search' is non-nil.
 467 If third arg START is non-nil, start search at that index in STRING.
 468 For index of first char beyond the match, do (match-end 0).
 469 `match-end' and `match-beginning' also give indices of substrings
 470 matched by parenthesis constructs in the pattern.
 471
 472 You can use the function `match-string' to extract the substrings
 473 matched by the parenthesis constructions in REGEXP. */)
 474      (regexp, string, start)
 475      Lisp_Object regexp, string, start;
 476 {
 477   return string_match_1 (regexp, string, start, 0);
 478 }
 479
 480 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 481        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 482 Find the longest match, in accord with Posix regular expression rules.
 483 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 484 If third arg START is non-nil, start search at that index in STRING.
 485 For index of first char beyond the match, do (match-end 0).
 486 `match-end' and `match-beginning' also give indices of substrings
 487 matched by parenthesis constructs in the pattern.  */)
 488      (regexp, string, start)
 489      Lisp_Object regexp, string, start;
 490 {
 491   return string_match_1 (regexp, string, start, 1);
 492 }
 493
 494 /* Match REGEXP against STRING, searching all of STRING,
 495    and return the index of the match, or negative on failure.
 496    This does not clobber the match data.  */
 497
 498 int
 499 fast_string_match (regexp, string)
 500      Lisp_Object regexp, string;
 501 {
 502   int val;
 503   struct re_pattern_buffer *bufp;
 504
 505   bufp = compile_pattern (regexp, 0, Qnil,
 506                           0, STRING_MULTIBYTE (string));
 507   immediate_quit = 1;
 508   re_match_object = string;
 509
 510   val = re_search (bufp, (char *) SDATA (string),
 511                    SBYTES (string), 0,
 512                    SBYTES (string), 0);
 513   immediate_quit = 0;
 514   return val;
 515 }
 516
 517 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 518    and return the index of the match, or negative on failure.
 519    This does not clobber the match data.
 520    We assume that STRING contains single-byte characters.  */
 521
 522 extern Lisp_Object Vascii_downcase_table;
 523
 524 int
 525 fast_c_string_match_ignore_case (regexp, string)
 526      Lisp_Object regexp;
 527      const char *string;
 528 {
 529   int val;
 530   struct re_pattern_buffer *bufp;
 531   int len = strlen (string);
 532
 533   regexp = string_make_unibyte (regexp);
 534   re_match_object = Qt;
 535   bufp = compile_pattern (regexp, 0,
 536                           Vascii_canon_table, 0,
 537                           0);
 538   immediate_quit = 1;
 539   val = re_search (bufp, string, len, 0, len, 0);
 540   immediate_quit = 0;
 541   return val;
 542 }
 543
 544 /* Like fast_string_match but ignore case.  */
 545
 546 int
 547 fast_string_match_ignore_case (regexp, string)
 548      Lisp_Object regexp, string;
 549 {
 550   int val;
 551   struct re_pattern_buffer *bufp;
 552
 553   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 554                           0, STRING_MULTIBYTE (string));
 555   immediate_quit = 1;
 556   re_match_object = string;
 557
 558   val = re_search (bufp, (char *) SDATA (string),
 559                    SBYTES (string), 0,
 560                    SBYTES (string), 0);
 561   immediate_quit = 0;
 562   return val;
 563 }
 564 \f
 565 /* The newline cache: remembering which sections of text have no newlines.  */
 566
 567 /* If the user has requested newline caching, make sure it's on.
 568    Otherwise, make sure it's off.
 569    This is our cheezy way of associating an action with the change of
 570    state of a buffer-local variable.  */
 571 static void
 572 newline_cache_on_off (buf)
 573      struct buffer *buf;
 574 {
 575   if (NILP (buf->cache_long_line_scans))
 576     {
 577       /* It should be off.  */
 578       if (buf->newline_cache)
 579         {
 580           free_region_cache (buf->newline_cache);
 581           buf->newline_cache = 0;
 582         }
 583     }
 584   else
 585     {
 586       /* It should be on.  */
 587       if (buf->newline_cache == 0)
 588         buf->newline_cache = new_region_cache ();
 589     }
 590 }
 591
 592 \f
 593 /* Search for COUNT instances of the character TARGET between START and END.
 594
 595    If COUNT is positive, search forwards; END must be >= START.
 596    If COUNT is negative, search backwards for the -COUNTth instance;
 597       END must be <= START.
 598    If COUNT is zero, do anything you please; run rogue, for all I care.
 599
 600    If END is zero, use BEGV or ZV instead, as appropriate for the
 601    direction indicated by COUNT.
 602
 603    If we find COUNT instances, set *SHORTAGE to zero, and return the
 604    position past the COUNTth match.  Note that for reverse motion
 605    this is not the same as the usual convention for Emacs motion commands.
 606
 607    If we don't find COUNT instances before reaching END, set *SHORTAGE
 608    to the number of TARGETs left unfound, and return END.
 609
 610    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 611    except when inside redisplay.  */
 612
 613 int
 614 scan_buffer (target, start, end, count, shortage, allow_quit)
 615      register int target;
 616      int start, end;
 617      int count;
 618      int *shortage;
 619      int allow_quit;
 620 {
 621   struct region_cache *newline_cache;
 622   int direction;
 623
 624   if (count > 0)
 625     {
 626       direction = 1;
 627       if (! end) end = ZV;
 628     }
 629   else
 630     {
 631       direction = -1;
 632       if (! end) end = BEGV;
 633     }
 634
 635   newline_cache_on_off (current_buffer);
 636   newline_cache = current_buffer->newline_cache;
 637
 638   if (shortage != 0)
 639     *shortage = 0;
 640
 641   immediate_quit = allow_quit;
 642
 643   if (count > 0)
 644     while (start != end)
 645       {
 646         /* Our innermost scanning loop is very simple; it doesn't know
 647            about gaps, buffer ends, or the newline cache.  ceiling is
 648            the position of the last character before the next such
 649            obstacle --- the last character the dumb search loop should
 650            examine.  */
 651         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 652         int start_byte = CHAR_TO_BYTE (start);
 653         int tem;
 654
 655         /* If we're looking for a newline, consult the newline cache
 656            to see where we can avoid some scanning.  */
 657         if (target == '\n' && newline_cache)
 658           {
 659             int next_change;
 660             immediate_quit = 0;
 661             while (region_cache_forward
 662                    (current_buffer, newline_cache, start_byte, &next_change))
 663               start_byte = next_change;
 664             immediate_quit = allow_quit;
 665
 666             /* START should never be after END.  */
 667             if (start_byte > ceiling_byte)
 668               start_byte = ceiling_byte;
 669
 670             /* Now the text after start is an unknown region, and
 671                next_change is the position of the next known region. */
 672             ceiling_byte = min (next_change - 1, ceiling_byte);
 673           }
 674
 675         /* The dumb loop can only scan text stored in contiguous
 676            bytes. BUFFER_CEILING_OF returns the last character
 677            position that is contiguous, so the ceiling is the
 678            position after that.  */
 679         tem = BUFFER_CEILING_OF (start_byte);
 680         ceiling_byte = min (tem, ceiling_byte);
 681
 682         {
 683           /* The termination address of the dumb loop.  */
 684           register unsigned char *ceiling_addr
 685             = BYTE_POS_ADDR (ceiling_byte) + 1;
 686           register unsigned char *cursor
 687             = BYTE_POS_ADDR (start_byte);
 688           unsigned char *base = cursor;
 689
 690           while (cursor < ceiling_addr)
 691             {
 692               unsigned char *scan_start = cursor;
 693
 694               /* The dumb loop.  */
 695               while (*cursor != target && ++cursor < ceiling_addr)
 696                 ;
 697
 698               /* If we're looking for newlines, cache the fact that
 699                  the region from start to cursor is free of them. */
 700               if (target == '\n' && newline_cache)
 701                 know_region_cache (current_buffer, newline_cache,
 702                                    start_byte + scan_start - base,
 703                                    start_byte + cursor - base);
 704
 705               /* Did we find the target character?  */
 706               if (cursor < ceiling_addr)
 707                 {
 708                   if (--count == 0)
 709                     {
 710                       immediate_quit = 0;
 711                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 712                     }
 713                   cursor++;
 714                 }
 715             }
 716
 717           start = BYTE_TO_CHAR (start_byte + cursor - base);
 718         }
 719       }
 720   else
 721     while (start > end)
 722       {
 723         /* The last character to check before the next obstacle.  */
 724         int ceiling_byte = CHAR_TO_BYTE (end);
 725         int start_byte = CHAR_TO_BYTE (start);
 726         int tem;
 727
 728         /* Consult the newline cache, if appropriate.  */
 729         if (target == '\n' && newline_cache)
 730           {
 731             int next_change;
 732             immediate_quit = 0;
 733             while (region_cache_backward
 734                    (current_buffer, newline_cache, start_byte, &next_change))
 735               start_byte = next_change;
 736             immediate_quit = allow_quit;
 737
 738             /* Start should never be at or before end.  */
 739             if (start_byte <= ceiling_byte)
 740               start_byte = ceiling_byte + 1;
 741
 742             /* Now the text before start is an unknown region, and
 743                next_change is the position of the next known region. */
 744             ceiling_byte = max (next_change, ceiling_byte);
 745           }
 746
 747         /* Stop scanning before the gap.  */
 748         tem = BUFFER_FLOOR_OF (start_byte - 1);
 749         ceiling_byte = max (tem, ceiling_byte);
 750
 751         {
 752           /* The termination address of the dumb loop.  */
 753           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 754           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 755           unsigned char *base = cursor;
 756
 757           while (cursor >= ceiling_addr)
 758             {
 759               unsigned char *scan_start = cursor;
 760
 761               while (*cursor != target && --cursor >= ceiling_addr)
 762                 ;
 763
 764               /* If we're looking for newlines, cache the fact that
 765                  the region from after the cursor to start is free of them.  */
 766               if (target == '\n' && newline_cache)
 767                 know_region_cache (current_buffer, newline_cache,
 768                                    start_byte + cursor - base,
 769                                    start_byte + scan_start - base);
 770
 771               /* Did we find the target character?  */
 772               if (cursor >= ceiling_addr)
 773                 {
 774                   if (++count >= 0)
 775                     {
 776                       immediate_quit = 0;
 777                       return BYTE_TO_CHAR (start_byte + cursor - base);
 778                     }
 779                   cursor--;
 780                 }
 781             }
 782
 783           start = BYTE_TO_CHAR (start_byte + cursor - base);
 784         }
 785       }
 786
 787   immediate_quit = 0;
 788   if (shortage != 0)
 789     *shortage = count * direction;
 790   return start;
 791 }
 792 \f
 793 /* Search for COUNT instances of a line boundary, which means either a
 794    newline or (if selective display enabled) a carriage return.
 795    Start at START.  If COUNT is negative, search backwards.
 796
 797    We report the resulting position by calling TEMP_SET_PT_BOTH.
 798
  799    If we find COUNT instances, we position after (always after,
 800    even if scanning backwards) the COUNTth match, and return 0.
 801
 802    If we don't find COUNT instances before reaching the end of the
 803    buffer (or the beginning, if scanning backwards), we return
 804    the number of line boundaries left unfound, and position at
 805    the limit we bumped up against.
 806
 807    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 808    except in special cases.  */
 809
 810 int
 811 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 812      int start, start_byte;
 813      int limit, limit_byte;
 814      register int count;
 815      int allow_quit;
 816 {
 817   int direction = ((count > 0) ? 1 : -1);
 818
 819   register unsigned char *cursor;
 820   unsigned char *base;
 821
 822   register int ceiling;
 823   register unsigned char *ceiling_addr;
 824
 825   int old_immediate_quit = immediate_quit;
 826
 827   /* The code that follows is like scan_buffer
 828      but checks for either newline or carriage return.  */
 829
 830   if (allow_quit)
 831     immediate_quit++;
 832
 833   start_byte = CHAR_TO_BYTE (start);
 834
 835   if (count > 0)
 836     {
 837       while (start_byte < limit_byte)
 838         {
 839           ceiling =  BUFFER_CEILING_OF (start_byte);
 840           ceiling = min (limit_byte - 1, ceiling);
 841           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 842           base = (cursor = BYTE_POS_ADDR (start_byte));
 843           while (1)
 844             {
 845               while (*cursor != '\n' && ++cursor != ceiling_addr)
 846                 ;
 847
 848               if (cursor != ceiling_addr)
 849                 {
 850                   if (--count == 0)
 851                     {
 852                       immediate_quit = old_immediate_quit;
 853                       start_byte = start_byte + cursor - base + 1;
 854                       start = BYTE_TO_CHAR (start_byte);
 855                       TEMP_SET_PT_BOTH (start, start_byte);
 856                       return 0;
 857                     }
 858                   else
 859                     if (++cursor == ceiling_addr)
 860                       break;
 861                 }
 862               else
 863                 break;
 864             }
 865           start_byte += cursor - base;
 866         }
 867     }
 868   else
 869     {
 870       while (start_byte > limit_byte)
 871         {
 872           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 873           ceiling = max (limit_byte, ceiling);
 874           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 875           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 876           while (1)
 877             {
 878               while (--cursor != ceiling_addr && *cursor != '\n')
 879                 ;
 880
 881               if (cursor != ceiling_addr)
 882                 {
 883                   if (++count == 0)
 884                     {
 885                       immediate_quit = old_immediate_quit;
 886                       /* Return the position AFTER the match we found.  */
 887                       start_byte = start_byte + cursor - base + 1;
 888                       start = BYTE_TO_CHAR (start_byte);
 889                       TEMP_SET_PT_BOTH (start, start_byte);
 890                       return 0;
 891                     }
 892                 }
 893               else
 894                 break;
 895             }
 896           /* Here we add 1 to compensate for the last decrement
 897              of CURSOR, which took it past the valid range.  */
 898           start_byte += cursor - base + 1;
 899         }
 900     }
 901
 902   TEMP_SET_PT_BOTH (limit, limit_byte);
 903   immediate_quit = old_immediate_quit;
 904
 905   return count * direction;
 906 }
 907
 908 int
 909 find_next_newline_no_quit (from, cnt)
 910      register int from, cnt;
 911 {
 912   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 913 }
 914
 915 /* Like find_next_newline, but returns position before the newline,
 916    not after, and only search up to TO.  This isn't just
 917    find_next_newline (...)-1, because you might hit TO.  */
 918
 919 int
 920 find_before_next_newline (from, to, cnt)
 921      int from, to, cnt;
 922 {
 923   int shortage;
 924   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 925
 926   if (shortage == 0)
 927     pos--;
 928
 929   return pos;
 930 }
 931 \f
 932 /* Subroutines of Lisp buffer search functions. */
 933
 934 static Lisp_Object
 935 search_command (string, bound, noerror, count, direction, RE, posix)
 936      Lisp_Object string, bound, noerror, count;
 937      int direction;
 938      int RE;
 939      int posix;
 940 {
 941   register int np;
 942   int lim, lim_byte;
 943   int n = direction;
 944
 945   if (!NILP (count))
 946     {
 947       CHECK_NUMBER (count);
 948       n *= XINT (count);
 949     }
 950
 951   CHECK_STRING (string);
 952   if (NILP (bound))
 953     {
 954       if (n > 0)
 955         lim = ZV, lim_byte = ZV_BYTE;
 956       else
 957         lim = BEGV, lim_byte = BEGV_BYTE;
 958     }
 959   else
 960     {
 961       CHECK_NUMBER_COERCE_MARKER (bound);
 962       lim = XINT (bound);
 963       if (n > 0 ? lim < PT : lim > PT)
 964         error ("Invalid search bound (wrong side of point)");
 965       if (lim > ZV)
 966         lim = ZV, lim_byte = ZV_BYTE;
 967       else if (lim < BEGV)
 968         lim = BEGV, lim_byte = BEGV_BYTE;
 969       else
 970         lim_byte = CHAR_TO_BYTE (lim);
 971     }
 972
 973   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 974   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 975     = current_buffer->case_eqv_table;
 976
 977   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 978                       (!NILP (current_buffer->case_fold_search)
 979                        ? current_buffer->case_canon_table
 980                        : Qnil),
 981                       (!NILP (current_buffer->case_fold_search)
 982                        ? current_buffer->case_eqv_table
 983                        : Qnil),
 984                       posix);
 985   if (np <= 0)
 986     {
 987       if (NILP (noerror))
 988         xsignal1 (Qsearch_failed, string);
 989
 990       if (!EQ (noerror, Qt))
 991         {
 992           if (lim < BEGV || lim > ZV)
 993             abort ();
 994           SET_PT_BOTH (lim, lim_byte);
 995           return Qnil;
 996 #if 0 /* This would be clean, but maybe programs depend on
 997          a value of nil here.  */
 998           np = lim;
 999 #endif
1000         }
1001       else
1002         return Qnil;
1003     }
1004
1005   if (np < BEGV || np > ZV)
1006     abort ();
1007
1008   SET_PT (np);
1009
1010   return make_number (np);
1011 }
1012 \f
1013 /* Return 1 if REGEXP it matches just one constant string.  */
1014
1015 static int
1016 trivial_regexp_p (regexp)
1017      Lisp_Object regexp;
1018 {
1019   int len = SBYTES (regexp);
1020   unsigned char *s = SDATA (regexp);
1021   while (--len >= 0)
1022     {
1023       switch (*s++)
1024         {
1025         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1026           return 0;
1027         case '\\':
1028           if (--len < 0)
1029             return 0;
1030           switch (*s++)
1031             {
1032             case '|': case '(': case ')': case '`': case '\'': case 'b':
1033             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1034             case 'S': case '=': case '{': case '}': case '_':
1035             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1036             case '1': case '2': case '3': case '4': case '5':
1037             case '6': case '7': case '8': case '9':
1038               return 0;
1039             }
1040         }
1041     }
1042   return 1;
1043 }
1044
1045 /* Search for the n'th occurrence of STRING in the current buffer,
1046    starting at position POS and stopping at position LIM,
1047    treating STRING as a literal string if RE is false or as
1048    a regular expression if RE is true.
1049
1050    If N is positive, searching is forward and LIM must be greater than POS.
1051    If N is negative, searching is backward and LIM must be less than POS.
1052
1053    Returns -x if x occurrences remain to be found (x > 0),
1054    or else the position at the beginning of the Nth occurrence
1055    (if searching backward) or the end (if searching forward).
1056
1057    POSIX is nonzero if we want full backtracking (POSIX style)
1058    for this pattern.  0 means backtrack only enough to get a valid match.  */
1059
/* Set OUT to the translation of character D under translation table
   TRT.  If TRT is nil, or the entry for D in TRT is not an integer,
   OUT is set to D itself.

   D is evaluated exactly once, before OUT is assigned, so OUT and D
   may name the same variable.  TRT may be evaluated more than once and
   should therefore have no side effects.  */

#define TRANSLATE(out, trt, d)                                          \
do                                                                      \
  {                                                                     \
    int translate_ch_ = (d);                                            \
    if (! NILP (trt))                                                   \
      {                                                                 \
        Lisp_Object translate_elt_;                                     \
        translate_elt_ = Faref ((trt), make_number (translate_ch_));    \
        if (INTEGERP (translate_elt_))                                  \
          (out) = XINT (translate_elt_);                                \
        else                                                            \
          (out) = translate_ch_;                                        \
      }                                                                 \
    else                                                                \
      (out) = translate_ch_;                                            \
  }                                                                     \
while (0)
1076
1077 static int
1078 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1079                RE, trt, inverse_trt, posix)
1080      Lisp_Object string;
1081      int pos;
1082      int pos_byte;
1083      int lim;
1084      int lim_byte;
1085      int n;
1086      int RE;
1087      Lisp_Object trt;
1088      Lisp_Object inverse_trt;
1089      int posix;
1090 {
1091   int len = SCHARS (string);
1092   int len_byte = SBYTES (string);
1093   register int i;
1094
1095   if (running_asynch_code)
1096     save_search_regs ();
1097
1098   /* Searching 0 times means don't move.  */
1099   /* Null string is found at starting position.  */
1100   if (len == 0 || n == 0)
1101     {
1102       set_search_regs (pos_byte, 0);
1103       return pos;
1104     }
1105
1106   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1107     {
1108       unsigned char *p1, *p2;
1109       int s1, s2;
1110       struct re_pattern_buffer *bufp;
1111
1112       bufp = compile_pattern (string, &search_regs, trt, posix,
1113                               !NILP (current_buffer->enable_multibyte_characters));
1114
1115       immediate_quit = 1;       /* Quit immediately if user types ^G,
1116                                    because letting this function finish
1117                                    can take too long. */
1118       QUIT;                     /* Do a pending quit right away,
1119                                    to avoid paradoxical behavior */
1120       /* Get pointers and sizes of the two strings
1121          that make up the visible portion of the buffer. */
1122
1123       p1 = BEGV_ADDR;
1124       s1 = GPT_BYTE - BEGV_BYTE;
1125       p2 = GAP_END_ADDR;
1126       s2 = ZV_BYTE - GPT_BYTE;
1127       if (s1 < 0)
1128         {
1129           p2 = p1;
1130           s2 = ZV_BYTE - BEGV_BYTE;
1131           s1 = 0;
1132         }
1133       if (s2 < 0)
1134         {
1135           s1 = ZV_BYTE - BEGV_BYTE;
1136           s2 = 0;
1137         }
1138       re_match_object = Qnil;
1139
1140       while (n < 0)
1141         {
1142           int val;
1143           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1144                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1145                              &search_regs,
1146                              /* Don't allow match past current point */
1147                              pos_byte - BEGV_BYTE);
1148           if (val == -2)
1149             {
1150               matcher_overflow ();
1151             }
1152           if (val >= 0)
1153             {
1154               pos_byte = search_regs.start[0] + BEGV_BYTE;
1155               for (i = 0; i < search_regs.num_regs; i++)
1156                 if (search_regs.start[i] >= 0)
1157                   {
1158                     search_regs.start[i]
1159                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1160                     search_regs.end[i]
1161                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1162                   }
1163               XSETBUFFER (last_thing_searched, current_buffer);
1164               /* Set pos to the new position. */
1165               pos = search_regs.start[0];
1166             }
1167           else
1168             {
1169               immediate_quit = 0;
1170               return (n);
1171             }
1172           n++;
1173         }
1174       while (n > 0)
1175         {
1176           int val;
1177           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1178                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1179                              &search_regs,
1180                              lim_byte - BEGV_BYTE);
1181           if (val == -2)
1182             {
1183               matcher_overflow ();
1184             }
1185           if (val >= 0)
1186             {
1187               pos_byte = search_regs.end[0] + BEGV_BYTE;
1188               for (i = 0; i < search_regs.num_regs; i++)
1189                 if (search_regs.start[i] >= 0)
1190                   {
1191                     search_regs.start[i]
1192                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1193                     search_regs.end[i]
1194                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1195                   }
1196               XSETBUFFER (last_thing_searched, current_buffer);
1197               pos = search_regs.end[0];
1198             }
1199           else
1200             {
1201               immediate_quit = 0;
1202               return (0 - n);
1203             }
1204           n--;
1205         }
1206       immediate_quit = 0;
1207       return (pos);
1208     }
1209   else                          /* non-RE case */
1210     {
1211       unsigned char *raw_pattern, *pat;
1212       int raw_pattern_size;
1213       int raw_pattern_size_byte;
1214       unsigned char *patbuf;
1215       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1216       unsigned char *base_pat;
1217       /* Set to positive if we find a non-ASCII char that need
1218          translation.  Otherwise set to zero later.  */
1219       int charset_base = -1;
1220       int boyer_moore_ok = 1;
1221
1222       /* MULTIBYTE says whether the text to be searched is multibyte.
1223          We must convert PATTERN to match that, or we will not really
1224          find things right.  */
1225
1226       if (multibyte == STRING_MULTIBYTE (string))
1227         {
1228           raw_pattern = (unsigned char *) SDATA (string);
1229           raw_pattern_size = SCHARS (string);
1230           raw_pattern_size_byte = SBYTES (string);
1231         }
1232       else if (multibyte)
1233         {
1234           raw_pattern_size = SCHARS (string);
1235           raw_pattern_size_byte
1236             = count_size_as_multibyte (SDATA (string),
1237                                        raw_pattern_size);
1238           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1239           copy_text (SDATA (string), raw_pattern,
1240                      SCHARS (string), 0, 1);
1241         }
1242       else
1243         {
1244           /* Converting multibyte to single-byte.
1245
1246              ??? Perhaps this conversion should be done in a special way
1247              by subtracting nonascii-insert-offset from each non-ASCII char,
1248              so that only the multibyte chars which really correspond to
1249              the chosen single-byte character set can possibly match.  */
1250           raw_pattern_size = SCHARS (string);
1251           raw_pattern_size_byte = SCHARS (string);
1252           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1253           copy_text (SDATA (string), raw_pattern,
1254                      SBYTES (string), 1, 0);
1255         }
1256
1257       /* Copy and optionally translate the pattern.  */
1258       len = raw_pattern_size;
1259       len_byte = raw_pattern_size_byte;
1260       patbuf = (unsigned char *) alloca (len_byte);
1261       pat = patbuf;
1262       base_pat = raw_pattern;
1263       if (multibyte)
1264         {
1265           /* Fill patbuf by translated characters in STRING while
1266              checking if we can use boyer-moore search.  If TRT is
1267              non-nil, we can use boyer-moore search only if TRT can be
1268              represented by the byte array of 256 elements.  For that,
1269              all non-ASCII case-equivalents of all case-senstive
1270              characters in STRING must belong to the same charset and
1271              row.  */
1272
1273           while (--len >= 0)
1274             {
1275               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1276               int c, translated, inverse;
1277               int in_charlen, charlen;
1278
1279               /* If we got here and the RE flag is set, it's because we're
1280                  dealing with a regexp known to be trivial, so the backslash
1281                  just quotes the next character.  */
1282               if (RE && *base_pat == '\\')
1283                 {
1284                   len--;
1285                   raw_pattern_size--;
1286                   len_byte--;
1287                   base_pat++;
1288                 }
1289
1290               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1291
1292               if (NILP (trt))
1293                 {
1294                   str = base_pat;
1295                   charlen = in_charlen;
1296                 }
1297               else
1298                 {
1299                   /* Translate the character.  */
1300                   TRANSLATE (translated, trt, c);
1301                   charlen = CHAR_STRING (translated, str_base);
1302                   str = str_base;
1303
1304                   /* Check if C has any other case-equivalents.  */
1305                   TRANSLATE (inverse, inverse_trt, c);
1306                   /* If so, check if we can use boyer-moore.  */
1307                   if (c != inverse && boyer_moore_ok)
1308                     {
1309                       /* Check if all equivalents belong to the same
1310                          charset & row.  Note that the check of C
1311                          itself is done by the last iteration.  Note
1312                          also that we don't have to check ASCII
1313                          characters because boyer-moore search can
1314                          always handle their translation.  */
1315                       while (1)
1316                         {
1317                           if (ASCII_BYTE_P (inverse))
1318                             {
1319                               if (charset_base > 0)
1320                                 {
1321                                   boyer_moore_ok = 0;
1322                                   break;
1323                                 }
1324                               charset_base = 0;
1325                             }
1326                           else if (SINGLE_BYTE_CHAR_P (inverse))
1327                             {
1328                               /* Boyer-moore search can't handle a
1329                                  translation of an eight-bit
1330                                  character.  */
1331                               boyer_moore_ok = 0;
1332                               break;
1333                             }
1334                           else if (charset_base < 0)
1335                             charset_base = inverse & ~CHAR_FIELD3_MASK;
1336                           else if ((inverse & ~CHAR_FIELD3_MASK)
1337                                    != charset_base)
1338                             {
1339                               boyer_moore_ok = 0;
1340                               break;
1341                             }
1342                           if (c == inverse)
1343                             break;
1344                           TRANSLATE (inverse, inverse_trt, inverse);
1345                         }
1346                     }
1347                 }
1348               if (charset_base < 0)
1349                 charset_base = 0;
1350
1351               /* Store this character into the translated pattern.  */
1352               bcopy (str, pat, charlen);
1353               pat += charlen;
1354               base_pat += in_charlen;
1355               len_byte -= in_charlen;
1356             }
1357         }
1358       else
1359         {
1360           /* Unibyte buffer.  */
1361           charset_base = 0;
1362           while (--len >= 0)
1363             {
1364               int c, translated;
1365
1366               /* If we got here and the RE flag is set, it's because we're
1367                  dealing with a regexp known to be trivial, so the backslash
1368                  just quotes the next character.  */
1369               if (RE && *base_pat == '\\')
1370                 {
1371                   len--;
1372                   raw_pattern_size--;
1373                   base_pat++;
1374                 }
1375               c = *base_pat++;
1376               TRANSLATE (translated, trt, c);
1377               *pat++ = translated;
1378             }
1379         }
1380
1381       len_byte = pat - patbuf;
1382       len = raw_pattern_size;
1383       pat = base_pat = patbuf;
1384
1385       if (boyer_moore_ok)
1386         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1387                             pos, pos_byte, lim, lim_byte,
1388                             charset_base);
1389       else
1390         return simple_search (n, pat, len, len_byte, trt,
1391                               pos, pos_byte, lim, lim_byte);
1392     }
1393 }
1394 \f
1395 /* Do a simple string search N times for the string PAT,
1396    whose length is LEN/LEN_BYTE,
1397    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1398    TRT is the translation table.
1399
1400    Return the character position where the match is found.
1401    Otherwise, if M matches remained to be found, return -M.
1402
1403    This kind of search works regardless of what is in PAT and
1404    regardless of what is in TRT.  It is used in cases where
1405    boyer_moore cannot work.  */
1406
1407 static int
1408 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1409      int n;
1410      unsigned char *pat;
1411      int len, len_byte;
1412      Lisp_Object trt;
1413      int pos, pos_byte;
1414      int lim, lim_byte;
1415 {
1416   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1417   int forward = n > 0;
1418
1419   if (lim > pos && multibyte)
1420     while (n > 0)
1421       {
1422         while (1)
1423           {
1424             /* Try matching at position POS.  */
1425             int this_pos = pos;
1426             int this_pos_byte = pos_byte;
1427             int this_len = len;
1428             int this_len_byte = len_byte;
1429             unsigned char *p = pat;
1430             if (pos + len > lim)
1431               goto stop;
1432
1433             while (this_len > 0)
1434               {
1435                 int charlen, buf_charlen;
1436                 int pat_ch, buf_ch;
1437
1438                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1439                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1440                                                  ZV_BYTE - this_pos_byte,
1441                                                  buf_charlen);
1442                 TRANSLATE (buf_ch, trt, buf_ch);
1443
1444                 if (buf_ch != pat_ch)
1445                   break;
1446
1447                 this_len_byte -= charlen;
1448                 this_len--;
1449                 p += charlen;
1450
1451                 this_pos_byte += buf_charlen;
1452                 this_pos++;
1453               }
1454
1455             if (this_len == 0)
1456               {
1457                 pos += len;
1458                 pos_byte += len_byte;
1459                 break;
1460               }
1461
1462             INC_BOTH (pos, pos_byte);
1463           }
1464
1465         n--;
1466       }
1467   else if (lim > pos)
1468     while (n > 0)
1469       {
1470         while (1)
1471           {
1472             /* Try matching at position POS.  */
1473             int this_pos = pos;
1474             int this_len = len;
1475             unsigned char *p = pat;
1476
1477             if (pos + len > lim)
1478               goto stop;
1479
1480             while (this_len > 0)
1481               {
1482                 int pat_ch = *p++;
1483                 int buf_ch = FETCH_BYTE (this_pos);
1484                 TRANSLATE (buf_ch, trt, buf_ch);
1485
1486                 if (buf_ch != pat_ch)
1487                   break;
1488
1489                 this_len--;
1490                 this_pos++;
1491               }
1492
1493             if (this_len == 0)
1494               {
1495                 pos += len;
1496                 break;
1497               }
1498
1499             pos++;
1500           }
1501
1502         n--;
1503       }
1504   /* Backwards search.  */
1505   else if (lim < pos && multibyte)
1506     while (n < 0)
1507       {
1508         while (1)
1509           {
1510             /* Try matching at position POS.  */
1511             int this_pos = pos - len;
1512             int this_pos_byte = pos_byte - len_byte;
1513             int this_len = len;
1514             int this_len_byte = len_byte;
1515             unsigned char *p = pat;
1516
1517             if (pos - len < lim)
1518               goto stop;
1519
1520             while (this_len > 0)
1521               {
1522                 int charlen, buf_charlen;
1523                 int pat_ch, buf_ch;
1524
1525                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1526                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1527                                                  ZV_BYTE - this_pos_byte,
1528                                                  buf_charlen);
1529                 TRANSLATE (buf_ch, trt, buf_ch);
1530
1531                 if (buf_ch != pat_ch)
1532                   break;
1533
1534                 this_len_byte -= charlen;
1535                 this_len--;
1536                 p += charlen;
1537                 this_pos_byte += buf_charlen;
1538                 this_pos++;
1539               }
1540
1541             if (this_len == 0)
1542               {
1543                 pos -= len;
1544                 pos_byte -= len_byte;
1545                 break;
1546               }
1547
1548             DEC_BOTH (pos, pos_byte);
1549           }
1550
1551         n++;
1552       }
1553   else if (lim < pos)
1554     while (n < 0)
1555       {
1556         while (1)
1557           {
1558             /* Try matching at position POS.  */
1559             int this_pos = pos - len;
1560             int this_len = len;
1561             unsigned char *p = pat;
1562
1563             if (pos - len < lim)
1564               goto stop;
1565
1566             while (this_len > 0)
1567               {
1568                 int pat_ch = *p++;
1569                 int buf_ch = FETCH_BYTE (this_pos);
1570                 TRANSLATE (buf_ch, trt, buf_ch);
1571
1572                 if (buf_ch != pat_ch)
1573                   break;
1574                 this_len--;
1575                 this_pos++;
1576               }
1577
1578             if (this_len == 0)
1579               {
1580                 pos -= len;
1581                 break;
1582               }
1583
1584             pos--;
1585           }
1586
1587         n++;
1588       }
1589
1590  stop:
1591   if (n == 0)
1592     {
1593       if (forward)
1594         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1595       else
1596         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1597
1598       return pos;
1599     }
1600   else if (n > 0)
1601     return -n;
1602   else
1603     return n;
1604 }
1605 \f
1606 /* Do Boyer-Moore search N times for the string BASE_PAT,
1607    whose length is LEN/LEN_BYTE,
1608    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1609    DIRECTION says which direction we search in.
1610    TRT and INVERSE_TRT are translation tables.
1611    Characters in PAT are already translated by TRT.
1612
1613    This kind of search works if all the characters in BASE_PAT that
1614    have nontrivial translation are the same aside from the last byte.
1615    This makes it possible to translate just the last byte of a
1616    character, and do so after just a simple test of the context.
1617    CHARSET_BASE is nonzero iff there is such a non-ASCII character.
1618
1619    If that criterion is not satisfied, do not call this function.  */
1620
1621 static int
1622 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1623              pos, pos_byte, lim, lim_byte, charset_base)
1624      int n;
1625      unsigned char *base_pat;
1626      int len, len_byte;
1627      Lisp_Object trt;
1628      Lisp_Object inverse_trt;
1629      int pos, pos_byte;
1630      int lim, lim_byte;
1631      int charset_base;
1632 {
1633   int direction = ((n > 0) ? 1 : -1);
1634   register int dirlen;
1635   int infinity, limit, stride_for_teases = 0;
1636   register int *BM_tab;
1637   int *BM_tab_base;
1638   register unsigned char *cursor, *p_limit;
1639   register int i, j;
1640   unsigned char *pat, *pat_end;
1641   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1642
1643   unsigned char simple_translate[0400];
1644   /* These are set to the preceding bytes of a byte to be translated
1645      if charset_base is nonzero.  As the maximum byte length of a
1646      multibyte character is 4, we have to check at most three previous
1647      bytes.  */
1648   int translate_prev_byte1 = 0;
1649   int translate_prev_byte2 = 0;
1650   int translate_prev_byte3 = 0;
1651
1652 #ifdef C_ALLOCA
1653   int BM_tab_space[0400];
1654   BM_tab = &BM_tab_space[0];
1655 #else
1656   BM_tab = (int *) alloca (0400 * sizeof (int));
1657 #endif
1658   /* The general approach is that we are going to maintain that we know */
1659   /* the first (closest to the present position, in whatever direction */
1660   /* we're searching) character that could possibly be the last */
1661   /* (furthest from present position) character of a valid match.  We */
1662   /* advance the state of our knowledge by looking at that character */
1663   /* and seeing whether it indeed matches the last character of the */
1664   /* pattern.  If it does, we take a closer look.  If it does not, we */
1665   /* move our pointer (to putative last characters) as far as is */
1666   /* logically possible.  This amount of movement, which I call a */
1667   /* stride, will be the length of the pattern if the actual character */
1668   /* appears nowhere in the pattern, otherwise it will be the distance */
1669   /* from the last occurrence of that character to the end of the */
1670   /* pattern. */
1671   /* As a coding trick, an enormous stride is coded into the table for */
1672   /* characters that match the last character.  This allows use of only */
1673   /* a single test, a test for having gone past the end of the */
1674   /* permissible match region, to test for both possible matches (when */
1675   /* the stride goes past the end immediately) and failure to */
1676   /* match (where you get nudged past the end one stride at a time). */
1677
1678   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1679   /* is determined only by the last character of the putative match. */
1680   /* If that character does not match, we will stride the proper */
1681   /* distance to propose a match that superimposes it on the last */
1682   /* instance of a character that matches it (per trt), or misses */
1683   /* it entirely if there is none. */
1684
1685   dirlen = len_byte * direction;
1686   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1687
1688   /* Record position after the end of the pattern.  */
1689   pat_end = base_pat + len_byte;
1690   /* BASE_PAT points to a character that we start scanning from.
1691      It is the first character in a forward search,
1692      the last character in a backward search.  */
1693   if (direction < 0)
1694     base_pat = pat_end - 1;
1695
1696   BM_tab_base = BM_tab;
1697   BM_tab += 0400;
1698   j = dirlen;           /* to get it in a register */
1699   /* A character that does not appear in the pattern induces a */
1700   /* stride equal to the pattern length. */
1701   while (BM_tab_base != BM_tab)
1702     {
1703       *--BM_tab = j;
1704       *--BM_tab = j;
1705       *--BM_tab = j;
1706       *--BM_tab = j;
1707     }
1708
1709   /* We use this for translation, instead of TRT itself.
1710      We fill this in to handle the characters that actually
1711      occur in the pattern.  Others don't matter anyway!  */
1712   bzero (simple_translate, sizeof simple_translate);
1713   for (i = 0; i < 0400; i++)
1714     simple_translate[i] = i;
1715
1716   if (charset_base)
1717     {
1718       /* Setup translate_prev_byte1/2/3 from CHARSET_BASE.  Only a
1719          byte following them are the target of translation.  */
1720       int sample_char = charset_base | 0x20;
1721       unsigned char str[MAX_MULTIBYTE_LENGTH];
1722       int len = CHAR_STRING (sample_char, str);
1723
1724       translate_prev_byte1 = str[len - 2];
1725       if (len > 2)
1726         {
1727           translate_prev_byte2 = str[len - 3];
1728           if (len > 3)
1729             translate_prev_byte3 = str[len - 4];
1730         }
1731     }
1732
1733   i = 0;
1734   while (i != infinity)
1735     {
1736       unsigned char *ptr = base_pat + i;
1737       i += direction;
1738       if (i == dirlen)
1739         i = infinity;
1740       if (! NILP (trt))
1741         {
1742           /* If the byte currently looking at is the last of a
1743              character to check case-equivalents, set CH to that
1744              character.  An ASCII character and a non-ASCII character
1745              matching with CHARSET_BASE are to be checked.  */
1746           int ch = -1;
1747
1748           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1749             ch = *ptr;
1750           else if (charset_base
1751                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1752             {
1753               unsigned char *charstart = ptr - 1;
1754
1755               while (! (CHAR_HEAD_P (*charstart)))
1756                 charstart--;
1757               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1758               if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1759                 ch = -1;
1760             }
1761
1762           if (ch >= 0400)
1763             j = ((unsigned char) ch) | 0200;
1764           else
1765             j = *ptr;
1766
1767           if (i == infinity)
1768             stride_for_teases = BM_tab[j];
1769
1770           BM_tab[j] = dirlen - i;
1771           /* A translation table is accompanied by its inverse -- see */
1772           /* comment following downcase_table for details */
1773           if (ch >= 0)
1774             {
1775               int starting_ch = ch;
1776               int starting_j = j;
1777
1778               while (1)
1779                 {
1780                   TRANSLATE (ch, inverse_trt, ch);
1781                   if (ch >= 0400)
1782                     j = ((unsigned char) ch) | 0200;
1783                   else
1784                     j = (unsigned char) ch;
1785
1786                   /* For all the characters that map into CH,
1787                      set up simple_translate to map the last byte
1788                      into STARTING_J.  */
1789                   simple_translate[j] = starting_j;
1790                   if (ch == starting_ch)
1791                     break;
1792                   BM_tab[j] = dirlen - i;
1793                 }
1794             }
1795         }
1796       else
1797         {
1798           j = *ptr;
1799
1800           if (i == infinity)
1801             stride_for_teases = BM_tab[j];
1802           BM_tab[j] = dirlen - i;
1803         }
1804       /* stride_for_teases tells how much to stride if we get a */
1805       /* match on the far character but are subsequently */
1806       /* disappointed, by recording what the stride would have been */
1807       /* for that character if the last character had been */
1808       /* different. */
1809     }
1810   infinity = dirlen - infinity;
1811   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1812   /* loop invariant - POS_BYTE points at where last char (first
1813      char if reverse) of pattern would align in a possible match.  */
1814   while (n != 0)
1815     {
1816       int tail_end;
1817       unsigned char *tail_end_ptr;
1818
1819       /* It's been reported that some (broken) compiler thinks that
1820          Boolean expressions in an arithmetic context are unsigned.
1821          Using an explicit ?1:0 prevents this.  */
1822       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1823           < 0)
1824         return (n * (0 - direction));
1825       /* First we do the part we can by pointers (maybe nothing) */
1826       QUIT;
1827       pat = base_pat;
1828       limit = pos_byte - dirlen + direction;
1829       if (direction > 0)
1830         {
1831           limit = BUFFER_CEILING_OF (limit);
1832           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1833              can take on without hitting edge of buffer or the gap.  */
1834           limit = min (limit, pos_byte + 20000);
1835           limit = min (limit, lim_byte - 1);
1836         }
1837       else
1838         {
1839           limit = BUFFER_FLOOR_OF (limit);
1840           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1841              can take on without hitting edge of buffer or the gap.  */
1842           limit = max (limit, pos_byte - 20000);
1843           limit = max (limit, lim_byte);
1844         }
1845       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1846       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1847
1848       if ((limit - pos_byte) * direction > 20)
1849         {
1850           unsigned char *p2;
1851
1852           p_limit = BYTE_POS_ADDR (limit);
1853           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1854           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1855           while (1)             /* use one cursor setting as long as i can */
1856             {
1857               if (direction > 0) /* worth duplicating */
1858                 {
1859                   /* Use signed comparison if appropriate
1860                      to make cursor+infinity sure to be > p_limit.
1861                      Assuming that the buffer lies in a range of addresses
1862                      that are all "positive" (as ints) or all "negative",
1863                      either kind of comparison will work as long
1864                      as we don't step by infinity.  So pick the kind
1865                      that works when we do step by infinity.  */
1866                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1867                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1868                       cursor += BM_tab[*cursor];
1869                   else
1870                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1871                       cursor += BM_tab[*cursor];
1872                 }
1873               else
1874                 {
1875                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1876                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1877                       cursor += BM_tab[*cursor];
1878                   else
1879                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1880                       cursor += BM_tab[*cursor];
1881                 }
1882 /* If you are here, cursor is beyond the end of the searched region. */
1883 /* This can happen if you match on the far character of the pattern, */
1884 /* because the "stride" of that character is infinity, a number able */
1885 /* to throw you well beyond the end of the search.  It can also */
1886 /* happen if you fail to match within the permitted region and would */
1887 /* otherwise try a character beyond that region */
1888               if ((cursor - p_limit) * direction <= len_byte)
1889                 break;  /* a small overrun is genuine */
1890               cursor -= infinity; /* large overrun = hit */
1891               i = dirlen - direction;
1892               if (! NILP (trt))
1893                 {
1894                   while ((i -= direction) + direction != 0)
1895                     {
1896                       int ch;
1897                       cursor -= direction;
1898                       /* Translate only the last byte of a character.  */
1899                       if (! multibyte
1900                           || ((cursor == tail_end_ptr
1901                                || CHAR_HEAD_P (cursor[1]))
1902                               && (CHAR_HEAD_P (cursor[0])
1903                                   /* Check if this is the last byte of
1904                                      a translable character.  */
1905                                   || (translate_prev_byte1 == cursor[-1]
1906                                       && (CHAR_HEAD_P (translate_prev_byte1)
1907                                           || (translate_prev_byte2 == cursor[-2]
1908                                               && (CHAR_HEAD_P (translate_prev_byte2)
1909                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1910                         ch = simple_translate[*cursor];
1911                       else
1912                         ch = *cursor;
1913                       if (pat[i] != ch)
1914                         break;
1915                     }
1916                 }
1917               else
1918                 {
1919                   while ((i -= direction) + direction != 0)
1920                     {
1921                       cursor -= direction;
1922                       if (pat[i] != *cursor)
1923                         break;
1924                     }
1925                 }
1926               cursor += dirlen - i - direction; /* fix cursor */
1927               if (i + direction == 0)
1928                 {
1929                   int position;
1930
1931                   cursor -= direction;
1932
1933                   position = pos_byte + cursor - p2 + ((direction > 0)
1934                                                        ? 1 - len_byte : 0);
1935                   set_search_regs (position, len_byte);
1936
1937                   if ((n -= direction) != 0)
1938                     cursor += dirlen; /* to resume search */
1939                   else
1940                     return ((direction > 0)
1941                             ? search_regs.end[0] : search_regs.start[0]);
1942                 }
1943               else
1944                 cursor += stride_for_teases; /* <sigh> we lose -  */
1945             }
1946           pos_byte += cursor - p2;
1947         }
1948       else
1949         /* Now we'll pick up a clump that has to be done the hard */
1950         /* way because it covers a discontinuity */
1951         {
1952           limit = ((direction > 0)
1953                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1954                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1955           limit = ((direction > 0)
1956                    ? min (limit + len_byte, lim_byte - 1)
1957                    : max (limit - len_byte, lim_byte));
1958           /* LIMIT is now the last value POS_BYTE can have
1959              and still be valid for a possible match.  */
1960           while (1)
1961             {
1962               /* This loop can be coded for space rather than */
1963               /* speed because it will usually run only once. */
1964               /* (the reach is at most len + 21, and typically */
1965               /* does not exceed len) */
1966               while ((limit - pos_byte) * direction >= 0)
1967                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1968               /* now run the same tests to distinguish going off the */
1969               /* end, a match or a phony match. */
1970               if ((pos_byte - limit) * direction <= len_byte)
1971                 break;  /* ran off the end */
1972               /* Found what might be a match.
1973                  Set POS_BYTE back to last (first if reverse) pos.  */
1974               pos_byte -= infinity;
1975               i = dirlen - direction;
1976               while ((i -= direction) + direction != 0)
1977                 {
1978                   int ch;
1979                   unsigned char *ptr;
1980                   pos_byte -= direction;
1981                   ptr = BYTE_POS_ADDR (pos_byte);
1982                   /* Translate only the last byte of a character.  */
1983                   if (! multibyte
1984                       || ((ptr == tail_end_ptr
1985                            || CHAR_HEAD_P (ptr[1]))
1986                           && (CHAR_HEAD_P (ptr[0])
1987                               /* Check if this is the last byte of a
1988                                  translable character.  */
1989                               || (translate_prev_byte1 == ptr[-1]
1990                                   && (CHAR_HEAD_P (translate_prev_byte1)
1991                                       || (translate_prev_byte2 == ptr[-2]
1992                                           && (CHAR_HEAD_P (translate_prev_byte2)
1993                                               || translate_prev_byte3 == ptr[-3])))))))
1994                     ch = simple_translate[*ptr];
1995                   else
1996                     ch = *ptr;
1997                   if (pat[i] != ch)
1998                     break;
1999                 }
2000               /* Above loop has moved POS_BYTE part or all the way
2001                  back to the first pos (last pos if reverse).
2002                  Set it once again at the last (first if reverse) char.  */
2003               pos_byte += dirlen - i- direction;
2004               if (i + direction == 0)
2005                 {
2006                   int position;
2007                   pos_byte -= direction;
2008
2009                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2010
2011                   set_search_regs (position, len_byte);
2012
2013                   if ((n -= direction) != 0)
2014                     pos_byte += dirlen; /* to resume search */
2015                   else
2016                     return ((direction > 0)
2017                             ? search_regs.end[0] : search_regs.start[0]);
2018                 }
2019               else
2020                 pos_byte += stride_for_teases;
2021             }
2022           }
2023       /* We have done one clump.  Can we continue? */
2024       if ((lim_byte - pos_byte) * direction < 0)
2025         return ((0 - n) * direction);
2026     }
2027   return BYTE_TO_CHAR (pos_byte);
2028 }
2029
2030 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2031    for the overall match just found in the current buffer.
2032    Also clear out the match data for registers 1 and up.  */
2033
2034 static void
2035 set_search_regs (beg_byte, nbytes)
2036      int beg_byte, nbytes;
2037 {
2038   int i;
2039
2040   /* Make sure we have registers in which to store
2041      the match position.  */
2042   if (search_regs.num_regs == 0)
2043     {
2044       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2045       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2046       search_regs.num_regs = 2;
2047     }
2048
2049   /* Clear out the other registers.  */
2050   for (i = 1; i < search_regs.num_regs; i++)
2051     {
2052       search_regs.start[i] = -1;
2053       search_regs.end[i] = -1;
2054     }
2055
2056   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2057   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2058   XSETBUFFER (last_thing_searched, current_buffer);
2059 }
2060 \f
2061 /* Given a string of words separated by word delimiters,
2062   compute a regexp that matches those exact words
2063   separated by arbitrary punctuation.  */
2064
2065 static Lisp_Object
2066 wordify (string)
2067      Lisp_Object string;
2068 {
2069   register unsigned char *p, *o;
2070   register int i, i_byte, len, punct_count = 0, word_count = 0;
2071   Lisp_Object val;
2072   int prev_c = 0;
2073   int adjust;
2074
2075   CHECK_STRING (string);
2076   p = SDATA (string);
2077   len = SCHARS (string);
2078
2079   for (i = 0, i_byte = 0; i < len; )
2080     {
2081       int c;
2082
2083       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2084
2085       if (SYNTAX (c) != Sword)
2086         {
2087           punct_count++;
2088           if (i > 0 && SYNTAX (prev_c) == Sword)
2089             word_count++;
2090         }
2091
2092       prev_c = c;
2093     }
2094
2095   if (SYNTAX (prev_c) == Sword)
2096     word_count++;
2097   if (!word_count)
2098     return empty_string;
2099
2100   adjust = - punct_count + 5 * (word_count - 1) + 4;
2101   if (STRING_MULTIBYTE (string))
2102     val = make_uninit_multibyte_string (len + adjust,
2103                                         SBYTES (string)
2104                                         + adjust);
2105   else
2106     val = make_uninit_string (len + adjust);
2107
2108   o = SDATA (val);
2109   *o++ = '\\';
2110   *o++ = 'b';
2111   prev_c = 0;
2112
2113   for (i = 0, i_byte = 0; i < len; )
2114     {
2115       int c;
2116       int i_byte_orig = i_byte;
2117
2118       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2119
2120       if (SYNTAX (c) == Sword)
2121         {
2122           bcopy (SDATA (string) + i_byte_orig, o,
2123                  i_byte - i_byte_orig);
2124           o += i_byte - i_byte_orig;
2125         }
2126       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2127         {
2128           *o++ = '\\';
2129           *o++ = 'W';
2130           *o++ = '\\';
2131           *o++ = 'W';
2132           *o++ = '*';
2133         }
2134
2135       prev_c = c;
2136     }
2137
2138   *o++ = '\\';
2139   *o++ = 'b';
2140
2141   return val;
2142 }
2143 \f
2144 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2145        "MSearch backward: ",
2146        doc: /* Search backward from point for STRING.
2147 Set point to the beginning of the occurrence found, and return point.
2148 An optional second argument bounds the search; it is a buffer position.
2149 The match found must not extend before that position.
2150 Optional third argument, if t, means if fail just return nil (no error).
2151  If not nil and not t, position at limit of search and return nil.
2152 Optional fourth argument is repeat count--search for successive occurrences.
2153
2154 Search case-sensitivity is determined by the value of the variable
2155 `case-fold-search', which see.
2156
2157 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2158      (string, bound, noerror, count)
2159      Lisp_Object string, bound, noerror, count;
2160 {
2161   return search_command (string, bound, noerror, count, -1, 0, 0);
2162 }
2163
2164 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2165        doc: /* Search forward from point for STRING.
2166 Set point to the end of the occurrence found, and return point.
2167 An optional second argument bounds the search; it is a buffer position.
2168 The match found must not extend after that position.  nil is equivalent
2169   to (point-max).
2170 Optional third argument, if t, means if fail just return nil (no error).
2171   If not nil and not t, move to limit of search and return nil.
2172 Optional fourth argument is repeat count--search for successive occurrences.
2173
2174 Search case-sensitivity is determined by the value of the variable
2175 `case-fold-search', which see.
2176
2177 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2178      (string, bound, noerror, count)
2179      Lisp_Object string, bound, noerror, count;
2180 {
2181   return search_command (string, bound, noerror, count, 1, 0, 0);
2182 }
2183
2184 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2185        "sWord search backward: ",
2186        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2187 Set point to the beginning of the occurrence found, and return point.
2188 An optional second argument bounds the search; it is a buffer position.
2189 The match found must not extend before that position.
2190 Optional third argument, if t, means if fail just return nil (no error).
2191   If not nil and not t, move to limit of search and return nil.
2192 Optional fourth argument is repeat count--search for successive occurrences.  */)
2193      (string, bound, noerror, count)
2194      Lisp_Object string, bound, noerror, count;
2195 {
2196   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2197 }
2198
2199 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2200        "sWord search: ",
2201        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2202 Set point to the end of the occurrence found, and return point.
2203 An optional second argument bounds the search; it is a buffer position.
2204 The match found must not extend after that position.
2205 Optional third argument, if t, means if fail just return nil (no error).
2206   If not nil and not t, move to limit of search and return nil.
2207 Optional fourth argument is repeat count--search for successive occurrences.  */)
2208      (string, bound, noerror, count)
2209      Lisp_Object string, bound, noerror, count;
2210 {
2211   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2212 }
2213
2214 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2215        "sRE search backward: ",
2216        doc: /* Search backward from point for match for regular expression REGEXP.
2217 Set point to the beginning of the match, and return point.
2218 The match found is the one starting last in the buffer
2219 and yet ending before the origin of the search.
2220 An optional second argument bounds the search; it is a buffer position.
2221 The match found must start at or after that position.
2222 Optional third argument, if t, means if fail just return nil (no error).
2223   If not nil and not t, move to limit of search and return nil.
2224 Optional fourth argument is repeat count--search for successive occurrences.
2225 See also the functions `match-beginning', `match-end', `match-string',
2226 and `replace-match'.  */)
2227      (regexp, bound, noerror, count)
2228      Lisp_Object regexp, bound, noerror, count;
2229 {
2230   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2231 }
2232
2233 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2234        "sRE search: ",
2235        doc: /* Search forward from point for regular expression REGEXP.
2236 Set point to the end of the occurrence found, and return point.
2237 An optional second argument bounds the search; it is a buffer position.
2238 The match found must not extend after that position.
2239 Optional third argument, if t, means if fail just return nil (no error).
2240   If not nil and not t, move to limit of search and return nil.
2241 Optional fourth argument is repeat count--search for successive occurrences.
2242 See also the functions `match-beginning', `match-end', `match-string',
2243 and `replace-match'.  */)
2244      (regexp, bound, noerror, count)
2245      Lisp_Object regexp, bound, noerror, count;
2246 {
2247   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2248 }
2249
2250 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2251        "sPosix search backward: ",
2252        doc: /* Search backward from point for match for regular expression REGEXP.
2253 Find the longest match in accord with Posix regular expression rules.
2254 Set point to the beginning of the match, and return point.
2255 The match found is the one starting last in the buffer
2256 and yet ending before the origin of the search.
2257 An optional second argument bounds the search; it is a buffer position.
2258 The match found must start at or after that position.
2259 Optional third argument, if t, means if fail just return nil (no error).
2260   If not nil and not t, move to limit of search and return nil.
2261 Optional fourth argument is repeat count--search for successive occurrences.
2262 See also the functions `match-beginning', `match-end', `match-string',
2263 and `replace-match'.  */)
2264      (regexp, bound, noerror, count)
2265      Lisp_Object regexp, bound, noerror, count;
2266 {
2267   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2268 }
2269
2270 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2271        "sPosix search: ",
2272        doc: /* Search forward from point for regular expression REGEXP.
2273 Find the longest match in accord with Posix regular expression rules.
2274 Set point to the end of the occurrence found, and return point.
2275 An optional second argument bounds the search; it is a buffer position.
2276 The match found must not extend after that position.
2277 Optional third argument, if t, means if fail just return nil (no error).
2278   If not nil and not t, move to limit of search and return nil.
2279 Optional fourth argument is repeat count--search for successive occurrences.
2280 See also the functions `match-beginning', `match-end', `match-string',
2281 and `replace-match'.  */)
2282      (regexp, bound, noerror, count)
2283      Lisp_Object regexp, bound, noerror, count;
2284 {
2285   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2286 }
2287 \f
2288 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2289        doc: /* Replace text matched by last search with NEWTEXT.
2290 Leave point at the end of the replacement text.
2291
2292 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2293 Otherwise maybe capitalize the whole text, or maybe just word initials,
2294 based on the replaced text.
2295 If the replaced text has only capital letters
2296 and has at least one multiletter word, convert NEWTEXT to all caps.
2297 Otherwise if all words are capitalized in the replaced text,
2298 capitalize each word in NEWTEXT.
2299
2300 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2301 Otherwise treat `\\' as special:
2302   `\\&' in NEWTEXT means substitute original matched text.
2303   `\\N' means substitute what matched the Nth `\\(...\\)'.
2304        If Nth parens didn't match, substitute nothing.
2305   `\\\\' means insert one `\\'.
2306 Case conversion does not apply to these substitutions.
2307
2308 FIXEDCASE and LITERAL are optional arguments.
2309
2310 The optional fourth argument STRING can be a string to modify.
2311 This is meaningful when the previous match was done against STRING,
2312 using `string-match'.  When used this way, `replace-match'
2313 creates and returns a new string made by copying STRING and replacing
2314 the part of STRING that was matched.
2315
2316 The optional fifth argument SUBEXP specifies a subexpression;
2317 it says to replace just that subexpression with NEWTEXT,
2318 rather than replacing the entire matched text.
2319 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2320 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2321 NEWTEXT in place of subexp N.
2322 This is useful only after a regular expression search or match,
2323 since only regular expressions have distinguished subexpressions.  */)
2324      (newtext, fixedcase, literal, string, subexp)
2325      Lisp_Object newtext, fixedcase, literal, string, subexp;
2326 {
2327   enum { nochange, all_caps, cap_initial } case_action;
2328   register int pos, pos_byte;
2329   int some_multiletter_word;
2330   int some_lowercase;
2331   int some_uppercase;
2332   int some_nonuppercase_initial;
2333   register int c, prevc;
2334   int sub;
2335   int opoint, newpoint;
2336
2337   CHECK_STRING (newtext);
2338
2339   if (! NILP (string))
2340     CHECK_STRING (string);
2341
2342   case_action = nochange;       /* We tried an initialization */
2343                                 /* but some C compilers blew it */
2344
2345   if (search_regs.num_regs <= 0)
2346     error ("`replace-match' called before any match found");
2347
2348   if (NILP (subexp))
2349     sub = 0;
2350   else
2351     {
2352       CHECK_NUMBER (subexp);
2353       sub = XINT (subexp);
2354       if (sub < 0 || sub >= search_regs.num_regs)
2355         args_out_of_range (subexp, make_number (search_regs.num_regs));
2356     }
2357
2358   if (NILP (string))
2359     {
2360       if (search_regs.start[sub] < BEGV
2361           || search_regs.start[sub] > search_regs.end[sub]
2362           || search_regs.end[sub] > ZV)
2363         args_out_of_range (make_number (search_regs.start[sub]),
2364                            make_number (search_regs.end[sub]));
2365     }
2366   else
2367     {
2368       if (search_regs.start[sub] < 0
2369           || search_regs.start[sub] > search_regs.end[sub]
2370           || search_regs.end[sub] > SCHARS (string))
2371         args_out_of_range (make_number (search_regs.start[sub]),
2372                            make_number (search_regs.end[sub]));
2373     }
2374
2375   if (NILP (fixedcase))
2376     {
2377       /* Decide how to casify by examining the matched text. */
2378       int last;
2379
2380       pos = search_regs.start[sub];
2381       last = search_regs.end[sub];
2382
2383       if (NILP (string))
2384         pos_byte = CHAR_TO_BYTE (pos);
2385       else
2386         pos_byte = string_char_to_byte (string, pos);
2387
2388       prevc = '\n';
2389       case_action = all_caps;
2390
2391       /* some_multiletter_word is set nonzero if any original word
2392          is more than one letter long. */
2393       some_multiletter_word = 0;
2394       some_lowercase = 0;
2395       some_nonuppercase_initial = 0;
2396       some_uppercase = 0;
2397
2398       while (pos < last)
2399         {
2400           if (NILP (string))
2401             {
2402               c = FETCH_CHAR (pos_byte);
2403               INC_BOTH (pos, pos_byte);
2404             }
2405           else
2406             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2407
2408           if (LOWERCASEP (c))
2409             {
2410               /* Cannot be all caps if any original char is lower case */
2411
2412               some_lowercase = 1;
2413               if (SYNTAX (prevc) != Sword)
2414                 some_nonuppercase_initial = 1;
2415               else
2416                 some_multiletter_word = 1;
2417             }
2418           else if (UPPERCASEP (c))
2419             {
2420               some_uppercase = 1;
2421               if (SYNTAX (prevc) != Sword)
2422                 ;
2423               else
2424                 some_multiletter_word = 1;
2425             }
2426           else
2427             {
2428               /* If the initial is a caseless word constituent,
2429                  treat that like a lowercase initial.  */
2430               if (SYNTAX (prevc) != Sword)
2431                 some_nonuppercase_initial = 1;
2432             }
2433
2434           prevc = c;
2435         }
2436
2437       /* Convert to all caps if the old text is all caps
2438          and has at least one multiletter word.  */
2439       if (! some_lowercase && some_multiletter_word)
2440         case_action = all_caps;
2441       /* Capitalize each word, if the old text has all capitalized words.  */
2442       else if (!some_nonuppercase_initial && some_multiletter_word)
2443         case_action = cap_initial;
2444       else if (!some_nonuppercase_initial && some_uppercase)
2445         /* Should x -> yz, operating on X, give Yz or YZ?
2446            We'll assume the latter.  */
2447         case_action = all_caps;
2448       else
2449         case_action = nochange;
2450     }
2451
2452   /* Do replacement in a string.  */
2453   if (!NILP (string))
2454     {
2455       Lisp_Object before, after;
2456
2457       before = Fsubstring (string, make_number (0),
2458                            make_number (search_regs.start[sub]));
2459       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2460
2461       /* Substitute parts of the match into NEWTEXT
2462          if desired.  */
2463       if (NILP (literal))
2464         {
2465           int lastpos = 0;
2466           int lastpos_byte = 0;
2467           /* We build up the substituted string in ACCUM.  */
2468           Lisp_Object accum;
2469           Lisp_Object middle;
2470           int length = SBYTES (newtext);
2471
2472           accum = Qnil;
2473
2474           for (pos_byte = 0, pos = 0; pos_byte < length;)
2475             {
2476               int substart = -1;
2477               int subend = 0;
2478               int delbackslash = 0;
2479
2480               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2481
2482               if (c == '\\')
2483                 {
2484                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2485
2486                   if (c == '&')
2487                     {
2488                       substart = search_regs.start[sub];
2489                       subend = search_regs.end[sub];
2490                     }
2491                   else if (c >= '1' && c <= '9')
2492                     {
2493                       if (search_regs.start[c - '0'] >= 0
2494                           && c <= search_regs.num_regs + '0')
2495                         {
2496                           substart = search_regs.start[c - '0'];
2497                           subend = search_regs.end[c - '0'];
2498                         }
2499                       else
2500                         {
2501                           /* If that subexp did not match,
2502                              replace \\N with nothing.  */
2503                           substart = 0;
2504                           subend = 0;
2505                         }
2506                     }
2507                   else if (c == '\\')
2508                     delbackslash = 1;
2509                   else
2510                     error ("Invalid use of `\\' in replacement text");
2511                 }
2512               if (substart >= 0)
2513                 {
2514                   if (pos - 2 != lastpos)
2515                     middle = substring_both (newtext, lastpos,
2516                                              lastpos_byte,
2517                                              pos - 2, pos_byte - 2);
2518                   else
2519                     middle = Qnil;
2520                   accum = concat3 (accum, middle,
2521                                    Fsubstring (string,
2522                                                make_number (substart),
2523                                                make_number (subend)));
2524                   lastpos = pos;
2525                   lastpos_byte = pos_byte;
2526                 }
2527               else if (delbackslash)
2528                 {
2529                   middle = substring_both (newtext, lastpos,
2530                                            lastpos_byte,
2531                                            pos - 1, pos_byte - 1);
2532
2533                   accum = concat2 (accum, middle);
2534                   lastpos = pos;
2535                   lastpos_byte = pos_byte;
2536                 }
2537             }
2538
2539           if (pos != lastpos)
2540             middle = substring_both (newtext, lastpos,
2541                                      lastpos_byte,
2542                                      pos, pos_byte);
2543           else
2544             middle = Qnil;
2545
2546           newtext = concat2 (accum, middle);
2547         }
2548
2549       /* Do case substitution in NEWTEXT if desired.  */
2550       if (case_action == all_caps)
2551         newtext = Fupcase (newtext);
2552       else if (case_action == cap_initial)
2553         newtext = Fupcase_initials (newtext);
2554
2555       return concat3 (before, newtext, after);
2556     }
2557
2558   /* Record point, then move (quietly) to the start of the match.  */
2559   if (PT >= search_regs.end[sub])
2560     opoint = PT - ZV;
2561   else if (PT > search_regs.start[sub])
2562     opoint = search_regs.end[sub] - ZV;
2563   else
2564     opoint = PT;
2565
2566   /* If we want non-literal replacement,
2567      perform substitution on the replacement string.  */
2568   if (NILP (literal))
2569     {
2570       int length = SBYTES (newtext);
2571       unsigned char *substed;
2572       int substed_alloc_size, substed_len;
2573       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2574       int str_multibyte = STRING_MULTIBYTE (newtext);
2575       Lisp_Object rev_tbl;
2576       int really_changed = 0;
2577
2578       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2579                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2580                                           make_number (0))
2581                 : Qnil);
2582
2583       substed_alloc_size = length * 2 + 100;
2584       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2585       substed_len = 0;
2586
2587       /* Go thru NEWTEXT, producing the actual text to insert in
2588          SUBSTED while adjusting multibyteness to that of the current
2589          buffer.  */
2590
2591       for (pos_byte = 0, pos = 0; pos_byte < length;)
2592         {
2593           unsigned char str[MAX_MULTIBYTE_LENGTH];
2594           unsigned char *add_stuff = NULL;
2595           int add_len = 0;
2596           int idx = -1;
2597
2598           if (str_multibyte)
2599             {
2600               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2601               if (!buf_multibyte)
2602                 c = multibyte_char_to_unibyte (c, rev_tbl);
2603             }
2604           else
2605             {
2606               /* Note that we don't have to increment POS.  */
2607               c = SREF (newtext, pos_byte++);
2608               if (buf_multibyte)
2609                 c = unibyte_char_to_multibyte (c);
2610             }
2611
2612           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2613              or set IDX to a match index, which means put that part
2614              of the buffer text into SUBSTED.  */
2615
2616           if (c == '\\')
2617             {
2618               really_changed = 1;
2619
2620               if (str_multibyte)
2621                 {
2622                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2623                                                       pos, pos_byte);
2624                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2625                     c = multibyte_char_to_unibyte (c, rev_tbl);
2626                 }
2627               else
2628                 {
2629                   c = SREF (newtext, pos_byte++);
2630                   if (buf_multibyte)
2631                     c = unibyte_char_to_multibyte (c);
2632                 }
2633
2634               if (c == '&')
2635                 idx = sub;
2636               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2637                 {
2638                   if (search_regs.start[c - '0'] >= 1)
2639                     idx = c - '0';
2640                 }
2641               else if (c == '\\')
2642                 add_len = 1, add_stuff = "\\";
2643               else
2644                 {
2645                   xfree (substed);
2646                   error ("Invalid use of `\\' in replacement text");
2647                 }
2648             }
2649           else
2650             {
2651               add_len = CHAR_STRING (c, str);
2652               add_stuff = str;
2653             }
2654
2655           /* If we want to copy part of a previous match,
2656              set up ADD_STUFF and ADD_LEN to point to it.  */
2657           if (idx >= 0)
2658             {
2659               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2660               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2661               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2662                 move_gap (search_regs.start[idx]);
2663               add_stuff = BYTE_POS_ADDR (begbyte);
2664             }
2665
2666           /* Now the stuff we want to add to SUBSTED
2667              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2668
2669           /* Make sure SUBSTED is big enough.  */
2670           if (substed_len + add_len >= substed_alloc_size)
2671             {
2672               substed_alloc_size = substed_len + add_len + 500;
2673               substed = (unsigned char *) xrealloc (substed,
2674                                                     substed_alloc_size + 1);
2675             }
2676
2677           /* Now add to the end of SUBSTED.  */
2678           if (add_stuff)
2679             {
2680               bcopy (add_stuff, substed + substed_len, add_len);
2681               substed_len += add_len;
2682             }
2683         }
2684
2685       if (really_changed)
2686         {
2687           if (buf_multibyte)
2688             {
2689               int nchars = multibyte_chars_in_text (substed, substed_len);
2690
2691               newtext = make_multibyte_string (substed, nchars, substed_len);
2692             }
2693           else
2694             newtext = make_unibyte_string (substed, substed_len);
2695         }
2696       xfree (substed);
2697     }
2698
2699   /* Replace the old text with the new in the cleanest possible way.  */
2700   replace_range (search_regs.start[sub], search_regs.end[sub],
2701                  newtext, 1, 0, 1);
2702   newpoint = search_regs.start[sub] + SCHARS (newtext);
2703
2704   if (case_action == all_caps)
2705     Fupcase_region (make_number (search_regs.start[sub]),
2706                     make_number (newpoint));
2707   else if (case_action == cap_initial)
2708     Fupcase_initials_region (make_number (search_regs.start[sub]),
2709                              make_number (newpoint));
2710
2711   /* Adjust search data for this change.  */
2712   {
2713     int oldend = search_regs.end[sub];
2714     int oldstart = search_regs.start[sub];
2715     int change = newpoint - search_regs.end[sub];
2716     int i;
2717
2718     for (i = 0; i < search_regs.num_regs; i++)
2719       {
2720         if (search_regs.start[i] >= oldend)
2721           search_regs.start[i] += change;
2722         else if (search_regs.start[i] > oldstart)
2723           search_regs.start[i] = oldstart;
2724         if (search_regs.end[i] >= oldend)
2725           search_regs.end[i] += change;
2726         else if (search_regs.end[i] > oldstart)
2727           search_regs.end[i] = oldstart;
2728       }
2729   }
2730
2731   /* Put point back where it was in the text.  */
2732   if (opoint <= 0)
2733     TEMP_SET_PT (opoint + ZV);
2734   else
2735     TEMP_SET_PT (opoint);
2736
2737   /* Now move point "officially" to the start of the inserted replacement.  */
2738   move_if_not_intangible (newpoint);
2739
2740   return Qnil;
2741 }
2742 \f
2743 static Lisp_Object
2744 match_limit (num, beginningp)
2745      Lisp_Object num;
2746      int beginningp;
2747 {
2748   register int n;
2749
2750   CHECK_NUMBER (num);
2751   n = XINT (num);
2752   if (n < 0)
2753     args_out_of_range (num, make_number (0));
2754   if (search_regs.num_regs <= 0)
2755     error ("No match data, because no search succeeded");
2756   if (n >= search_regs.num_regs
2757       || search_regs.start[n] < 0)
2758     return Qnil;
2759   return (make_number ((beginningp) ? search_regs.start[n]
2760                                     : search_regs.end[n]));
2761 }
2762
2763 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2764        doc: /* Return position of start of text matched by last search.
2765 SUBEXP, a number, specifies which parenthesized expression in the last
2766   regexp.
2767 Value is nil if SUBEXPth pair didn't match, or there were less than
2768   SUBEXP pairs.
2769 Zero means the entire text matched by the whole regexp or whole string.  */)
2770      (subexp)
2771      Lisp_Object subexp;
2772 {
2773   return match_limit (subexp, 1);
2774 }
2775
2776 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2777        doc: /* Return position of end of text matched by last search.
2778 SUBEXP, a number, specifies which parenthesized expression in the last
2779   regexp.
2780 Value is nil if SUBEXPth pair didn't match, or there were less than
2781   SUBEXP pairs.
2782 Zero means the entire text matched by the whole regexp or whole string.  */)
2783      (subexp)
2784      Lisp_Object subexp;
2785 {
2786   return match_limit (subexp, 0);
2787 }
2788
2789 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2790        doc: /* Return a list containing all info on what the last search matched.
2791 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2792 All the elements are markers or nil (nil if the Nth pair didn't match)
2793 if the last match was on a buffer; integers or nil if a string was matched.
2794 Use `store-match-data' to reinstate the data in this list.
2795
2796 If INTEGERS (the optional first argument) is non-nil, always use
2797 integers \(rather than markers) to represent buffer positions.  In
2798 this case, and if the last match was in a buffer, the buffer will get
2799 stored as one additional element at the end of the list.
2800
2801 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2802 enough to hold all the values, and if INTEGERS is non-nil, no consing
2803 is done.
2804
2805 If optional third arg RESEAT is non-nil, any previous markers on the
2806 REUSE list will be modified to point to nowhere.
2807
2808 Return value is undefined if the last search failed.  */)
2809   (integers, reuse, reseat)
2810      Lisp_Object integers, reuse, reseat;
2811 {
2812   Lisp_Object tail, prev;
2813   Lisp_Object *data;
2814   int i, len;
2815
2816   if (!NILP (reseat))
2817     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2818       if (MARKERP (XCAR (tail)))
2819         {
2820           unchain_marker (XMARKER (XCAR (tail)));
2821           XSETCAR (tail, Qnil);
2822         }
2823
2824   if (NILP (last_thing_searched))
2825     return Qnil;
2826
2827   prev = Qnil;
2828
2829   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2830                                  * sizeof (Lisp_Object));
2831
2832   len = 0;
2833   for (i = 0; i < search_regs.num_regs; i++)
2834     {
2835       int start = search_regs.start[i];
2836       if (start >= 0)
2837         {
2838           if (EQ (last_thing_searched, Qt)
2839               || ! NILP (integers))
2840             {
2841               XSETFASTINT (data[2 * i], start);
2842               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2843             }
2844           else if (BUFFERP (last_thing_searched))
2845             {
2846               data[2 * i] = Fmake_marker ();
2847               Fset_marker (data[2 * i],
2848                            make_number (start),
2849                            last_thing_searched);
2850               data[2 * i + 1] = Fmake_marker ();
2851               Fset_marker (data[2 * i + 1],
2852                            make_number (search_regs.end[i]),
2853                            last_thing_searched);
2854             }
2855           else
2856             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2857             abort ();
2858
2859           len = 2 * i + 2;
2860         }
2861       else
2862         data[2 * i] = data[2 * i + 1] = Qnil;
2863     }
2864
2865   if (BUFFERP (last_thing_searched) && !NILP (integers))
2866     {
2867       data[len] = last_thing_searched;
2868       len++;
2869     }
2870
2871   /* If REUSE is not usable, cons up the values and return them.  */
2872   if (! CONSP (reuse))
2873     return Flist (len, data);
2874
2875   /* If REUSE is a list, store as many value elements as will fit
2876      into the elements of REUSE.  */
2877   for (i = 0, tail = reuse; CONSP (tail);
2878        i++, tail = XCDR (tail))
2879     {
2880       if (i < len)
2881         XSETCAR (tail, data[i]);
2882       else
2883         XSETCAR (tail, Qnil);
2884       prev = tail;
2885     }
2886
2887   /* If we couldn't fit all value elements into REUSE,
2888      cons up the rest of them and add them to the end of REUSE.  */
2889   if (i < len)
2890     XSETCDR (prev, Flist (len - i, data + i));
2891
2892   return reuse;
2893 }
2894
2895 /* Internal usage only:
2896    If RESEAT is `evaporate', put the markers back on the free list
2897    immediately.  No other references to the markers must exist in this case,
2898    so it is used only internally on the unwind stack and save-match-data from
2899    Lisp.  */
2900
2901 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2902        doc: /* Set internal data on last search match from elements of LIST.
2903 LIST should have been created by calling `match-data' previously.
2904
2905 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2906     (list, reseat)
2907      register Lisp_Object list, reseat;
2908 {
2909   register int i;
2910   register Lisp_Object marker;
2911
2912   if (running_asynch_code)
2913     save_search_regs ();
2914
2915   CHECK_LIST (list);
2916
2917   /* Unless we find a marker with a buffer or an explicit buffer
2918      in LIST, assume that this match data came from a string.  */
2919   last_thing_searched = Qt;
2920
2921   /* Allocate registers if they don't already exist.  */
2922   {
2923     int length = XFASTINT (Flength (list)) / 2;
2924
2925     if (length > search_regs.num_regs)
2926       {
2927         if (search_regs.num_regs == 0)
2928           {
2929             search_regs.start
2930               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2931             search_regs.end
2932               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2933           }
2934         else
2935           {
2936             search_regs.start
2937               = (regoff_t *) xrealloc (search_regs.start,
2938                                        length * sizeof (regoff_t));
2939             search_regs.end
2940               = (regoff_t *) xrealloc (search_regs.end,
2941                                        length * sizeof (regoff_t));
2942           }
2943
2944         for (i = search_regs.num_regs; i < length; i++)
2945           search_regs.start[i] = -1;
2946
2947         search_regs.num_regs = length;
2948       }
2949
2950     for (i = 0; CONSP (list); i++)
2951       {
2952         marker = XCAR (list);
2953         if (BUFFERP (marker))
2954           {
2955             last_thing_searched = marker;
2956             break;
2957           }
2958         if (i >= length)
2959           break;
2960         if (NILP (marker))
2961           {
2962             search_regs.start[i] = -1;
2963             list = XCDR (list);
2964           }
2965         else
2966           {
2967             int from;
2968             Lisp_Object m;
2969
2970             m = marker;
2971             if (MARKERP (marker))
2972               {
2973                 if (XMARKER (marker)->buffer == 0)
2974                   XSETFASTINT (marker, 0);
2975                 else
2976                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2977               }
2978
2979             CHECK_NUMBER_COERCE_MARKER (marker);
2980             from = XINT (marker);
2981
2982             if (!NILP (reseat) && MARKERP (m))
2983               {
2984                 if (EQ (reseat, Qevaporate))
2985                   free_marker (m);
2986                 else
2987                   unchain_marker (XMARKER (m));
2988                 XSETCAR (list, Qnil);
2989               }
2990
2991             if ((list = XCDR (list), !CONSP (list)))
2992               break;
2993
2994             m = marker = XCAR (list);
2995
2996             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2997               XSETFASTINT (marker, 0);
2998
2999             CHECK_NUMBER_COERCE_MARKER (marker);
3000             search_regs.start[i] = from;
3001             search_regs.end[i] = XINT (marker);
3002
3003             if (!NILP (reseat) && MARKERP (m))
3004               {
3005                 if (EQ (reseat, Qevaporate))
3006                   free_marker (m);
3007                 else
3008                   unchain_marker (XMARKER (m));
3009                 XSETCAR (list, Qnil);
3010               }
3011           }
3012         list = XCDR (list);
3013       }
3014
3015     for (; i < search_regs.num_regs; i++)
3016       search_regs.start[i] = -1;
3017   }
3018
3019   return Qnil;
3020 }
3021
3022 /* If non-zero the match data have been saved in saved_search_regs
3023    during the execution of a sentinel or filter. */
3024 static int search_regs_saved;
3025 static struct re_registers saved_search_regs;
3026 static Lisp_Object saved_last_thing_searched;
3027
3028 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3029    if asynchronous code (filter or sentinel) is running. */
3030 static void
3031 save_search_regs ()
3032 {
3033   if (!search_regs_saved)
3034     {
3035       saved_search_regs.num_regs = search_regs.num_regs;
3036       saved_search_regs.start = search_regs.start;
3037       saved_search_regs.end = search_regs.end;
3038       saved_last_thing_searched = last_thing_searched;
3039       last_thing_searched = Qnil;
3040       search_regs.num_regs = 0;
3041       search_regs.start = 0;
3042       search_regs.end = 0;
3043
3044       search_regs_saved = 1;
3045     }
3046 }
3047
3048 /* Called upon exit from filters and sentinels. */
3049 void
3050 restore_search_regs ()
3051 {
3052   if (search_regs_saved)
3053     {
3054       if (search_regs.num_regs > 0)
3055         {
3056           xfree (search_regs.start);
3057           xfree (search_regs.end);
3058         }
3059       search_regs.num_regs = saved_search_regs.num_regs;
3060       search_regs.start = saved_search_regs.start;
3061       search_regs.end = saved_search_regs.end;
3062       last_thing_searched = saved_last_thing_searched;
3063       saved_last_thing_searched = Qnil;
3064       search_regs_saved = 0;
3065     }
3066 }
3067
3068 static Lisp_Object
3069 unwind_set_match_data (list)
3070      Lisp_Object list;
3071 {
3072   /* It is safe to free (evaporate) the markers immediately.  */
3073   return Fset_match_data (list, Qevaporate);
3074 }
3075
3076 /* Called to unwind protect the match data.  */
3077 void
3078 record_unwind_save_match_data ()
3079 {
3080   record_unwind_protect (unwind_set_match_data,
3081                          Fmatch_data (Qnil, Qnil, Qnil));
3082 }
3083
3084 /* Quote a string to inactivate reg-expr chars */
3085
3086 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3087        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3088      (string)
3089      Lisp_Object string;
3090 {
3091   register unsigned char *in, *out, *end;
3092   register unsigned char *temp;
3093   int backslashes_added = 0;
3094
3095   CHECK_STRING (string);
3096
3097   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3098
3099   /* Now copy the data into the new string, inserting escapes. */
3100
3101   in = SDATA (string);
3102   end = in + SBYTES (string);
3103   out = temp;
3104
3105   for (; in != end; in++)
3106     {
3107       if (*in == '['
3108           || *in == '*' || *in == '.' || *in == '\\'
3109           || *in == '?' || *in == '+'
3110           || *in == '^' || *in == '$')
3111         *out++ = '\\', backslashes_added++;
3112       *out++ = *in;
3113     }
3114
3115   return make_specified_string (temp,
3116                                 SCHARS (string) + backslashes_added,
3117                                 out - temp,
3118                                 STRING_MULTIBYTE (string));
3119 }
3120 \f
3121 void
3122 syms_of_search ()
3123 {
3124   register int i;
3125
3126   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3127     {
3128       searchbufs[i].buf.allocated = 100;
3129       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3130       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3131       searchbufs[i].regexp = Qnil;
3132       searchbufs[i].whitespace_regexp = Qnil;
3133       searchbufs[i].syntax_table = Qnil;
3134       staticpro (&searchbufs[i].regexp);
3135       staticpro (&searchbufs[i].whitespace_regexp);
3136       staticpro (&searchbufs[i].syntax_table);
3137       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3138     }
3139   searchbuf_head = &searchbufs[0];
3140
3141   Qsearch_failed = intern ("search-failed");
3142   staticpro (&Qsearch_failed);
3143   Qinvalid_regexp = intern ("invalid-regexp");
3144   staticpro (&Qinvalid_regexp);
3145
3146   Fput (Qsearch_failed, Qerror_conditions,
3147         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3148   Fput (Qsearch_failed, Qerror_message,
3149         build_string ("Search failed"));
3150
3151   Fput (Qinvalid_regexp, Qerror_conditions,
3152         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3153   Fput (Qinvalid_regexp, Qerror_message,
3154         build_string ("Invalid regexp"));
3155
3156   last_thing_searched = Qnil;
3157   staticpro (&last_thing_searched);
3158
3159   saved_last_thing_searched = Qnil;
3160   staticpro (&saved_last_thing_searched);
3161
3162   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3163       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3164 Some commands use this for user-specified regexps.
3165 Spaces that occur inside character classes or repetition operators
3166 or other such regexp constructs are not replaced with this.
3167 A value of nil (which is the normal value) means treat spaces literally.  */);
3168   Vsearch_spaces_regexp = Qnil;
3169
3170   defsubr (&Slooking_at);
3171   defsubr (&Sposix_looking_at);
3172   defsubr (&Sstring_match);
3173   defsubr (&Sposix_string_match);
3174   defsubr (&Ssearch_forward);
3175   defsubr (&Ssearch_backward);
3176   defsubr (&Sword_search_forward);
3177   defsubr (&Sword_search_backward);
3178   defsubr (&Sre_search_forward);
3179   defsubr (&Sre_search_backward);
3180   defsubr (&Sposix_search_forward);
3181   defsubr (&Sposix_search_backward);
3182   defsubr (&Sreplace_match);
3183   defsubr (&Smatch_beginning);
3184   defsubr (&Smatch_end);
3185   defsubr (&Smatch_data);
3186   defsubr (&Sset_match_data);
3187   defsubr (&Sregexp_quote);
3188 }
3189
3190 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3191    (do not change this comment) */