]> code.delx.au - gnu-emacs/blob - src/search.c
(Version, mh-version): Update for release 8.0.
[gnu-emacs] / src / search.c
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
3 2004, 2005, 2006 Free Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22
23 #include <config.h>
24 #include "lisp.h"
25 #include "syntax.h"
26 #include "category.h"
27 #include "buffer.h"
28 #include "charset.h"
29 #include "region-cache.h"
30 #include "commands.h"
31 #include "blockinput.h"
32 #include "intervals.h"
33
34 #include <sys/types.h>
35 #include "regex.h"
36
37 #define REGEXP_CACHE_SIZE 20
38
39 /* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
41 struct regexp_cache
42 {
43 struct regexp_cache *next;
44 Lisp_Object regexp, whitespace_regexp;
45 struct re_pattern_buffer buf;
46 char fastmap[0400];
47 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
48 char posix;
49 };
50
51 /* The instances of that struct. */
52 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
53
54 /* The head of the linked list; points to the most recently used buffer. */
55 struct regexp_cache *searchbuf_head;
56
57
58 /* Every call to re_match, etc., must pass &search_regs as the regs
59 argument unless you can show it is unnecessary (i.e., if re_match
60 is certainly going to be called again before region-around-match
61 can be called).
62
63 Since the registers are now dynamically allocated, we need to make
64 sure not to refer to the Nth register before checking that it has
65 been allocated by checking search_regs.num_regs.
66
67 The regex code keeps track of whether it has allocated the search
68 buffer using bits in the re_pattern_buffer. This means that whenever
69 you compile a new pattern, it completely forgets whether it has
70 allocated any registers, and will allocate new registers the next
71 time you call a searching or matching function. Therefore, we need
72 to call re_set_registers after compiling a new pattern or after
73 setting the match registers, so that the regex functions will be
74 able to free or re-allocate it properly. */
75 static struct re_registers search_regs;
76
77 /* The buffer in which the last search was performed, or
78 Qt if the last search was done in a string;
79 Qnil if no searching has been done yet. */
80 static Lisp_Object last_thing_searched;
81
82 /* error condition signaled when regexp compile_pattern fails */
83
84 Lisp_Object Qinvalid_regexp;
85
86 Lisp_Object Vsearch_spaces_regexp;
87
88 static void set_search_regs ();
89 static void save_search_regs ();
90 static int simple_search ();
91 static int boyer_moore ();
92 static int search_buffer ();
93
94 static void
95 matcher_overflow ()
96 {
97 error ("Stack overflow in regexp matcher");
98 }
99
100 /* Compile a regexp and signal a Lisp error if anything goes wrong.
101 PATTERN is the pattern to compile.
102 CP is the place to put the result.
103 TRANSLATE is a translation table for ignoring case, or nil for none.
104 REGP is the structure that says where to store the "register"
105 values that will result from matching this pattern.
106 If it is 0, we should compile the pattern not to record any
107 subexpression bounds.
108 POSIX is nonzero if we want full backtracking (POSIX style)
109 for this pattern. 0 means backtrack only enough to get a valid match.
110 MULTIBYTE is nonzero if we want to handle multibyte characters in
111 PATTERN. 0 means all multibyte characters are recognized just as
112 sequences of binary data.
113
114 The behavior also depends on Vsearch_spaces_regexp. */
115
116 static void
117 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
118 struct regexp_cache *cp;
119 Lisp_Object pattern;
120 Lisp_Object translate;
121 struct re_registers *regp;
122 int posix;
123 int multibyte;
124 {
125 unsigned char *raw_pattern;
126 int raw_pattern_size;
127 char *val;
128 reg_syntax_t old;
129
130 /* MULTIBYTE says whether the text to be searched is multibyte.
131 We must convert PATTERN to match that, or we will not really
132 find things right. */
133
134 if (multibyte == STRING_MULTIBYTE (pattern))
135 {
136 raw_pattern = (unsigned char *) SDATA (pattern);
137 raw_pattern_size = SBYTES (pattern);
138 }
139 else if (multibyte)
140 {
141 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
142 SCHARS (pattern));
143 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
144 copy_text (SDATA (pattern), raw_pattern,
145 SCHARS (pattern), 0, 1);
146 }
147 else
148 {
149 /* Converting multibyte to single-byte.
150
151 ??? Perhaps this conversion should be done in a special way
152 by subtracting nonascii-insert-offset from each non-ASCII char,
153 so that only the multibyte chars which really correspond to
154 the chosen single-byte character set can possibly match. */
155 raw_pattern_size = SCHARS (pattern);
156 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
157 copy_text (SDATA (pattern), raw_pattern,
158 SBYTES (pattern), 1, 0);
159 }
160
161 cp->regexp = Qnil;
162 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
163 cp->posix = posix;
164 cp->buf.multibyte = multibyte;
165 cp->whitespace_regexp = Vsearch_spaces_regexp;
166 BLOCK_INPUT;
167 old = re_set_syntax (RE_SYNTAX_EMACS
168 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
169
170 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
171 : SDATA (Vsearch_spaces_regexp));
172
173 val = (char *) re_compile_pattern ((char *)raw_pattern,
174 raw_pattern_size, &cp->buf);
175
176 re_set_whitespace_regexp (NULL);
177
178 re_set_syntax (old);
179 UNBLOCK_INPUT;
180 if (val)
181 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
182
183 cp->regexp = Fcopy_sequence (pattern);
184 }
185
186 /* Shrink each compiled regexp buffer in the cache
187 to the size actually used right now.
188 This is called from garbage collection. */
189
190 void
191 shrink_regexp_cache ()
192 {
193 struct regexp_cache *cp;
194
195 for (cp = searchbuf_head; cp != 0; cp = cp->next)
196 {
197 cp->buf.allocated = cp->buf.used;
198 cp->buf.buffer
199 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
200 }
201 }
202
203 /* Compile a regexp if necessary, but first check to see if there's one in
204 the cache.
205 PATTERN is the pattern to compile.
206 TRANSLATE is a translation table for ignoring case, or nil for none.
207 REGP is the structure that says where to store the "register"
208 values that will result from matching this pattern.
209 If it is 0, we should compile the pattern not to record any
210 subexpression bounds.
211 POSIX is nonzero if we want full backtracking (POSIX style)
212 for this pattern. 0 means backtrack only enough to get a valid match. */
213
214 struct re_pattern_buffer *
215 compile_pattern (pattern, regp, translate, posix, multibyte)
216 Lisp_Object pattern;
217 struct re_registers *regp;
218 Lisp_Object translate;
219 int posix, multibyte;
220 {
221 struct regexp_cache *cp, **cpp;
222
223 for (cpp = &searchbuf_head; ; cpp = &cp->next)
224 {
225 cp = *cpp;
226 /* Entries are initialized to nil, and may be set to nil by
227 compile_pattern_1 if the pattern isn't valid. Don't apply
228 string accessors in those cases. However, compile_pattern_1
229 is only applied to the cache entry we pick here to reuse. So
230 nil should never appear before a non-nil entry. */
231 if (NILP (cp->regexp))
232 goto compile_it;
233 if (SCHARS (cp->regexp) == SCHARS (pattern)
234 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
235 && !NILP (Fstring_equal (cp->regexp, pattern))
236 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
237 && cp->posix == posix
238 && cp->buf.multibyte == multibyte
239 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
240 break;
241
242 /* If we're at the end of the cache, compile into the nil cell
243 we found, or the last (least recently used) cell with a
244 string value. */
245 if (cp->next == 0)
246 {
247 compile_it:
248 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
249 break;
250 }
251 }
252
253 /* When we get here, cp (aka *cpp) contains the compiled pattern,
254 either because we found it in the cache or because we just compiled it.
255 Move it to the front of the queue to mark it as most recently used. */
256 *cpp = cp->next;
257 cp->next = searchbuf_head;
258 searchbuf_head = cp;
259
260 /* Advise the searching functions about the space we have allocated
261 for register data. */
262 if (regp)
263 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
264
265 return &cp->buf;
266 }
267
268 /* Error condition used for failing searches */
269 Lisp_Object Qsearch_failed;
270
271 Lisp_Object
272 signal_failure (arg)
273 Lisp_Object arg;
274 {
275 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
276 return Qnil;
277 }
278 \f
279 static Lisp_Object
280 looking_at_1 (string, posix)
281 Lisp_Object string;
282 int posix;
283 {
284 Lisp_Object val;
285 unsigned char *p1, *p2;
286 int s1, s2;
287 register int i;
288 struct re_pattern_buffer *bufp;
289
290 if (running_asynch_code)
291 save_search_regs ();
292
293 CHECK_STRING (string);
294 bufp = compile_pattern (string, &search_regs,
295 (!NILP (current_buffer->case_fold_search)
296 ? current_buffer->case_canon_table : Qnil),
297 posix,
298 !NILP (current_buffer->enable_multibyte_characters));
299
300 immediate_quit = 1;
301 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
302
303 /* Get pointers and sizes of the two strings
304 that make up the visible portion of the buffer. */
305
306 p1 = BEGV_ADDR;
307 s1 = GPT_BYTE - BEGV_BYTE;
308 p2 = GAP_END_ADDR;
309 s2 = ZV_BYTE - GPT_BYTE;
310 if (s1 < 0)
311 {
312 p2 = p1;
313 s2 = ZV_BYTE - BEGV_BYTE;
314 s1 = 0;
315 }
316 if (s2 < 0)
317 {
318 s1 = ZV_BYTE - BEGV_BYTE;
319 s2 = 0;
320 }
321
322 re_match_object = Qnil;
323
324 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
325 PT_BYTE - BEGV_BYTE, &search_regs,
326 ZV_BYTE - BEGV_BYTE);
327 immediate_quit = 0;
328
329 if (i == -2)
330 matcher_overflow ();
331
332 val = (0 <= i ? Qt : Qnil);
333 if (i >= 0)
334 for (i = 0; i < search_regs.num_regs; i++)
335 if (search_regs.start[i] >= 0)
336 {
337 search_regs.start[i]
338 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
339 search_regs.end[i]
340 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
341 }
342 XSETBUFFER (last_thing_searched, current_buffer);
343 return val;
344 }
345
346 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
347 doc: /* Return t if text after point matches regular expression REGEXP.
348 This function modifies the match data that `match-beginning',
349 `match-end' and `match-data' access; save and restore the match
350 data if you want to preserve them. */)
351 (regexp)
352 Lisp_Object regexp;
353 {
354 return looking_at_1 (regexp, 0);
355 }
356
357 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
358 doc: /* Return t if text after point matches regular expression REGEXP.
359 Find the longest match, in accord with Posix regular expression rules.
360 This function modifies the match data that `match-beginning',
361 `match-end' and `match-data' access; save and restore the match
362 data if you want to preserve them. */)
363 (regexp)
364 Lisp_Object regexp;
365 {
366 return looking_at_1 (regexp, 1);
367 }
368 \f
369 static Lisp_Object
370 string_match_1 (regexp, string, start, posix)
371 Lisp_Object regexp, string, start;
372 int posix;
373 {
374 int val;
375 struct re_pattern_buffer *bufp;
376 int pos, pos_byte;
377 int i;
378
379 if (running_asynch_code)
380 save_search_regs ();
381
382 CHECK_STRING (regexp);
383 CHECK_STRING (string);
384
385 if (NILP (start))
386 pos = 0, pos_byte = 0;
387 else
388 {
389 int len = SCHARS (string);
390
391 CHECK_NUMBER (start);
392 pos = XINT (start);
393 if (pos < 0 && -pos <= len)
394 pos = len + pos;
395 else if (0 > pos || pos > len)
396 args_out_of_range (string, start);
397 pos_byte = string_char_to_byte (string, pos);
398 }
399
400 bufp = compile_pattern (regexp, &search_regs,
401 (!NILP (current_buffer->case_fold_search)
402 ? current_buffer->case_canon_table : Qnil),
403 posix,
404 STRING_MULTIBYTE (string));
405 immediate_quit = 1;
406 re_match_object = string;
407
408 val = re_search (bufp, (char *) SDATA (string),
409 SBYTES (string), pos_byte,
410 SBYTES (string) - pos_byte,
411 &search_regs);
412 immediate_quit = 0;
413 last_thing_searched = Qt;
414 if (val == -2)
415 matcher_overflow ();
416 if (val < 0) return Qnil;
417
418 for (i = 0; i < search_regs.num_regs; i++)
419 if (search_regs.start[i] >= 0)
420 {
421 search_regs.start[i]
422 = string_byte_to_char (string, search_regs.start[i]);
423 search_regs.end[i]
424 = string_byte_to_char (string, search_regs.end[i]);
425 }
426
427 return make_number (string_byte_to_char (string, val));
428 }
429
430 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
431 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
432 Matching ignores case if `case-fold-search' is non-nil.
433 If third arg START is non-nil, start search at that index in STRING.
434 For index of first char beyond the match, do (match-end 0).
435 `match-end' and `match-beginning' also give indices of substrings
436 matched by parenthesis constructs in the pattern.
437
438 You can use the function `match-string' to extract the substrings
439 matched by the parenthesis constructions in REGEXP. */)
440 (regexp, string, start)
441 Lisp_Object regexp, string, start;
442 {
443 return string_match_1 (regexp, string, start, 0);
444 }
445
446 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
447 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
448 Find the longest match, in accord with Posix regular expression rules.
449 Case is ignored if `case-fold-search' is non-nil in the current buffer.
450 If third arg START is non-nil, start search at that index in STRING.
451 For index of first char beyond the match, do (match-end 0).
452 `match-end' and `match-beginning' also give indices of substrings
453 matched by parenthesis constructs in the pattern. */)
454 (regexp, string, start)
455 Lisp_Object regexp, string, start;
456 {
457 return string_match_1 (regexp, string, start, 1);
458 }
459
460 /* Match REGEXP against STRING, searching all of STRING,
461 and return the index of the match, or negative on failure.
462 This does not clobber the match data. */
463
464 int
465 fast_string_match (regexp, string)
466 Lisp_Object regexp, string;
467 {
468 int val;
469 struct re_pattern_buffer *bufp;
470
471 bufp = compile_pattern (regexp, 0, Qnil,
472 0, STRING_MULTIBYTE (string));
473 immediate_quit = 1;
474 re_match_object = string;
475
476 val = re_search (bufp, (char *) SDATA (string),
477 SBYTES (string), 0,
478 SBYTES (string), 0);
479 immediate_quit = 0;
480 return val;
481 }
482
483 /* Match REGEXP against STRING, searching all of STRING ignoring case,
484 and return the index of the match, or negative on failure.
485 This does not clobber the match data.
486 We assume that STRING contains single-byte characters. */
487
488 extern Lisp_Object Vascii_downcase_table;
489
490 int
491 fast_c_string_match_ignore_case (regexp, string)
492 Lisp_Object regexp;
493 const char *string;
494 {
495 int val;
496 struct re_pattern_buffer *bufp;
497 int len = strlen (string);
498
499 regexp = string_make_unibyte (regexp);
500 re_match_object = Qt;
501 bufp = compile_pattern (regexp, 0,
502 Vascii_canon_table, 0,
503 0);
504 immediate_quit = 1;
505 val = re_search (bufp, string, len, 0, len, 0);
506 immediate_quit = 0;
507 return val;
508 }
509
510 /* Like fast_string_match but ignore case. */
511
512 int
513 fast_string_match_ignore_case (regexp, string)
514 Lisp_Object regexp, string;
515 {
516 int val;
517 struct re_pattern_buffer *bufp;
518
519 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
520 0, STRING_MULTIBYTE (string));
521 immediate_quit = 1;
522 re_match_object = string;
523
524 val = re_search (bufp, (char *) SDATA (string),
525 SBYTES (string), 0,
526 SBYTES (string), 0);
527 immediate_quit = 0;
528 return val;
529 }
530 \f
531 /* The newline cache: remembering which sections of text have no newlines. */
532
533 /* If the user has requested newline caching, make sure it's on.
534 Otherwise, make sure it's off.
535 This is our cheezy way of associating an action with the change of
536 state of a buffer-local variable. */
537 static void
538 newline_cache_on_off (buf)
539 struct buffer *buf;
540 {
541 if (NILP (buf->cache_long_line_scans))
542 {
543 /* It should be off. */
544 if (buf->newline_cache)
545 {
546 free_region_cache (buf->newline_cache);
547 buf->newline_cache = 0;
548 }
549 }
550 else
551 {
552 /* It should be on. */
553 if (buf->newline_cache == 0)
554 buf->newline_cache = new_region_cache ();
555 }
556 }
557
558 \f
559 /* Search for COUNT instances of the character TARGET between START and END.
560
561 If COUNT is positive, search forwards; END must be >= START.
562 If COUNT is negative, search backwards for the -COUNTth instance;
563 END must be <= START.
564 If COUNT is zero, do anything you please; run rogue, for all I care.
565
566 If END is zero, use BEGV or ZV instead, as appropriate for the
567 direction indicated by COUNT.
568
569 If we find COUNT instances, set *SHORTAGE to zero, and return the
570 position past the COUNTth match. Note that for reverse motion
571 this is not the same as the usual convention for Emacs motion commands.
572
573 If we don't find COUNT instances before reaching END, set *SHORTAGE
574 to the number of TARGETs left unfound, and return END.
575
576 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
577 except when inside redisplay. */
578
579 int
580 scan_buffer (target, start, end, count, shortage, allow_quit)
581 register int target;
582 int start, end;
583 int count;
584 int *shortage;
585 int allow_quit;
586 {
587 struct region_cache *newline_cache;
588 int direction;
589
590 if (count > 0)
591 {
592 direction = 1;
593 if (! end) end = ZV;
594 }
595 else
596 {
597 direction = -1;
598 if (! end) end = BEGV;
599 }
600
601 newline_cache_on_off (current_buffer);
602 newline_cache = current_buffer->newline_cache;
603
604 if (shortage != 0)
605 *shortage = 0;
606
607 immediate_quit = allow_quit;
608
609 if (count > 0)
610 while (start != end)
611 {
612 /* Our innermost scanning loop is very simple; it doesn't know
613 about gaps, buffer ends, or the newline cache. ceiling is
614 the position of the last character before the next such
615 obstacle --- the last character the dumb search loop should
616 examine. */
617 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
618 int start_byte = CHAR_TO_BYTE (start);
619 int tem;
620
621 /* If we're looking for a newline, consult the newline cache
622 to see where we can avoid some scanning. */
623 if (target == '\n' && newline_cache)
624 {
625 int next_change;
626 immediate_quit = 0;
627 while (region_cache_forward
628 (current_buffer, newline_cache, start_byte, &next_change))
629 start_byte = next_change;
630 immediate_quit = allow_quit;
631
632 /* START should never be after END. */
633 if (start_byte > ceiling_byte)
634 start_byte = ceiling_byte;
635
636 /* Now the text after start is an unknown region, and
637 next_change is the position of the next known region. */
638 ceiling_byte = min (next_change - 1, ceiling_byte);
639 }
640
641 /* The dumb loop can only scan text stored in contiguous
642 bytes. BUFFER_CEILING_OF returns the last character
643 position that is contiguous, so the ceiling is the
644 position after that. */
645 tem = BUFFER_CEILING_OF (start_byte);
646 ceiling_byte = min (tem, ceiling_byte);
647
648 {
649 /* The termination address of the dumb loop. */
650 register unsigned char *ceiling_addr
651 = BYTE_POS_ADDR (ceiling_byte) + 1;
652 register unsigned char *cursor
653 = BYTE_POS_ADDR (start_byte);
654 unsigned char *base = cursor;
655
656 while (cursor < ceiling_addr)
657 {
658 unsigned char *scan_start = cursor;
659
660 /* The dumb loop. */
661 while (*cursor != target && ++cursor < ceiling_addr)
662 ;
663
664 /* If we're looking for newlines, cache the fact that
665 the region from start to cursor is free of them. */
666 if (target == '\n' && newline_cache)
667 know_region_cache (current_buffer, newline_cache,
668 start_byte + scan_start - base,
669 start_byte + cursor - base);
670
671 /* Did we find the target character? */
672 if (cursor < ceiling_addr)
673 {
674 if (--count == 0)
675 {
676 immediate_quit = 0;
677 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
678 }
679 cursor++;
680 }
681 }
682
683 start = BYTE_TO_CHAR (start_byte + cursor - base);
684 }
685 }
686 else
687 while (start > end)
688 {
689 /* The last character to check before the next obstacle. */
690 int ceiling_byte = CHAR_TO_BYTE (end);
691 int start_byte = CHAR_TO_BYTE (start);
692 int tem;
693
694 /* Consult the newline cache, if appropriate. */
695 if (target == '\n' && newline_cache)
696 {
697 int next_change;
698 immediate_quit = 0;
699 while (region_cache_backward
700 (current_buffer, newline_cache, start_byte, &next_change))
701 start_byte = next_change;
702 immediate_quit = allow_quit;
703
704 /* Start should never be at or before end. */
705 if (start_byte <= ceiling_byte)
706 start_byte = ceiling_byte + 1;
707
708 /* Now the text before start is an unknown region, and
709 next_change is the position of the next known region. */
710 ceiling_byte = max (next_change, ceiling_byte);
711 }
712
713 /* Stop scanning before the gap. */
714 tem = BUFFER_FLOOR_OF (start_byte - 1);
715 ceiling_byte = max (tem, ceiling_byte);
716
717 {
718 /* The termination address of the dumb loop. */
719 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
720 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
721 unsigned char *base = cursor;
722
723 while (cursor >= ceiling_addr)
724 {
725 unsigned char *scan_start = cursor;
726
727 while (*cursor != target && --cursor >= ceiling_addr)
728 ;
729
730 /* If we're looking for newlines, cache the fact that
731 the region from after the cursor to start is free of them. */
732 if (target == '\n' && newline_cache)
733 know_region_cache (current_buffer, newline_cache,
734 start_byte + cursor - base,
735 start_byte + scan_start - base);
736
737 /* Did we find the target character? */
738 if (cursor >= ceiling_addr)
739 {
740 if (++count >= 0)
741 {
742 immediate_quit = 0;
743 return BYTE_TO_CHAR (start_byte + cursor - base);
744 }
745 cursor--;
746 }
747 }
748
749 start = BYTE_TO_CHAR (start_byte + cursor - base);
750 }
751 }
752
753 immediate_quit = 0;
754 if (shortage != 0)
755 *shortage = count * direction;
756 return start;
757 }
758 \f
759 /* Search for COUNT instances of a line boundary, which means either a
760 newline or (if selective display enabled) a carriage return.
761 Start at START. If COUNT is negative, search backwards.
762
763 We report the resulting position by calling TEMP_SET_PT_BOTH.
764
765 If we find COUNT instances. we position after (always after,
766 even if scanning backwards) the COUNTth match, and return 0.
767
768 If we don't find COUNT instances before reaching the end of the
769 buffer (or the beginning, if scanning backwards), we return
770 the number of line boundaries left unfound, and position at
771 the limit we bumped up against.
772
773 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
774 except in special cases. */
775
776 int
777 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
778 int start, start_byte;
779 int limit, limit_byte;
780 register int count;
781 int allow_quit;
782 {
783 int direction = ((count > 0) ? 1 : -1);
784
785 register unsigned char *cursor;
786 unsigned char *base;
787
788 register int ceiling;
789 register unsigned char *ceiling_addr;
790
791 int old_immediate_quit = immediate_quit;
792
793 /* The code that follows is like scan_buffer
794 but checks for either newline or carriage return. */
795
796 if (allow_quit)
797 immediate_quit++;
798
799 start_byte = CHAR_TO_BYTE (start);
800
801 if (count > 0)
802 {
803 while (start_byte < limit_byte)
804 {
805 ceiling = BUFFER_CEILING_OF (start_byte);
806 ceiling = min (limit_byte - 1, ceiling);
807 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
808 base = (cursor = BYTE_POS_ADDR (start_byte));
809 while (1)
810 {
811 while (*cursor != '\n' && ++cursor != ceiling_addr)
812 ;
813
814 if (cursor != ceiling_addr)
815 {
816 if (--count == 0)
817 {
818 immediate_quit = old_immediate_quit;
819 start_byte = start_byte + cursor - base + 1;
820 start = BYTE_TO_CHAR (start_byte);
821 TEMP_SET_PT_BOTH (start, start_byte);
822 return 0;
823 }
824 else
825 if (++cursor == ceiling_addr)
826 break;
827 }
828 else
829 break;
830 }
831 start_byte += cursor - base;
832 }
833 }
834 else
835 {
836 while (start_byte > limit_byte)
837 {
838 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
839 ceiling = max (limit_byte, ceiling);
840 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
841 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
842 while (1)
843 {
844 while (--cursor != ceiling_addr && *cursor != '\n')
845 ;
846
847 if (cursor != ceiling_addr)
848 {
849 if (++count == 0)
850 {
851 immediate_quit = old_immediate_quit;
852 /* Return the position AFTER the match we found. */
853 start_byte = start_byte + cursor - base + 1;
854 start = BYTE_TO_CHAR (start_byte);
855 TEMP_SET_PT_BOTH (start, start_byte);
856 return 0;
857 }
858 }
859 else
860 break;
861 }
862 /* Here we add 1 to compensate for the last decrement
863 of CURSOR, which took it past the valid range. */
864 start_byte += cursor - base + 1;
865 }
866 }
867
868 TEMP_SET_PT_BOTH (limit, limit_byte);
869 immediate_quit = old_immediate_quit;
870
871 return count * direction;
872 }
873
874 int
875 find_next_newline_no_quit (from, cnt)
876 register int from, cnt;
877 {
878 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
879 }
880
881 /* Like find_next_newline, but returns position before the newline,
882 not after, and only search up to TO. This isn't just
883 find_next_newline (...)-1, because you might hit TO. */
884
885 int
886 find_before_next_newline (from, to, cnt)
887 int from, to, cnt;
888 {
889 int shortage;
890 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
891
892 if (shortage == 0)
893 pos--;
894
895 return pos;
896 }
897 \f
898 /* Subroutines of Lisp buffer search functions. */
899
900 static Lisp_Object
901 search_command (string, bound, noerror, count, direction, RE, posix)
902 Lisp_Object string, bound, noerror, count;
903 int direction;
904 int RE;
905 int posix;
906 {
907 register int np;
908 int lim, lim_byte;
909 int n = direction;
910
911 if (!NILP (count))
912 {
913 CHECK_NUMBER (count);
914 n *= XINT (count);
915 }
916
917 CHECK_STRING (string);
918 if (NILP (bound))
919 {
920 if (n > 0)
921 lim = ZV, lim_byte = ZV_BYTE;
922 else
923 lim = BEGV, lim_byte = BEGV_BYTE;
924 }
925 else
926 {
927 CHECK_NUMBER_COERCE_MARKER (bound);
928 lim = XINT (bound);
929 if (n > 0 ? lim < PT : lim > PT)
930 error ("Invalid search bound (wrong side of point)");
931 if (lim > ZV)
932 lim = ZV, lim_byte = ZV_BYTE;
933 else if (lim < BEGV)
934 lim = BEGV, lim_byte = BEGV_BYTE;
935 else
936 lim_byte = CHAR_TO_BYTE (lim);
937 }
938
939 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
940 (!NILP (current_buffer->case_fold_search)
941 ? current_buffer->case_canon_table
942 : Qnil),
943 (!NILP (current_buffer->case_fold_search)
944 ? current_buffer->case_eqv_table
945 : Qnil),
946 posix);
947 if (np <= 0)
948 {
949 if (NILP (noerror))
950 return signal_failure (string);
951 if (!EQ (noerror, Qt))
952 {
953 if (lim < BEGV || lim > ZV)
954 abort ();
955 SET_PT_BOTH (lim, lim_byte);
956 return Qnil;
957 #if 0 /* This would be clean, but maybe programs depend on
958 a value of nil here. */
959 np = lim;
960 #endif
961 }
962 else
963 return Qnil;
964 }
965
966 if (np < BEGV || np > ZV)
967 abort ();
968
969 SET_PT (np);
970
971 return make_number (np);
972 }
973 \f
974 /* Return 1 if REGEXP it matches just one constant string. */
975
976 static int
977 trivial_regexp_p (regexp)
978 Lisp_Object regexp;
979 {
980 int len = SBYTES (regexp);
981 unsigned char *s = SDATA (regexp);
982 while (--len >= 0)
983 {
984 switch (*s++)
985 {
986 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
987 return 0;
988 case '\\':
989 if (--len < 0)
990 return 0;
991 switch (*s++)
992 {
993 case '|': case '(': case ')': case '`': case '\'': case 'b':
994 case 'B': case '<': case '>': case 'w': case 'W': case 's':
995 case 'S': case '=': case '{': case '}': case '_':
996 case 'c': case 'C': /* for categoryspec and notcategoryspec */
997 case '1': case '2': case '3': case '4': case '5':
998 case '6': case '7': case '8': case '9':
999 return 0;
1000 }
1001 }
1002 }
1003 return 1;
1004 }
1005
1006 /* Search for the n'th occurrence of STRING in the current buffer,
1007 starting at position POS and stopping at position LIM,
1008 treating STRING as a literal string if RE is false or as
1009 a regular expression if RE is true.
1010
1011 If N is positive, searching is forward and LIM must be greater than POS.
1012 If N is negative, searching is backward and LIM must be less than POS.
1013
1014 Returns -x if x occurrences remain to be found (x > 0),
1015 or else the position at the beginning of the Nth occurrence
1016 (if searching backward) or the end (if searching forward).
1017
1018 POSIX is nonzero if we want full backtracking (POSIX style)
1019 for this pattern. 0 means backtrack only enough to get a valid match. */
1020
1021 #define TRANSLATE(out, trt, d) \
1022 do \
1023 { \
1024 if (! NILP (trt)) \
1025 { \
1026 Lisp_Object temp; \
1027 temp = Faref (trt, make_number (d)); \
1028 if (INTEGERP (temp)) \
1029 out = XINT (temp); \
1030 else \
1031 out = d; \
1032 } \
1033 else \
1034 out = d; \
1035 } \
1036 while (0)
1037
1038 static int
1039 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1040 RE, trt, inverse_trt, posix)
1041 Lisp_Object string;
1042 int pos;
1043 int pos_byte;
1044 int lim;
1045 int lim_byte;
1046 int n;
1047 int RE;
1048 Lisp_Object trt;
1049 Lisp_Object inverse_trt;
1050 int posix;
1051 {
1052 int len = SCHARS (string);
1053 int len_byte = SBYTES (string);
1054 register int i;
1055
1056 if (running_asynch_code)
1057 save_search_regs ();
1058
1059 /* Searching 0 times means don't move. */
1060 /* Null string is found at starting position. */
1061 if (len == 0 || n == 0)
1062 {
1063 set_search_regs (pos_byte, 0);
1064 return pos;
1065 }
1066
1067 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1068 {
1069 unsigned char *p1, *p2;
1070 int s1, s2;
1071 struct re_pattern_buffer *bufp;
1072
1073 bufp = compile_pattern (string, &search_regs, trt, posix,
1074 !NILP (current_buffer->enable_multibyte_characters));
1075
1076 immediate_quit = 1; /* Quit immediately if user types ^G,
1077 because letting this function finish
1078 can take too long. */
1079 QUIT; /* Do a pending quit right away,
1080 to avoid paradoxical behavior */
1081 /* Get pointers and sizes of the two strings
1082 that make up the visible portion of the buffer. */
1083
1084 p1 = BEGV_ADDR;
1085 s1 = GPT_BYTE - BEGV_BYTE;
1086 p2 = GAP_END_ADDR;
1087 s2 = ZV_BYTE - GPT_BYTE;
1088 if (s1 < 0)
1089 {
1090 p2 = p1;
1091 s2 = ZV_BYTE - BEGV_BYTE;
1092 s1 = 0;
1093 }
1094 if (s2 < 0)
1095 {
1096 s1 = ZV_BYTE - BEGV_BYTE;
1097 s2 = 0;
1098 }
1099 re_match_object = Qnil;
1100
1101 while (n < 0)
1102 {
1103 int val;
1104 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1105 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1106 &search_regs,
1107 /* Don't allow match past current point */
1108 pos_byte - BEGV_BYTE);
1109 if (val == -2)
1110 {
1111 matcher_overflow ();
1112 }
1113 if (val >= 0)
1114 {
1115 pos_byte = search_regs.start[0] + BEGV_BYTE;
1116 for (i = 0; i < search_regs.num_regs; i++)
1117 if (search_regs.start[i] >= 0)
1118 {
1119 search_regs.start[i]
1120 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1121 search_regs.end[i]
1122 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1123 }
1124 XSETBUFFER (last_thing_searched, current_buffer);
1125 /* Set pos to the new position. */
1126 pos = search_regs.start[0];
1127 }
1128 else
1129 {
1130 immediate_quit = 0;
1131 return (n);
1132 }
1133 n++;
1134 }
1135 while (n > 0)
1136 {
1137 int val;
1138 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1139 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1140 &search_regs,
1141 lim_byte - BEGV_BYTE);
1142 if (val == -2)
1143 {
1144 matcher_overflow ();
1145 }
1146 if (val >= 0)
1147 {
1148 pos_byte = search_regs.end[0] + BEGV_BYTE;
1149 for (i = 0; i < search_regs.num_regs; i++)
1150 if (search_regs.start[i] >= 0)
1151 {
1152 search_regs.start[i]
1153 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1154 search_regs.end[i]
1155 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1156 }
1157 XSETBUFFER (last_thing_searched, current_buffer);
1158 pos = search_regs.end[0];
1159 }
1160 else
1161 {
1162 immediate_quit = 0;
1163 return (0 - n);
1164 }
1165 n--;
1166 }
1167 immediate_quit = 0;
1168 return (pos);
1169 }
1170 else /* non-RE case */
1171 {
1172 unsigned char *raw_pattern, *pat;
1173 int raw_pattern_size;
1174 int raw_pattern_size_byte;
1175 unsigned char *patbuf;
1176 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1177 unsigned char *base_pat;
1178 /* Set to positive if we find a non-ASCII char that need
1179 translation. Otherwise set to zero later. */
1180 int charset_base = -1;
1181 int boyer_moore_ok = 1;
1182
1183 /* MULTIBYTE says whether the text to be searched is multibyte.
1184 We must convert PATTERN to match that, or we will not really
1185 find things right. */
1186
1187 if (multibyte == STRING_MULTIBYTE (string))
1188 {
1189 raw_pattern = (unsigned char *) SDATA (string);
1190 raw_pattern_size = SCHARS (string);
1191 raw_pattern_size_byte = SBYTES (string);
1192 }
1193 else if (multibyte)
1194 {
1195 raw_pattern_size = SCHARS (string);
1196 raw_pattern_size_byte
1197 = count_size_as_multibyte (SDATA (string),
1198 raw_pattern_size);
1199 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1200 copy_text (SDATA (string), raw_pattern,
1201 SCHARS (string), 0, 1);
1202 }
1203 else
1204 {
1205 /* Converting multibyte to single-byte.
1206
1207 ??? Perhaps this conversion should be done in a special way
1208 by subtracting nonascii-insert-offset from each non-ASCII char,
1209 so that only the multibyte chars which really correspond to
1210 the chosen single-byte character set can possibly match. */
1211 raw_pattern_size = SCHARS (string);
1212 raw_pattern_size_byte = SCHARS (string);
1213 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1214 copy_text (SDATA (string), raw_pattern,
1215 SBYTES (string), 1, 0);
1216 }
1217
1218 /* Copy and optionally translate the pattern. */
1219 len = raw_pattern_size;
1220 len_byte = raw_pattern_size_byte;
1221 patbuf = (unsigned char *) alloca (len_byte);
1222 pat = patbuf;
1223 base_pat = raw_pattern;
1224 if (multibyte)
1225 {
1226 /* Fill patbuf by translated characters in STRING while
1227 checking if we can use boyer-moore search. If TRT is
1228 non-nil, we can use boyer-moore search only if TRT can be
1229 represented by the byte array of 256 elements. For that,
1230 all non-ASCII case-equivalents of all case-senstive
1231 characters in STRING must belong to the same charset and
1232 row. */
1233
1234 while (--len >= 0)
1235 {
1236 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1237 int c, translated, inverse;
1238 int in_charlen, charlen;
1239
1240 /* If we got here and the RE flag is set, it's because we're
1241 dealing with a regexp known to be trivial, so the backslash
1242 just quotes the next character. */
1243 if (RE && *base_pat == '\\')
1244 {
1245 len--;
1246 raw_pattern_size--;
1247 len_byte--;
1248 base_pat++;
1249 }
1250
1251 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1252
1253 if (NILP (trt))
1254 {
1255 str = base_pat;
1256 charlen = in_charlen;
1257 }
1258 else
1259 {
1260 /* Translate the character. */
1261 TRANSLATE (translated, trt, c);
1262 charlen = CHAR_STRING (translated, str_base);
1263 str = str_base;
1264
1265 /* Check if C has any other case-equivalents. */
1266 TRANSLATE (inverse, inverse_trt, c);
1267 /* If so, check if we can use boyer-moore. */
1268 if (c != inverse && boyer_moore_ok)
1269 {
1270 /* Check if all equivalents belong to the same
1271 charset & row. Note that the check of C
1272 itself is done by the last iteration. Note
1273 also that we don't have to check ASCII
1274 characters because boyer-moore search can
1275 always handle their translation. */
1276 while (1)
1277 {
1278 if (ASCII_BYTE_P (inverse))
1279 {
1280 if (charset_base > 0)
1281 {
1282 boyer_moore_ok = 0;
1283 break;
1284 }
1285 charset_base = 0;
1286 }
1287 else if (SINGLE_BYTE_CHAR_P (inverse))
1288 {
1289 /* Boyer-moore search can't handle a
1290 translation of an eight-bit
1291 character. */
1292 boyer_moore_ok = 0;
1293 break;
1294 }
1295 else if (charset_base < 0)
1296 charset_base = inverse & ~CHAR_FIELD3_MASK;
1297 else if ((inverse & ~CHAR_FIELD3_MASK)
1298 != charset_base)
1299 {
1300 boyer_moore_ok = 0;
1301 break;
1302 }
1303 if (c == inverse)
1304 break;
1305 TRANSLATE (inverse, inverse_trt, inverse);
1306 }
1307 }
1308 }
1309 if (charset_base < 0)
1310 charset_base = 0;
1311
1312 /* Store this character into the translated pattern. */
1313 bcopy (str, pat, charlen);
1314 pat += charlen;
1315 base_pat += in_charlen;
1316 len_byte -= in_charlen;
1317 }
1318 }
1319 else
1320 {
1321 /* Unibyte buffer. */
1322 charset_base = 0;
1323 while (--len >= 0)
1324 {
1325 int c, translated;
1326
1327 /* If we got here and the RE flag is set, it's because we're
1328 dealing with a regexp known to be trivial, so the backslash
1329 just quotes the next character. */
1330 if (RE && *base_pat == '\\')
1331 {
1332 len--;
1333 raw_pattern_size--;
1334 base_pat++;
1335 }
1336 c = *base_pat++;
1337 TRANSLATE (translated, trt, c);
1338 *pat++ = translated;
1339 }
1340 }
1341
1342 len_byte = pat - patbuf;
1343 len = raw_pattern_size;
1344 pat = base_pat = patbuf;
1345
1346 if (boyer_moore_ok)
1347 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1348 pos, pos_byte, lim, lim_byte,
1349 charset_base);
1350 else
1351 return simple_search (n, pat, len, len_byte, trt,
1352 pos, pos_byte, lim, lim_byte);
1353 }
1354 }
1355 \f
1356 /* Do a simple string search N times for the string PAT,
1357 whose length is LEN/LEN_BYTE,
1358 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1359 TRT is the translation table.
1360
1361 Return the character position where the match is found.
1362 Otherwise, if M matches remained to be found, return -M.
1363
1364 This kind of search works regardless of what is in PAT and
1365 regardless of what is in TRT. It is used in cases where
1366 boyer_moore cannot work. */
1367
1368 static int
1369 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1370 int n;
1371 unsigned char *pat;
1372 int len, len_byte;
1373 Lisp_Object trt;
1374 int pos, pos_byte;
1375 int lim, lim_byte;
1376 {
1377 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1378 int forward = n > 0;
1379
1380 if (lim > pos && multibyte)
1381 while (n > 0)
1382 {
1383 while (1)
1384 {
1385 /* Try matching at position POS. */
1386 int this_pos = pos;
1387 int this_pos_byte = pos_byte;
1388 int this_len = len;
1389 int this_len_byte = len_byte;
1390 unsigned char *p = pat;
1391 if (pos + len > lim)
1392 goto stop;
1393
1394 while (this_len > 0)
1395 {
1396 int charlen, buf_charlen;
1397 int pat_ch, buf_ch;
1398
1399 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1400 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1401 ZV_BYTE - this_pos_byte,
1402 buf_charlen);
1403 TRANSLATE (buf_ch, trt, buf_ch);
1404
1405 if (buf_ch != pat_ch)
1406 break;
1407
1408 this_len_byte -= charlen;
1409 this_len--;
1410 p += charlen;
1411
1412 this_pos_byte += buf_charlen;
1413 this_pos++;
1414 }
1415
1416 if (this_len == 0)
1417 {
1418 pos += len;
1419 pos_byte += len_byte;
1420 break;
1421 }
1422
1423 INC_BOTH (pos, pos_byte);
1424 }
1425
1426 n--;
1427 }
1428 else if (lim > pos)
1429 while (n > 0)
1430 {
1431 while (1)
1432 {
1433 /* Try matching at position POS. */
1434 int this_pos = pos;
1435 int this_len = len;
1436 unsigned char *p = pat;
1437
1438 if (pos + len > lim)
1439 goto stop;
1440
1441 while (this_len > 0)
1442 {
1443 int pat_ch = *p++;
1444 int buf_ch = FETCH_BYTE (this_pos);
1445 TRANSLATE (buf_ch, trt, buf_ch);
1446
1447 if (buf_ch != pat_ch)
1448 break;
1449
1450 this_len--;
1451 this_pos++;
1452 }
1453
1454 if (this_len == 0)
1455 {
1456 pos += len;
1457 break;
1458 }
1459
1460 pos++;
1461 }
1462
1463 n--;
1464 }
1465 /* Backwards search. */
1466 else if (lim < pos && multibyte)
1467 while (n < 0)
1468 {
1469 while (1)
1470 {
1471 /* Try matching at position POS. */
1472 int this_pos = pos - len;
1473 int this_pos_byte = pos_byte - len_byte;
1474 int this_len = len;
1475 int this_len_byte = len_byte;
1476 unsigned char *p = pat;
1477
1478 if (pos - len < lim)
1479 goto stop;
1480
1481 while (this_len > 0)
1482 {
1483 int charlen, buf_charlen;
1484 int pat_ch, buf_ch;
1485
1486 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1487 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1488 ZV_BYTE - this_pos_byte,
1489 buf_charlen);
1490 TRANSLATE (buf_ch, trt, buf_ch);
1491
1492 if (buf_ch != pat_ch)
1493 break;
1494
1495 this_len_byte -= charlen;
1496 this_len--;
1497 p += charlen;
1498 this_pos_byte += buf_charlen;
1499 this_pos++;
1500 }
1501
1502 if (this_len == 0)
1503 {
1504 pos -= len;
1505 pos_byte -= len_byte;
1506 break;
1507 }
1508
1509 DEC_BOTH (pos, pos_byte);
1510 }
1511
1512 n++;
1513 }
1514 else if (lim < pos)
1515 while (n < 0)
1516 {
1517 while (1)
1518 {
1519 /* Try matching at position POS. */
1520 int this_pos = pos - len;
1521 int this_len = len;
1522 unsigned char *p = pat;
1523
1524 if (pos - len < lim)
1525 goto stop;
1526
1527 while (this_len > 0)
1528 {
1529 int pat_ch = *p++;
1530 int buf_ch = FETCH_BYTE (this_pos);
1531 TRANSLATE (buf_ch, trt, buf_ch);
1532
1533 if (buf_ch != pat_ch)
1534 break;
1535 this_len--;
1536 this_pos++;
1537 }
1538
1539 if (this_len == 0)
1540 {
1541 pos -= len;
1542 break;
1543 }
1544
1545 pos--;
1546 }
1547
1548 n++;
1549 }
1550
1551 stop:
1552 if (n == 0)
1553 {
1554 if (forward)
1555 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1556 else
1557 set_search_regs (multibyte ? pos_byte : pos, len_byte);
1558
1559 return pos;
1560 }
1561 else if (n > 0)
1562 return -n;
1563 else
1564 return n;
1565 }
1566 \f
1567 /* Do Boyer-Moore search N times for the string BASE_PAT,
1568 whose length is LEN/LEN_BYTE,
1569 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1570 DIRECTION says which direction we search in.
1571 TRT and INVERSE_TRT are translation tables.
1572 Characters in PAT are already translated by TRT.
1573
1574 This kind of search works if all the characters in BASE_PAT that
1575 have nontrivial translation are the same aside from the last byte.
1576 This makes it possible to translate just the last byte of a
1577 character, and do so after just a simple test of the context.
1578 CHARSET_BASE is nonzero iff there is such a non-ASCII character.
1579
1580 If that criterion is not satisfied, do not call this function. */
1581
1582 static int
1583 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1584 pos, pos_byte, lim, lim_byte, charset_base)
1585 int n;
1586 unsigned char *base_pat;
1587 int len, len_byte;
1588 Lisp_Object trt;
1589 Lisp_Object inverse_trt;
1590 int pos, pos_byte;
1591 int lim, lim_byte;
1592 int charset_base;
1593 {
1594 int direction = ((n > 0) ? 1 : -1);
1595 register int dirlen;
1596 int infinity, limit, stride_for_teases = 0;
1597 register int *BM_tab;
1598 int *BM_tab_base;
1599 register unsigned char *cursor, *p_limit;
1600 register int i, j;
1601 unsigned char *pat, *pat_end;
1602 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1603
1604 unsigned char simple_translate[0400];
1605 /* These are set to the preceding bytes of a byte to be translated
1606 if charset_base is nonzero. As the maximum byte length of a
1607 multibyte character is 4, we have to check at most three previous
1608 bytes. */
1609 int translate_prev_byte1 = 0;
1610 int translate_prev_byte2 = 0;
1611 int translate_prev_byte3 = 0;
1612
1613 #ifdef C_ALLOCA
1614 int BM_tab_space[0400];
1615 BM_tab = &BM_tab_space[0];
1616 #else
1617 BM_tab = (int *) alloca (0400 * sizeof (int));
1618 #endif
1619 /* The general approach is that we are going to maintain that we know */
1620 /* the first (closest to the present position, in whatever direction */
1621 /* we're searching) character that could possibly be the last */
1622 /* (furthest from present position) character of a valid match. We */
1623 /* advance the state of our knowledge by looking at that character */
1624 /* and seeing whether it indeed matches the last character of the */
1625 /* pattern. If it does, we take a closer look. If it does not, we */
1626 /* move our pointer (to putative last characters) as far as is */
1627 /* logically possible. This amount of movement, which I call a */
1628 /* stride, will be the length of the pattern if the actual character */
1629 /* appears nowhere in the pattern, otherwise it will be the distance */
1630 /* from the last occurrence of that character to the end of the */
1631 /* pattern. */
1632 /* As a coding trick, an enormous stride is coded into the table for */
1633 /* characters that match the last character. This allows use of only */
1634 /* a single test, a test for having gone past the end of the */
1635 /* permissible match region, to test for both possible matches (when */
1636 /* the stride goes past the end immediately) and failure to */
1637 /* match (where you get nudged past the end one stride at a time). */
1638
1639 /* Here we make a "mickey mouse" BM table. The stride of the search */
1640 /* is determined only by the last character of the putative match. */
1641 /* If that character does not match, we will stride the proper */
1642 /* distance to propose a match that superimposes it on the last */
1643 /* instance of a character that matches it (per trt), or misses */
1644 /* it entirely if there is none. */
1645
1646 dirlen = len_byte * direction;
1647 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1648
1649 /* Record position after the end of the pattern. */
1650 pat_end = base_pat + len_byte;
1651 /* BASE_PAT points to a character that we start scanning from.
1652 It is the first character in a forward search,
1653 the last character in a backward search. */
1654 if (direction < 0)
1655 base_pat = pat_end - 1;
1656
1657 BM_tab_base = BM_tab;
1658 BM_tab += 0400;
1659 j = dirlen; /* to get it in a register */
1660 /* A character that does not appear in the pattern induces a */
1661 /* stride equal to the pattern length. */
1662 while (BM_tab_base != BM_tab)
1663 {
1664 *--BM_tab = j;
1665 *--BM_tab = j;
1666 *--BM_tab = j;
1667 *--BM_tab = j;
1668 }
1669
1670 /* We use this for translation, instead of TRT itself.
1671 We fill this in to handle the characters that actually
1672 occur in the pattern. Others don't matter anyway! */
1673 bzero (simple_translate, sizeof simple_translate);
1674 for (i = 0; i < 0400; i++)
1675 simple_translate[i] = i;
1676
1677 if (charset_base)
1678 {
1679 /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
1680 byte following them are the target of translation. */
1681 int sample_char = charset_base | 0x20;
1682 unsigned char str[MAX_MULTIBYTE_LENGTH];
1683 int len = CHAR_STRING (sample_char, str);
1684
1685 translate_prev_byte1 = str[len - 2];
1686 if (len > 2)
1687 {
1688 translate_prev_byte2 = str[len - 3];
1689 if (len > 3)
1690 translate_prev_byte3 = str[len - 4];
1691 }
1692 }
1693
1694 i = 0;
1695 while (i != infinity)
1696 {
1697 unsigned char *ptr = base_pat + i;
1698 i += direction;
1699 if (i == dirlen)
1700 i = infinity;
1701 if (! NILP (trt))
1702 {
1703 /* If the byte currently looking at is the last of a
1704 character to check case-equivalents, set CH to that
1705 character. An ASCII character and a non-ASCII character
1706 matching with CHARSET_BASE are to be checked. */
1707 int ch = -1;
1708
1709 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1710 ch = *ptr;
1711 else if (charset_base
1712 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1713 {
1714 unsigned char *charstart = ptr - 1;
1715
1716 while (! (CHAR_HEAD_P (*charstart)))
1717 charstart--;
1718 ch = STRING_CHAR (charstart, ptr - charstart + 1);
1719 if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1720 ch = -1;
1721 }
1722
1723 if (ch >= 0400)
1724 j = ((unsigned char) ch) | 0200;
1725 else
1726 j = *ptr;
1727
1728 if (i == infinity)
1729 stride_for_teases = BM_tab[j];
1730
1731 BM_tab[j] = dirlen - i;
1732 /* A translation table is accompanied by its inverse -- see */
1733 /* comment following downcase_table for details */
1734 if (ch >= 0)
1735 {
1736 int starting_ch = ch;
1737 int starting_j = j;
1738
1739 while (1)
1740 {
1741 TRANSLATE (ch, inverse_trt, ch);
1742 if (ch >= 0400)
1743 j = ((unsigned char) ch) | 0200;
1744 else
1745 j = (unsigned char) ch;
1746
1747 /* For all the characters that map into CH,
1748 set up simple_translate to map the last byte
1749 into STARTING_J. */
1750 simple_translate[j] = starting_j;
1751 if (ch == starting_ch)
1752 break;
1753 BM_tab[j] = dirlen - i;
1754 }
1755 }
1756 }
1757 else
1758 {
1759 j = *ptr;
1760
1761 if (i == infinity)
1762 stride_for_teases = BM_tab[j];
1763 BM_tab[j] = dirlen - i;
1764 }
1765 /* stride_for_teases tells how much to stride if we get a */
1766 /* match on the far character but are subsequently */
1767 /* disappointed, by recording what the stride would have been */
1768 /* for that character if the last character had been */
1769 /* different. */
1770 }
1771 infinity = dirlen - infinity;
1772 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1773 /* loop invariant - POS_BYTE points at where last char (first
1774 char if reverse) of pattern would align in a possible match. */
1775 while (n != 0)
1776 {
1777 int tail_end;
1778 unsigned char *tail_end_ptr;
1779
1780 /* It's been reported that some (broken) compiler thinks that
1781 Boolean expressions in an arithmetic context are unsigned.
1782 Using an explicit ?1:0 prevents this. */
1783 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1784 < 0)
1785 return (n * (0 - direction));
1786 /* First we do the part we can by pointers (maybe nothing) */
1787 QUIT;
1788 pat = base_pat;
1789 limit = pos_byte - dirlen + direction;
1790 if (direction > 0)
1791 {
1792 limit = BUFFER_CEILING_OF (limit);
1793 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1794 can take on without hitting edge of buffer or the gap. */
1795 limit = min (limit, pos_byte + 20000);
1796 limit = min (limit, lim_byte - 1);
1797 }
1798 else
1799 {
1800 limit = BUFFER_FLOOR_OF (limit);
1801 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1802 can take on without hitting edge of buffer or the gap. */
1803 limit = max (limit, pos_byte - 20000);
1804 limit = max (limit, lim_byte);
1805 }
1806 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1807 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1808
1809 if ((limit - pos_byte) * direction > 20)
1810 {
1811 unsigned char *p2;
1812
1813 p_limit = BYTE_POS_ADDR (limit);
1814 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1815 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1816 while (1) /* use one cursor setting as long as i can */
1817 {
1818 if (direction > 0) /* worth duplicating */
1819 {
1820 /* Use signed comparison if appropriate
1821 to make cursor+infinity sure to be > p_limit.
1822 Assuming that the buffer lies in a range of addresses
1823 that are all "positive" (as ints) or all "negative",
1824 either kind of comparison will work as long
1825 as we don't step by infinity. So pick the kind
1826 that works when we do step by infinity. */
1827 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1828 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1829 cursor += BM_tab[*cursor];
1830 else
1831 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1832 cursor += BM_tab[*cursor];
1833 }
1834 else
1835 {
1836 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1837 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1838 cursor += BM_tab[*cursor];
1839 else
1840 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1841 cursor += BM_tab[*cursor];
1842 }
1843 /* If you are here, cursor is beyond the end of the searched region. */
1844 /* This can happen if you match on the far character of the pattern, */
1845 /* because the "stride" of that character is infinity, a number able */
1846 /* to throw you well beyond the end of the search. It can also */
1847 /* happen if you fail to match within the permitted region and would */
1848 /* otherwise try a character beyond that region */
1849 if ((cursor - p_limit) * direction <= len_byte)
1850 break; /* a small overrun is genuine */
1851 cursor -= infinity; /* large overrun = hit */
1852 i = dirlen - direction;
1853 if (! NILP (trt))
1854 {
1855 while ((i -= direction) + direction != 0)
1856 {
1857 int ch;
1858 cursor -= direction;
1859 /* Translate only the last byte of a character. */
1860 if (! multibyte
1861 || ((cursor == tail_end_ptr
1862 || CHAR_HEAD_P (cursor[1]))
1863 && (CHAR_HEAD_P (cursor[0])
1864 /* Check if this is the last byte of
1865 a translable character. */
1866 || (translate_prev_byte1 == cursor[-1]
1867 && (CHAR_HEAD_P (translate_prev_byte1)
1868 || (translate_prev_byte2 == cursor[-2]
1869 && (CHAR_HEAD_P (translate_prev_byte2)
1870 || (translate_prev_byte3 == cursor[-3]))))))))
1871 ch = simple_translate[*cursor];
1872 else
1873 ch = *cursor;
1874 if (pat[i] != ch)
1875 break;
1876 }
1877 }
1878 else
1879 {
1880 while ((i -= direction) + direction != 0)
1881 {
1882 cursor -= direction;
1883 if (pat[i] != *cursor)
1884 break;
1885 }
1886 }
1887 cursor += dirlen - i - direction; /* fix cursor */
1888 if (i + direction == 0)
1889 {
1890 int position;
1891
1892 cursor -= direction;
1893
1894 position = pos_byte + cursor - p2 + ((direction > 0)
1895 ? 1 - len_byte : 0);
1896 set_search_regs (position, len_byte);
1897
1898 if ((n -= direction) != 0)
1899 cursor += dirlen; /* to resume search */
1900 else
1901 return ((direction > 0)
1902 ? search_regs.end[0] : search_regs.start[0]);
1903 }
1904 else
1905 cursor += stride_for_teases; /* <sigh> we lose - */
1906 }
1907 pos_byte += cursor - p2;
1908 }
1909 else
1910 /* Now we'll pick up a clump that has to be done the hard */
1911 /* way because it covers a discontinuity */
1912 {
1913 limit = ((direction > 0)
1914 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1915 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1916 limit = ((direction > 0)
1917 ? min (limit + len_byte, lim_byte - 1)
1918 : max (limit - len_byte, lim_byte));
1919 /* LIMIT is now the last value POS_BYTE can have
1920 and still be valid for a possible match. */
1921 while (1)
1922 {
1923 /* This loop can be coded for space rather than */
1924 /* speed because it will usually run only once. */
1925 /* (the reach is at most len + 21, and typically */
1926 /* does not exceed len) */
1927 while ((limit - pos_byte) * direction >= 0)
1928 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1929 /* now run the same tests to distinguish going off the */
1930 /* end, a match or a phony match. */
1931 if ((pos_byte - limit) * direction <= len_byte)
1932 break; /* ran off the end */
1933 /* Found what might be a match.
1934 Set POS_BYTE back to last (first if reverse) pos. */
1935 pos_byte -= infinity;
1936 i = dirlen - direction;
1937 while ((i -= direction) + direction != 0)
1938 {
1939 int ch;
1940 unsigned char *ptr;
1941 pos_byte -= direction;
1942 ptr = BYTE_POS_ADDR (pos_byte);
1943 /* Translate only the last byte of a character. */
1944 if (! multibyte
1945 || ((ptr == tail_end_ptr
1946 || CHAR_HEAD_P (ptr[1]))
1947 && (CHAR_HEAD_P (ptr[0])
1948 /* Check if this is the last byte of a
1949 translable character. */
1950 || (translate_prev_byte1 == ptr[-1]
1951 && (CHAR_HEAD_P (translate_prev_byte1)
1952 || (translate_prev_byte2 == ptr[-2]
1953 && (CHAR_HEAD_P (translate_prev_byte2)
1954 || translate_prev_byte3 == ptr[-3])))))))
1955 ch = simple_translate[*ptr];
1956 else
1957 ch = *ptr;
1958 if (pat[i] != ch)
1959 break;
1960 }
1961 /* Above loop has moved POS_BYTE part or all the way
1962 back to the first pos (last pos if reverse).
1963 Set it once again at the last (first if reverse) char. */
1964 pos_byte += dirlen - i- direction;
1965 if (i + direction == 0)
1966 {
1967 int position;
1968 pos_byte -= direction;
1969
1970 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1971
1972 set_search_regs (position, len_byte);
1973
1974 if ((n -= direction) != 0)
1975 pos_byte += dirlen; /* to resume search */
1976 else
1977 return ((direction > 0)
1978 ? search_regs.end[0] : search_regs.start[0]);
1979 }
1980 else
1981 pos_byte += stride_for_teases;
1982 }
1983 }
1984 /* We have done one clump. Can we continue? */
1985 if ((lim_byte - pos_byte) * direction < 0)
1986 return ((0 - n) * direction);
1987 }
1988 return BYTE_TO_CHAR (pos_byte);
1989 }
1990
1991 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1992 for the overall match just found in the current buffer.
1993 Also clear out the match data for registers 1 and up. */
1994
1995 static void
1996 set_search_regs (beg_byte, nbytes)
1997 int beg_byte, nbytes;
1998 {
1999 int i;
2000
2001 /* Make sure we have registers in which to store
2002 the match position. */
2003 if (search_regs.num_regs == 0)
2004 {
2005 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2006 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2007 search_regs.num_regs = 2;
2008 }
2009
2010 /* Clear out the other registers. */
2011 for (i = 1; i < search_regs.num_regs; i++)
2012 {
2013 search_regs.start[i] = -1;
2014 search_regs.end[i] = -1;
2015 }
2016
2017 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2018 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2019 XSETBUFFER (last_thing_searched, current_buffer);
2020 }
2021 \f
2022 /* Given a string of words separated by word delimiters,
2023 compute a regexp that matches those exact words
2024 separated by arbitrary punctuation. */
2025
2026 static Lisp_Object
2027 wordify (string)
2028 Lisp_Object string;
2029 {
2030 register unsigned char *p, *o;
2031 register int i, i_byte, len, punct_count = 0, word_count = 0;
2032 Lisp_Object val;
2033 int prev_c = 0;
2034 int adjust;
2035
2036 CHECK_STRING (string);
2037 p = SDATA (string);
2038 len = SCHARS (string);
2039
2040 for (i = 0, i_byte = 0; i < len; )
2041 {
2042 int c;
2043
2044 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2045
2046 if (SYNTAX (c) != Sword)
2047 {
2048 punct_count++;
2049 if (i > 0 && SYNTAX (prev_c) == Sword)
2050 word_count++;
2051 }
2052
2053 prev_c = c;
2054 }
2055
2056 if (SYNTAX (prev_c) == Sword)
2057 word_count++;
2058 if (!word_count)
2059 return empty_string;
2060
2061 adjust = - punct_count + 5 * (word_count - 1) + 4;
2062 if (STRING_MULTIBYTE (string))
2063 val = make_uninit_multibyte_string (len + adjust,
2064 SBYTES (string)
2065 + adjust);
2066 else
2067 val = make_uninit_string (len + adjust);
2068
2069 o = SDATA (val);
2070 *o++ = '\\';
2071 *o++ = 'b';
2072 prev_c = 0;
2073
2074 for (i = 0, i_byte = 0; i < len; )
2075 {
2076 int c;
2077 int i_byte_orig = i_byte;
2078
2079 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2080
2081 if (SYNTAX (c) == Sword)
2082 {
2083 bcopy (SDATA (string) + i_byte_orig, o,
2084 i_byte - i_byte_orig);
2085 o += i_byte - i_byte_orig;
2086 }
2087 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2088 {
2089 *o++ = '\\';
2090 *o++ = 'W';
2091 *o++ = '\\';
2092 *o++ = 'W';
2093 *o++ = '*';
2094 }
2095
2096 prev_c = c;
2097 }
2098
2099 *o++ = '\\';
2100 *o++ = 'b';
2101
2102 return val;
2103 }
2104 \f
2105 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2106 "MSearch backward: ",
2107 doc: /* Search backward from point for STRING.
2108 Set point to the beginning of the occurrence found, and return point.
2109 An optional second argument bounds the search; it is a buffer position.
2110 The match found must not extend before that position.
2111 Optional third argument, if t, means if fail just return nil (no error).
2112 If not nil and not t, position at limit of search and return nil.
2113 Optional fourth argument is repeat count--search for successive occurrences.
2114
2115 Search case-sensitivity is determined by the value of the variable
2116 `case-fold-search', which see.
2117
2118 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2119 (string, bound, noerror, count)
2120 Lisp_Object string, bound, noerror, count;
2121 {
2122 return search_command (string, bound, noerror, count, -1, 0, 0);
2123 }
2124
2125 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2126 doc: /* Search forward from point for STRING.
2127 Set point to the end of the occurrence found, and return point.
2128 An optional second argument bounds the search; it is a buffer position.
2129 The match found must not extend after that position. nil is equivalent
2130 to (point-max).
2131 Optional third argument, if t, means if fail just return nil (no error).
2132 If not nil and not t, move to limit of search and return nil.
2133 Optional fourth argument is repeat count--search for successive occurrences.
2134
2135 Search case-sensitivity is determined by the value of the variable
2136 `case-fold-search', which see.
2137
2138 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2139 (string, bound, noerror, count)
2140 Lisp_Object string, bound, noerror, count;
2141 {
2142 return search_command (string, bound, noerror, count, 1, 0, 0);
2143 }
2144
2145 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2146 "sWord search backward: ",
2147 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2148 Set point to the beginning of the occurrence found, and return point.
2149 An optional second argument bounds the search; it is a buffer position.
2150 The match found must not extend before that position.
2151 Optional third argument, if t, means if fail just return nil (no error).
2152 If not nil and not t, move to limit of search and return nil.
2153 Optional fourth argument is repeat count--search for successive occurrences. */)
2154 (string, bound, noerror, count)
2155 Lisp_Object string, bound, noerror, count;
2156 {
2157 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2158 }
2159
2160 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2161 "sWord search: ",
2162 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2163 Set point to the end of the occurrence found, and return point.
2164 An optional second argument bounds the search; it is a buffer position.
2165 The match found must not extend after that position.
2166 Optional third argument, if t, means if fail just return nil (no error).
2167 If not nil and not t, move to limit of search and return nil.
2168 Optional fourth argument is repeat count--search for successive occurrences. */)
2169 (string, bound, noerror, count)
2170 Lisp_Object string, bound, noerror, count;
2171 {
2172 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2173 }
2174
2175 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2176 "sRE search backward: ",
2177 doc: /* Search backward from point for match for regular expression REGEXP.
2178 Set point to the beginning of the match, and return point.
2179 The match found is the one starting last in the buffer
2180 and yet ending before the origin of the search.
2181 An optional second argument bounds the search; it is a buffer position.
2182 The match found must start at or after that position.
2183 Optional third argument, if t, means if fail just return nil (no error).
2184 If not nil and not t, move to limit of search and return nil.
2185 Optional fourth argument is repeat count--search for successive occurrences.
2186 See also the functions `match-beginning', `match-end', `match-string',
2187 and `replace-match'. */)
2188 (regexp, bound, noerror, count)
2189 Lisp_Object regexp, bound, noerror, count;
2190 {
2191 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2192 }
2193
2194 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2195 "sRE search: ",
2196 doc: /* Search forward from point for regular expression REGEXP.
2197 Set point to the end of the occurrence found, and return point.
2198 An optional second argument bounds the search; it is a buffer position.
2199 The match found must not extend after that position.
2200 Optional third argument, if t, means if fail just return nil (no error).
2201 If not nil and not t, move to limit of search and return nil.
2202 Optional fourth argument is repeat count--search for successive occurrences.
2203 See also the functions `match-beginning', `match-end', `match-string',
2204 and `replace-match'. */)
2205 (regexp, bound, noerror, count)
2206 Lisp_Object regexp, bound, noerror, count;
2207 {
2208 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2209 }
2210
2211 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2212 "sPosix search backward: ",
2213 doc: /* Search backward from point for match for regular expression REGEXP.
2214 Find the longest match in accord with Posix regular expression rules.
2215 Set point to the beginning of the match, and return point.
2216 The match found is the one starting last in the buffer
2217 and yet ending before the origin of the search.
2218 An optional second argument bounds the search; it is a buffer position.
2219 The match found must start at or after that position.
2220 Optional third argument, if t, means if fail just return nil (no error).
2221 If not nil and not t, move to limit of search and return nil.
2222 Optional fourth argument is repeat count--search for successive occurrences.
2223 See also the functions `match-beginning', `match-end', `match-string',
2224 and `replace-match'. */)
2225 (regexp, bound, noerror, count)
2226 Lisp_Object regexp, bound, noerror, count;
2227 {
2228 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2229 }
2230
2231 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2232 "sPosix search: ",
2233 doc: /* Search forward from point for regular expression REGEXP.
2234 Find the longest match in accord with Posix regular expression rules.
2235 Set point to the end of the occurrence found, and return point.
2236 An optional second argument bounds the search; it is a buffer position.
2237 The match found must not extend after that position.
2238 Optional third argument, if t, means if fail just return nil (no error).
2239 If not nil and not t, move to limit of search and return nil.
2240 Optional fourth argument is repeat count--search for successive occurrences.
2241 See also the functions `match-beginning', `match-end', `match-string',
2242 and `replace-match'. */)
2243 (regexp, bound, noerror, count)
2244 Lisp_Object regexp, bound, noerror, count;
2245 {
2246 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2247 }
2248 \f
2249 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2250 doc: /* Replace text matched by last search with NEWTEXT.
2251 Leave point at the end of the replacement text.
2252
2253 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2254 Otherwise maybe capitalize the whole text, or maybe just word initials,
2255 based on the replaced text.
2256 If the replaced text has only capital letters
2257 and has at least one multiletter word, convert NEWTEXT to all caps.
2258 Otherwise if all words are capitalized in the replaced text,
2259 capitalize each word in NEWTEXT.
2260
2261 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2262 Otherwise treat `\\' as special:
2263 `\\&' in NEWTEXT means substitute original matched text.
2264 `\\N' means substitute what matched the Nth `\\(...\\)'.
2265 If Nth parens didn't match, substitute nothing.
2266 `\\\\' means insert one `\\'.
2267 Case conversion does not apply to these substitutions.
2268
2269 FIXEDCASE and LITERAL are optional arguments.
2270
2271 The optional fourth argument STRING can be a string to modify.
2272 This is meaningful when the previous match was done against STRING,
2273 using `string-match'. When used this way, `replace-match'
2274 creates and returns a new string made by copying STRING and replacing
2275 the part of STRING that was matched.
2276
2277 The optional fifth argument SUBEXP specifies a subexpression;
2278 it says to replace just that subexpression with NEWTEXT,
2279 rather than replacing the entire matched text.
2280 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2281 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2282 NEWTEXT in place of subexp N.
2283 This is useful only after a regular expression search or match,
2284 since only regular expressions have distinguished subexpressions. */)
2285 (newtext, fixedcase, literal, string, subexp)
2286 Lisp_Object newtext, fixedcase, literal, string, subexp;
2287 {
2288 enum { nochange, all_caps, cap_initial } case_action;
2289 register int pos, pos_byte;
2290 int some_multiletter_word;
2291 int some_lowercase;
2292 int some_uppercase;
2293 int some_nonuppercase_initial;
2294 register int c, prevc;
2295 int sub;
2296 int opoint, newpoint;
2297
2298 CHECK_STRING (newtext);
2299
2300 if (! NILP (string))
2301 CHECK_STRING (string);
2302
2303 case_action = nochange; /* We tried an initialization */
2304 /* but some C compilers blew it */
2305
2306 if (search_regs.num_regs <= 0)
2307 error ("`replace-match' called before any match found");
2308
2309 if (NILP (subexp))
2310 sub = 0;
2311 else
2312 {
2313 CHECK_NUMBER (subexp);
2314 sub = XINT (subexp);
2315 if (sub < 0 || sub >= search_regs.num_regs)
2316 args_out_of_range (subexp, make_number (search_regs.num_regs));
2317 }
2318
2319 if (NILP (string))
2320 {
2321 if (search_regs.start[sub] < BEGV
2322 || search_regs.start[sub] > search_regs.end[sub]
2323 || search_regs.end[sub] > ZV)
2324 args_out_of_range (make_number (search_regs.start[sub]),
2325 make_number (search_regs.end[sub]));
2326 }
2327 else
2328 {
2329 if (search_regs.start[sub] < 0
2330 || search_regs.start[sub] > search_regs.end[sub]
2331 || search_regs.end[sub] > SCHARS (string))
2332 args_out_of_range (make_number (search_regs.start[sub]),
2333 make_number (search_regs.end[sub]));
2334 }
2335
2336 if (NILP (fixedcase))
2337 {
2338 /* Decide how to casify by examining the matched text. */
2339 int last;
2340
2341 pos = search_regs.start[sub];
2342 last = search_regs.end[sub];
2343
2344 if (NILP (string))
2345 pos_byte = CHAR_TO_BYTE (pos);
2346 else
2347 pos_byte = string_char_to_byte (string, pos);
2348
2349 prevc = '\n';
2350 case_action = all_caps;
2351
2352 /* some_multiletter_word is set nonzero if any original word
2353 is more than one letter long. */
2354 some_multiletter_word = 0;
2355 some_lowercase = 0;
2356 some_nonuppercase_initial = 0;
2357 some_uppercase = 0;
2358
2359 while (pos < last)
2360 {
2361 if (NILP (string))
2362 {
2363 c = FETCH_CHAR (pos_byte);
2364 INC_BOTH (pos, pos_byte);
2365 }
2366 else
2367 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2368
2369 if (LOWERCASEP (c))
2370 {
2371 /* Cannot be all caps if any original char is lower case */
2372
2373 some_lowercase = 1;
2374 if (SYNTAX (prevc) != Sword)
2375 some_nonuppercase_initial = 1;
2376 else
2377 some_multiletter_word = 1;
2378 }
2379 else if (UPPERCASEP (c))
2380 {
2381 some_uppercase = 1;
2382 if (SYNTAX (prevc) != Sword)
2383 ;
2384 else
2385 some_multiletter_word = 1;
2386 }
2387 else
2388 {
2389 /* If the initial is a caseless word constituent,
2390 treat that like a lowercase initial. */
2391 if (SYNTAX (prevc) != Sword)
2392 some_nonuppercase_initial = 1;
2393 }
2394
2395 prevc = c;
2396 }
2397
2398 /* Convert to all caps if the old text is all caps
2399 and has at least one multiletter word. */
2400 if (! some_lowercase && some_multiletter_word)
2401 case_action = all_caps;
2402 /* Capitalize each word, if the old text has all capitalized words. */
2403 else if (!some_nonuppercase_initial && some_multiletter_word)
2404 case_action = cap_initial;
2405 else if (!some_nonuppercase_initial && some_uppercase)
2406 /* Should x -> yz, operating on X, give Yz or YZ?
2407 We'll assume the latter. */
2408 case_action = all_caps;
2409 else
2410 case_action = nochange;
2411 }
2412
2413 /* Do replacement in a string. */
2414 if (!NILP (string))
2415 {
2416 Lisp_Object before, after;
2417
2418 before = Fsubstring (string, make_number (0),
2419 make_number (search_regs.start[sub]));
2420 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2421
2422 /* Substitute parts of the match into NEWTEXT
2423 if desired. */
2424 if (NILP (literal))
2425 {
2426 int lastpos = 0;
2427 int lastpos_byte = 0;
2428 /* We build up the substituted string in ACCUM. */
2429 Lisp_Object accum;
2430 Lisp_Object middle;
2431 int length = SBYTES (newtext);
2432
2433 accum = Qnil;
2434
2435 for (pos_byte = 0, pos = 0; pos_byte < length;)
2436 {
2437 int substart = -1;
2438 int subend = 0;
2439 int delbackslash = 0;
2440
2441 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2442
2443 if (c == '\\')
2444 {
2445 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2446
2447 if (c == '&')
2448 {
2449 substart = search_regs.start[sub];
2450 subend = search_regs.end[sub];
2451 }
2452 else if (c >= '1' && c <= '9')
2453 {
2454 if (search_regs.start[c - '0'] >= 0
2455 && c <= search_regs.num_regs + '0')
2456 {
2457 substart = search_regs.start[c - '0'];
2458 subend = search_regs.end[c - '0'];
2459 }
2460 else
2461 {
2462 /* If that subexp did not match,
2463 replace \\N with nothing. */
2464 substart = 0;
2465 subend = 0;
2466 }
2467 }
2468 else if (c == '\\')
2469 delbackslash = 1;
2470 else
2471 error ("Invalid use of `\\' in replacement text");
2472 }
2473 if (substart >= 0)
2474 {
2475 if (pos - 2 != lastpos)
2476 middle = substring_both (newtext, lastpos,
2477 lastpos_byte,
2478 pos - 2, pos_byte - 2);
2479 else
2480 middle = Qnil;
2481 accum = concat3 (accum, middle,
2482 Fsubstring (string,
2483 make_number (substart),
2484 make_number (subend)));
2485 lastpos = pos;
2486 lastpos_byte = pos_byte;
2487 }
2488 else if (delbackslash)
2489 {
2490 middle = substring_both (newtext, lastpos,
2491 lastpos_byte,
2492 pos - 1, pos_byte - 1);
2493
2494 accum = concat2 (accum, middle);
2495 lastpos = pos;
2496 lastpos_byte = pos_byte;
2497 }
2498 }
2499
2500 if (pos != lastpos)
2501 middle = substring_both (newtext, lastpos,
2502 lastpos_byte,
2503 pos, pos_byte);
2504 else
2505 middle = Qnil;
2506
2507 newtext = concat2 (accum, middle);
2508 }
2509
2510 /* Do case substitution in NEWTEXT if desired. */
2511 if (case_action == all_caps)
2512 newtext = Fupcase (newtext);
2513 else if (case_action == cap_initial)
2514 newtext = Fupcase_initials (newtext);
2515
2516 return concat3 (before, newtext, after);
2517 }
2518
2519 /* Record point, then move (quietly) to the start of the match. */
2520 if (PT >= search_regs.end[sub])
2521 opoint = PT - ZV;
2522 else if (PT > search_regs.start[sub])
2523 opoint = search_regs.end[sub] - ZV;
2524 else
2525 opoint = PT;
2526
2527 /* If we want non-literal replacement,
2528 perform substitution on the replacement string. */
2529 if (NILP (literal))
2530 {
2531 int length = SBYTES (newtext);
2532 unsigned char *substed;
2533 int substed_alloc_size, substed_len;
2534 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2535 int str_multibyte = STRING_MULTIBYTE (newtext);
2536 Lisp_Object rev_tbl;
2537 int really_changed = 0;
2538
2539 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2540 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2541 make_number (0))
2542 : Qnil);
2543
2544 substed_alloc_size = length * 2 + 100;
2545 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2546 substed_len = 0;
2547
2548 /* Go thru NEWTEXT, producing the actual text to insert in
2549 SUBSTED while adjusting multibyteness to that of the current
2550 buffer. */
2551
2552 for (pos_byte = 0, pos = 0; pos_byte < length;)
2553 {
2554 unsigned char str[MAX_MULTIBYTE_LENGTH];
2555 unsigned char *add_stuff = NULL;
2556 int add_len = 0;
2557 int idx = -1;
2558
2559 if (str_multibyte)
2560 {
2561 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2562 if (!buf_multibyte)
2563 c = multibyte_char_to_unibyte (c, rev_tbl);
2564 }
2565 else
2566 {
2567 /* Note that we don't have to increment POS. */
2568 c = SREF (newtext, pos_byte++);
2569 if (buf_multibyte)
2570 c = unibyte_char_to_multibyte (c);
2571 }
2572
2573 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2574 or set IDX to a match index, which means put that part
2575 of the buffer text into SUBSTED. */
2576
2577 if (c == '\\')
2578 {
2579 really_changed = 1;
2580
2581 if (str_multibyte)
2582 {
2583 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2584 pos, pos_byte);
2585 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2586 c = multibyte_char_to_unibyte (c, rev_tbl);
2587 }
2588 else
2589 {
2590 c = SREF (newtext, pos_byte++);
2591 if (buf_multibyte)
2592 c = unibyte_char_to_multibyte (c);
2593 }
2594
2595 if (c == '&')
2596 idx = sub;
2597 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2598 {
2599 if (search_regs.start[c - '0'] >= 1)
2600 idx = c - '0';
2601 }
2602 else if (c == '\\')
2603 add_len = 1, add_stuff = "\\";
2604 else
2605 {
2606 xfree (substed);
2607 error ("Invalid use of `\\' in replacement text");
2608 }
2609 }
2610 else
2611 {
2612 add_len = CHAR_STRING (c, str);
2613 add_stuff = str;
2614 }
2615
2616 /* If we want to copy part of a previous match,
2617 set up ADD_STUFF and ADD_LEN to point to it. */
2618 if (idx >= 0)
2619 {
2620 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2621 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2622 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2623 move_gap (search_regs.start[idx]);
2624 add_stuff = BYTE_POS_ADDR (begbyte);
2625 }
2626
2627 /* Now the stuff we want to add to SUBSTED
2628 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2629
2630 /* Make sure SUBSTED is big enough. */
2631 if (substed_len + add_len >= substed_alloc_size)
2632 {
2633 substed_alloc_size = substed_len + add_len + 500;
2634 substed = (unsigned char *) xrealloc (substed,
2635 substed_alloc_size + 1);
2636 }
2637
2638 /* Now add to the end of SUBSTED. */
2639 if (add_stuff)
2640 {
2641 bcopy (add_stuff, substed + substed_len, add_len);
2642 substed_len += add_len;
2643 }
2644 }
2645
2646 if (really_changed)
2647 {
2648 if (buf_multibyte)
2649 {
2650 int nchars = multibyte_chars_in_text (substed, substed_len);
2651
2652 newtext = make_multibyte_string (substed, nchars, substed_len);
2653 }
2654 else
2655 newtext = make_unibyte_string (substed, substed_len);
2656 }
2657 xfree (substed);
2658 }
2659
2660 /* Replace the old text with the new in the cleanest possible way. */
2661 replace_range (search_regs.start[sub], search_regs.end[sub],
2662 newtext, 1, 0, 1);
2663 newpoint = search_regs.start[sub] + SCHARS (newtext);
2664
2665 if (case_action == all_caps)
2666 Fupcase_region (make_number (search_regs.start[sub]),
2667 make_number (newpoint));
2668 else if (case_action == cap_initial)
2669 Fupcase_initials_region (make_number (search_regs.start[sub]),
2670 make_number (newpoint));
2671
2672 /* Adjust search data for this change. */
2673 {
2674 int oldend = search_regs.end[sub];
2675 int oldstart = search_regs.start[sub];
2676 int change = newpoint - search_regs.end[sub];
2677 int i;
2678
2679 for (i = 0; i < search_regs.num_regs; i++)
2680 {
2681 if (search_regs.start[i] >= oldend)
2682 search_regs.start[i] += change;
2683 else if (search_regs.start[i] > oldstart)
2684 search_regs.start[i] = oldstart;
2685 if (search_regs.end[i] >= oldend)
2686 search_regs.end[i] += change;
2687 else if (search_regs.end[i] > oldstart)
2688 search_regs.end[i] = oldstart;
2689 }
2690 }
2691
2692 /* Put point back where it was in the text. */
2693 if (opoint <= 0)
2694 TEMP_SET_PT (opoint + ZV);
2695 else
2696 TEMP_SET_PT (opoint);
2697
2698 /* Now move point "officially" to the start of the inserted replacement. */
2699 move_if_not_intangible (newpoint);
2700
2701 return Qnil;
2702 }
2703 \f
2704 static Lisp_Object
2705 match_limit (num, beginningp)
2706 Lisp_Object num;
2707 int beginningp;
2708 {
2709 register int n;
2710
2711 CHECK_NUMBER (num);
2712 n = XINT (num);
2713 if (n < 0)
2714 args_out_of_range (num, make_number (0));
2715 if (search_regs.num_regs <= 0)
2716 error ("No match data, because no search succeeded");
2717 if (n >= search_regs.num_regs
2718 || search_regs.start[n] < 0)
2719 return Qnil;
2720 return (make_number ((beginningp) ? search_regs.start[n]
2721 : search_regs.end[n]));
2722 }
2723
2724 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2725 doc: /* Return position of start of text matched by last search.
2726 SUBEXP, a number, specifies which parenthesized expression in the last
2727 regexp.
2728 Value is nil if SUBEXPth pair didn't match, or there were less than
2729 SUBEXP pairs.
2730 Zero means the entire text matched by the whole regexp or whole string. */)
2731 (subexp)
2732 Lisp_Object subexp;
2733 {
2734 return match_limit (subexp, 1);
2735 }
2736
2737 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2738 doc: /* Return position of end of text matched by last search.
2739 SUBEXP, a number, specifies which parenthesized expression in the last
2740 regexp.
2741 Value is nil if SUBEXPth pair didn't match, or there were less than
2742 SUBEXP pairs.
2743 Zero means the entire text matched by the whole regexp or whole string. */)
2744 (subexp)
2745 Lisp_Object subexp;
2746 {
2747 return match_limit (subexp, 0);
2748 }
2749
2750 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2751 doc: /* Return a list containing all info on what the last search matched.
2752 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2753 All the elements are markers or nil (nil if the Nth pair didn't match)
2754 if the last match was on a buffer; integers or nil if a string was matched.
2755 Use `store-match-data' to reinstate the data in this list.
2756
2757 If INTEGERS (the optional first argument) is non-nil, always use
2758 integers \(rather than markers) to represent buffer positions. In
2759 this case, and if the last match was in a buffer, the buffer will get
2760 stored as one additional element at the end of the list.
2761
2762 If REUSE is a list, reuse it as part of the value. If REUSE is long
2763 enough to hold all the values, and if INTEGERS is non-nil, no consing
2764 is done.
2765
2766 If optional third arg RESEAT is non-nil, any previous markers on the
2767 REUSE list will be modified to point to nowhere.
2768
2769 Return value is undefined if the last search failed. */)
2770 (integers, reuse, reseat)
2771 Lisp_Object integers, reuse, reseat;
2772 {
2773 Lisp_Object tail, prev;
2774 Lisp_Object *data;
2775 int i, len;
2776
2777 if (!NILP (reseat))
2778 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2779 if (MARKERP (XCAR (tail)))
2780 {
2781 unchain_marker (XMARKER (XCAR (tail)));
2782 XSETCAR (tail, Qnil);
2783 }
2784
2785 if (NILP (last_thing_searched))
2786 return Qnil;
2787
2788 prev = Qnil;
2789
2790 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2791 * sizeof (Lisp_Object));
2792
2793 len = 0;
2794 for (i = 0; i < search_regs.num_regs; i++)
2795 {
2796 int start = search_regs.start[i];
2797 if (start >= 0)
2798 {
2799 if (EQ (last_thing_searched, Qt)
2800 || ! NILP (integers))
2801 {
2802 XSETFASTINT (data[2 * i], start);
2803 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2804 }
2805 else if (BUFFERP (last_thing_searched))
2806 {
2807 data[2 * i] = Fmake_marker ();
2808 Fset_marker (data[2 * i],
2809 make_number (start),
2810 last_thing_searched);
2811 data[2 * i + 1] = Fmake_marker ();
2812 Fset_marker (data[2 * i + 1],
2813 make_number (search_regs.end[i]),
2814 last_thing_searched);
2815 }
2816 else
2817 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2818 abort ();
2819
2820 len = 2 * i + 2;
2821 }
2822 else
2823 data[2 * i] = data[2 * i + 1] = Qnil;
2824 }
2825
2826 if (BUFFERP (last_thing_searched) && !NILP (integers))
2827 {
2828 data[len] = last_thing_searched;
2829 len++;
2830 }
2831
2832 /* If REUSE is not usable, cons up the values and return them. */
2833 if (! CONSP (reuse))
2834 return Flist (len, data);
2835
2836 /* If REUSE is a list, store as many value elements as will fit
2837 into the elements of REUSE. */
2838 for (i = 0, tail = reuse; CONSP (tail);
2839 i++, tail = XCDR (tail))
2840 {
2841 if (i < len)
2842 XSETCAR (tail, data[i]);
2843 else
2844 XSETCAR (tail, Qnil);
2845 prev = tail;
2846 }
2847
2848 /* If we couldn't fit all value elements into REUSE,
2849 cons up the rest of them and add them to the end of REUSE. */
2850 if (i < len)
2851 XSETCDR (prev, Flist (len - i, data + i));
2852
2853 return reuse;
2854 }
2855
2856 /* Internal usage only:
2857 If RESEAT is `evaporate', put the markers back on the free list
2858 immediately. No other references to the markers must exist in this case,
2859 so it is used only internally on the unwind stack and save-match-data from
2860 Lisp. */
2861
2862 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2863 doc: /* Set internal data on last search match from elements of LIST.
2864 LIST should have been created by calling `match-data' previously.
2865
2866 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2867 (list, reseat)
2868 register Lisp_Object list, reseat;
2869 {
2870 register int i;
2871 register Lisp_Object marker;
2872
2873 if (running_asynch_code)
2874 save_search_regs ();
2875
2876 if (!CONSP (list) && !NILP (list))
2877 list = wrong_type_argument (Qconsp, list);
2878
2879 /* Unless we find a marker with a buffer or an explicit buffer
2880 in LIST, assume that this match data came from a string. */
2881 last_thing_searched = Qt;
2882
2883 /* Allocate registers if they don't already exist. */
2884 {
2885 int length = XFASTINT (Flength (list)) / 2;
2886
2887 if (length > search_regs.num_regs)
2888 {
2889 if (search_regs.num_regs == 0)
2890 {
2891 search_regs.start
2892 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2893 search_regs.end
2894 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2895 }
2896 else
2897 {
2898 search_regs.start
2899 = (regoff_t *) xrealloc (search_regs.start,
2900 length * sizeof (regoff_t));
2901 search_regs.end
2902 = (regoff_t *) xrealloc (search_regs.end,
2903 length * sizeof (regoff_t));
2904 }
2905
2906 for (i = search_regs.num_regs; i < length; i++)
2907 search_regs.start[i] = -1;
2908
2909 search_regs.num_regs = length;
2910 }
2911
2912 for (i = 0; CONSP (list); i++)
2913 {
2914 marker = XCAR (list);
2915 if (BUFFERP (marker))
2916 {
2917 last_thing_searched = marker;
2918 break;
2919 }
2920 if (i >= length)
2921 break;
2922 if (NILP (marker))
2923 {
2924 search_regs.start[i] = -1;
2925 list = XCDR (list);
2926 }
2927 else
2928 {
2929 int from;
2930 Lisp_Object m;
2931
2932 m = marker;
2933 if (MARKERP (marker))
2934 {
2935 if (XMARKER (marker)->buffer == 0)
2936 XSETFASTINT (marker, 0);
2937 else
2938 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2939 }
2940
2941 CHECK_NUMBER_COERCE_MARKER (marker);
2942 from = XINT (marker);
2943
2944 if (!NILP (reseat) && MARKERP (m))
2945 {
2946 if (EQ (reseat, Qevaporate))
2947 free_marker (m);
2948 else
2949 unchain_marker (XMARKER (m));
2950 XSETCAR (list, Qnil);
2951 }
2952
2953 if ((list = XCDR (list), !CONSP (list)))
2954 break;
2955
2956 m = marker = XCAR (list);
2957
2958 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2959 XSETFASTINT (marker, 0);
2960
2961 CHECK_NUMBER_COERCE_MARKER (marker);
2962 search_regs.start[i] = from;
2963 search_regs.end[i] = XINT (marker);
2964
2965 if (!NILP (reseat) && MARKERP (m))
2966 {
2967 if (EQ (reseat, Qevaporate))
2968 free_marker (m);
2969 else
2970 unchain_marker (XMARKER (m));
2971 XSETCAR (list, Qnil);
2972 }
2973 }
2974 list = XCDR (list);
2975 }
2976
2977 for (; i < search_regs.num_regs; i++)
2978 search_regs.start[i] = -1;
2979 }
2980
2981 return Qnil;
2982 }
2983
2984 /* If non-zero the match data have been saved in saved_search_regs
2985 during the execution of a sentinel or filter. */
2986 static int search_regs_saved;
2987 static struct re_registers saved_search_regs;
2988 static Lisp_Object saved_last_thing_searched;
2989
2990 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2991 if asynchronous code (filter or sentinel) is running. */
2992 static void
2993 save_search_regs ()
2994 {
2995 if (!search_regs_saved)
2996 {
2997 saved_search_regs.num_regs = search_regs.num_regs;
2998 saved_search_regs.start = search_regs.start;
2999 saved_search_regs.end = search_regs.end;
3000 saved_last_thing_searched = last_thing_searched;
3001 last_thing_searched = Qnil;
3002 search_regs.num_regs = 0;
3003 search_regs.start = 0;
3004 search_regs.end = 0;
3005
3006 search_regs_saved = 1;
3007 }
3008 }
3009
3010 /* Called upon exit from filters and sentinels. */
3011 void
3012 restore_search_regs ()
3013 {
3014 if (search_regs_saved)
3015 {
3016 if (search_regs.num_regs > 0)
3017 {
3018 xfree (search_regs.start);
3019 xfree (search_regs.end);
3020 }
3021 search_regs.num_regs = saved_search_regs.num_regs;
3022 search_regs.start = saved_search_regs.start;
3023 search_regs.end = saved_search_regs.end;
3024 last_thing_searched = saved_last_thing_searched;
3025 saved_last_thing_searched = Qnil;
3026 search_regs_saved = 0;
3027 }
3028 }
3029
3030 static Lisp_Object
3031 unwind_set_match_data (list)
3032 Lisp_Object list;
3033 {
3034 /* It is safe to free (evaporate) the markers immediately. */
3035 return Fset_match_data (list, Qevaporate);
3036 }
3037
3038 /* Called to unwind protect the match data. */
3039 void
3040 record_unwind_save_match_data ()
3041 {
3042 record_unwind_protect (unwind_set_match_data,
3043 Fmatch_data (Qnil, Qnil, Qnil));
3044 }
3045
3046 /* Quote a string to inactivate reg-expr chars */
3047
3048 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3049 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3050 (string)
3051 Lisp_Object string;
3052 {
3053 register unsigned char *in, *out, *end;
3054 register unsigned char *temp;
3055 int backslashes_added = 0;
3056
3057 CHECK_STRING (string);
3058
3059 temp = (unsigned char *) alloca (SBYTES (string) * 2);
3060
3061 /* Now copy the data into the new string, inserting escapes. */
3062
3063 in = SDATA (string);
3064 end = in + SBYTES (string);
3065 out = temp;
3066
3067 for (; in != end; in++)
3068 {
3069 if (*in == '['
3070 || *in == '*' || *in == '.' || *in == '\\'
3071 || *in == '?' || *in == '+'
3072 || *in == '^' || *in == '$')
3073 *out++ = '\\', backslashes_added++;
3074 *out++ = *in;
3075 }
3076
3077 return make_specified_string (temp,
3078 SCHARS (string) + backslashes_added,
3079 out - temp,
3080 STRING_MULTIBYTE (string));
3081 }
3082 \f
3083 void
3084 syms_of_search ()
3085 {
3086 register int i;
3087
3088 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3089 {
3090 searchbufs[i].buf.allocated = 100;
3091 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3092 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3093 searchbufs[i].regexp = Qnil;
3094 searchbufs[i].whitespace_regexp = Qnil;
3095 staticpro (&searchbufs[i].regexp);
3096 staticpro (&searchbufs[i].whitespace_regexp);
3097 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3098 }
3099 searchbuf_head = &searchbufs[0];
3100
3101 Qsearch_failed = intern ("search-failed");
3102 staticpro (&Qsearch_failed);
3103 Qinvalid_regexp = intern ("invalid-regexp");
3104 staticpro (&Qinvalid_regexp);
3105
3106 Fput (Qsearch_failed, Qerror_conditions,
3107 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3108 Fput (Qsearch_failed, Qerror_message,
3109 build_string ("Search failed"));
3110
3111 Fput (Qinvalid_regexp, Qerror_conditions,
3112 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3113 Fput (Qinvalid_regexp, Qerror_message,
3114 build_string ("Invalid regexp"));
3115
3116 last_thing_searched = Qnil;
3117 staticpro (&last_thing_searched);
3118
3119 saved_last_thing_searched = Qnil;
3120 staticpro (&saved_last_thing_searched);
3121
3122 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3123 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3124 Some commands use this for user-specified regexps.
3125 Spaces that occur inside character classes or repetition operators
3126 or other such regexp constructs are not replaced with this.
3127 A value of nil (which is the normal value) means treat spaces literally. */);
3128 Vsearch_spaces_regexp = Qnil;
3129
3130 defsubr (&Slooking_at);
3131 defsubr (&Sposix_looking_at);
3132 defsubr (&Sstring_match);
3133 defsubr (&Sposix_string_match);
3134 defsubr (&Ssearch_forward);
3135 defsubr (&Ssearch_backward);
3136 defsubr (&Sword_search_forward);
3137 defsubr (&Sword_search_backward);
3138 defsubr (&Sre_search_forward);
3139 defsubr (&Sre_search_backward);
3140 defsubr (&Sposix_search_forward);
3141 defsubr (&Sposix_search_backward);
3142 defsubr (&Sreplace_match);
3143 defsubr (&Smatch_beginning);
3144 defsubr (&Smatch_end);
3145 defsubr (&Smatch_data);
3146 defsubr (&Sset_match_data);
3147 defsubr (&Sregexp_quote);
3148 }
3149
3150 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3151 (do not change this comment) */