]> code.delx.au - gnu-emacs/blob - src/search.c
Fix incorrect usage of @key in the User Manual (Bug#20135)
[gnu-emacs] / src / search.c
1 /* String search routines for GNU Emacs.
2
3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2015 Free Software
4 Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20
21
22 #include <config.h>
23
24 #include "lisp.h"
25 #include "category.h"
26 #include "character.h"
27 #include "buffer.h"
28 #include "syntax.h"
29 #include "charset.h"
30 #include "region-cache.h"
31 #include "commands.h"
32 #include "blockinput.h"
33 #include "intervals.h"
34
35 #include <sys/types.h>
36 #include "regex.h"
37
38 #define REGEXP_CACHE_SIZE 20
39
40 /* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
42 struct regexp_cache
43 {
44 struct regexp_cache *next;
45 Lisp_Object regexp, whitespace_regexp;
46 /* Syntax table for which the regexp applies. We need this because
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
49 Lisp_Object syntax_table;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
52 /* True means regexp was compiled to do full POSIX backtracking. */
53 bool posix;
54 };
55
56 /* The instances of that struct. */
57 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
58
59 /* The head of the linked list; points to the most recently used buffer. */
60 static struct regexp_cache *searchbuf_head;
61
62
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs;
81
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched;
86
87 /* Error condition signaled when regexp compile_pattern fails. */
88 static Lisp_Object Qinvalid_regexp;
89
90 /* Error condition used for failing searches. */
91 static Lisp_Object Qsearch_failed;
92
93 static void set_search_regs (ptrdiff_t, ptrdiff_t);
94 static void save_search_regs (void);
95 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
96 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
97 ptrdiff_t, ptrdiff_t);
98 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
99 Lisp_Object, Lisp_Object, ptrdiff_t,
100 ptrdiff_t, int);
101 static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
102 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
103 Lisp_Object, Lisp_Object, bool);
104
105 static _Noreturn void
106 matcher_overflow (void)
107 {
108 error ("Stack overflow in regexp matcher");
109 }
110
111 /* Compile a regexp and signal a Lisp error if anything goes wrong.
112 PATTERN is the pattern to compile.
113 CP is the place to put the result.
114 TRANSLATE is a translation table for ignoring case, or nil for none.
115 POSIX is true if we want full backtracking (POSIX style) for this pattern.
116 False means backtrack only enough to get a valid match.
117
118 The behavior also depends on Vsearch_spaces_regexp. */
119
120 static void
121 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
122 Lisp_Object translate, bool posix)
123 {
124 char *val;
125 reg_syntax_t old;
126
127 cp->regexp = Qnil;
128 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
129 cp->posix = posix;
130 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
131 cp->buf.charset_unibyte = charset_unibyte;
132 if (STRINGP (Vsearch_spaces_regexp))
133 cp->whitespace_regexp = Vsearch_spaces_regexp;
134 else
135 cp->whitespace_regexp = Qnil;
136
137 /* rms: I think BLOCK_INPUT is not needed here any more,
138 because regex.c defines malloc to call xmalloc.
139 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
140 So let's turn it off. */
141 /* BLOCK_INPUT; */
142 old = re_set_syntax (RE_SYNTAX_EMACS
143 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
144
145 if (STRINGP (Vsearch_spaces_regexp))
146 re_set_whitespace_regexp (SSDATA (Vsearch_spaces_regexp));
147 else
148 re_set_whitespace_regexp (NULL);
149
150 val = (char *) re_compile_pattern (SSDATA (pattern),
151 SBYTES (pattern), &cp->buf);
152
153 /* If the compiled pattern hard codes some of the contents of the
154 syntax-table, it can only be reused with *this* syntax table. */
155 cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
156
157 re_set_whitespace_regexp (NULL);
158
159 re_set_syntax (old);
160 /* unblock_input (); */
161 if (val)
162 xsignal1 (Qinvalid_regexp, build_string (val));
163
164 cp->regexp = Fcopy_sequence (pattern);
165 }
166
167 /* Shrink each compiled regexp buffer in the cache
168 to the size actually used right now.
169 This is called from garbage collection. */
170
171 void
172 shrink_regexp_cache (void)
173 {
174 struct regexp_cache *cp;
175
176 for (cp = searchbuf_head; cp != 0; cp = cp->next)
177 {
178 cp->buf.allocated = cp->buf.used;
179 cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
180 }
181 }
182
183 /* Clear the regexp cache w.r.t. a particular syntax table,
184 because it was changed.
185 There is no danger of memory leak here because re_compile_pattern
186 automagically manages the memory in each re_pattern_buffer struct,
187 based on its `allocated' and `buffer' values. */
188 void
189 clear_regexp_cache (void)
190 {
191 int i;
192
193 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
194 /* It's tempting to compare with the syntax-table we've actually changed,
195 but it's not sufficient because char-table inheritance means that
196 modifying one syntax-table can change others at the same time. */
197 if (!EQ (searchbufs[i].syntax_table, Qt))
198 searchbufs[i].regexp = Qnil;
199 }
200
201 /* Compile a regexp if necessary, but first check to see if there's one in
202 the cache.
203 PATTERN is the pattern to compile.
204 TRANSLATE is a translation table for ignoring case, or nil for none.
205 REGP is the structure that says where to store the "register"
206 values that will result from matching this pattern.
207 If it is 0, we should compile the pattern not to record any
208 subexpression bounds.
209 POSIX is true if we want full backtracking (POSIX style) for this pattern.
210 False means backtrack only enough to get a valid match. */
211
212 struct re_pattern_buffer *
213 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
214 Lisp_Object translate, bool posix, bool multibyte)
215 {
216 struct regexp_cache *cp, **cpp;
217
218 for (cpp = &searchbuf_head; ; cpp = &cp->next)
219 {
220 cp = *cpp;
221 /* Entries are initialized to nil, and may be set to nil by
222 compile_pattern_1 if the pattern isn't valid. Don't apply
223 string accessors in those cases. However, compile_pattern_1
224 is only applied to the cache entry we pick here to reuse. So
225 nil should never appear before a non-nil entry. */
226 if (NILP (cp->regexp))
227 goto compile_it;
228 if (SCHARS (cp->regexp) == SCHARS (pattern)
229 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
230 && !NILP (Fstring_equal (cp->regexp, pattern))
231 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
232 && cp->posix == posix
233 && (EQ (cp->syntax_table, Qt)
234 || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
235 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
236 && cp->buf.charset_unibyte == charset_unibyte)
237 break;
238
239 /* If we're at the end of the cache, compile into the nil cell
240 we found, or the last (least recently used) cell with a
241 string value. */
242 if (cp->next == 0)
243 {
244 compile_it:
245 compile_pattern_1 (cp, pattern, translate, posix);
246 break;
247 }
248 }
249
250 /* When we get here, cp (aka *cpp) contains the compiled pattern,
251 either because we found it in the cache or because we just compiled it.
252 Move it to the front of the queue to mark it as most recently used. */
253 *cpp = cp->next;
254 cp->next = searchbuf_head;
255 searchbuf_head = cp;
256
257 /* Advise the searching functions about the space we have allocated
258 for register data. */
259 if (regp)
260 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
261
262 /* The compiled pattern can be used both for multibyte and unibyte
263 target. But, we have to tell which the pattern is used for. */
264 cp->buf.target_multibyte = multibyte;
265
266 return &cp->buf;
267 }
268
269 \f
270 static Lisp_Object
271 looking_at_1 (Lisp_Object string, bool posix)
272 {
273 Lisp_Object val;
274 unsigned char *p1, *p2;
275 ptrdiff_t s1, s2;
276 register ptrdiff_t i;
277 struct re_pattern_buffer *bufp;
278
279 if (running_asynch_code)
280 save_search_regs ();
281
282 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
283 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
284 BVAR (current_buffer, case_eqv_table));
285
286 CHECK_STRING (string);
287 bufp = compile_pattern (string,
288 (NILP (Vinhibit_changing_match_data)
289 ? &search_regs : NULL),
290 (!NILP (BVAR (current_buffer, case_fold_search))
291 ? BVAR (current_buffer, case_canon_table) : Qnil),
292 posix,
293 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
294
295 immediate_quit = 1;
296 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
297
298 /* Get pointers and sizes of the two strings
299 that make up the visible portion of the buffer. */
300
301 p1 = BEGV_ADDR;
302 s1 = GPT_BYTE - BEGV_BYTE;
303 p2 = GAP_END_ADDR;
304 s2 = ZV_BYTE - GPT_BYTE;
305 if (s1 < 0)
306 {
307 p2 = p1;
308 s2 = ZV_BYTE - BEGV_BYTE;
309 s1 = 0;
310 }
311 if (s2 < 0)
312 {
313 s1 = ZV_BYTE - BEGV_BYTE;
314 s2 = 0;
315 }
316
317 re_match_object = Qnil;
318
319 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
320 PT_BYTE - BEGV_BYTE,
321 (NILP (Vinhibit_changing_match_data)
322 ? &search_regs : NULL),
323 ZV_BYTE - BEGV_BYTE);
324 immediate_quit = 0;
325
326 if (i == -2)
327 matcher_overflow ();
328
329 val = (i >= 0 ? Qt : Qnil);
330 if (NILP (Vinhibit_changing_match_data) && i >= 0)
331 {
332 for (i = 0; i < search_regs.num_regs; i++)
333 if (search_regs.start[i] >= 0)
334 {
335 search_regs.start[i]
336 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
337 search_regs.end[i]
338 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
339 }
340 /* Set last_thing_searched only when match data is changed. */
341 XSETBUFFER (last_thing_searched, current_buffer);
342 }
343
344 return val;
345 }
346
347 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
348 doc: /* Return t if text after point matches regular expression REGEXP.
349 This function modifies the match data that `match-beginning',
350 `match-end' and `match-data' access; save and restore the match
351 data if you want to preserve them. */)
352 (Lisp_Object regexp)
353 {
354 return looking_at_1 (regexp, 0);
355 }
356
357 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
358 doc: /* Return t if text after point matches regular expression REGEXP.
359 Find the longest match, in accord with Posix regular expression rules.
360 This function modifies the match data that `match-beginning',
361 `match-end' and `match-data' access; save and restore the match
362 data if you want to preserve them. */)
363 (Lisp_Object regexp)
364 {
365 return looking_at_1 (regexp, 1);
366 }
367 \f
368 static Lisp_Object
369 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
370 bool posix)
371 {
372 ptrdiff_t val;
373 struct re_pattern_buffer *bufp;
374 EMACS_INT pos;
375 ptrdiff_t pos_byte, i;
376
377 if (running_asynch_code)
378 save_search_regs ();
379
380 CHECK_STRING (regexp);
381 CHECK_STRING (string);
382
383 if (NILP (start))
384 pos = 0, pos_byte = 0;
385 else
386 {
387 ptrdiff_t len = SCHARS (string);
388
389 CHECK_NUMBER (start);
390 pos = XINT (start);
391 if (pos < 0 && -pos <= len)
392 pos = len + pos;
393 else if (0 > pos || pos > len)
394 args_out_of_range (string, start);
395 pos_byte = string_char_to_byte (string, pos);
396 }
397
398 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
399 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
400 BVAR (current_buffer, case_eqv_table));
401
402 bufp = compile_pattern (regexp,
403 (NILP (Vinhibit_changing_match_data)
404 ? &search_regs : NULL),
405 (!NILP (BVAR (current_buffer, case_fold_search))
406 ? BVAR (current_buffer, case_canon_table) : Qnil),
407 posix,
408 STRING_MULTIBYTE (string));
409 immediate_quit = 1;
410 re_match_object = string;
411
412 val = re_search (bufp, SSDATA (string),
413 SBYTES (string), pos_byte,
414 SBYTES (string) - pos_byte,
415 (NILP (Vinhibit_changing_match_data)
416 ? &search_regs : NULL));
417 immediate_quit = 0;
418
419 /* Set last_thing_searched only when match data is changed. */
420 if (NILP (Vinhibit_changing_match_data))
421 last_thing_searched = Qt;
422
423 if (val == -2)
424 matcher_overflow ();
425 if (val < 0) return Qnil;
426
427 if (NILP (Vinhibit_changing_match_data))
428 for (i = 0; i < search_regs.num_regs; i++)
429 if (search_regs.start[i] >= 0)
430 {
431 search_regs.start[i]
432 = string_byte_to_char (string, search_regs.start[i]);
433 search_regs.end[i]
434 = string_byte_to_char (string, search_regs.end[i]);
435 }
436
437 return make_number (string_byte_to_char (string, val));
438 }
439
440 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
441 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
442 Matching ignores case if `case-fold-search' is non-nil.
443 If third arg START is non-nil, start search at that index in STRING.
444 For index of first char beyond the match, do (match-end 0).
445 `match-end' and `match-beginning' also give indices of substrings
446 matched by parenthesis constructs in the pattern.
447
448 You can use the function `match-string' to extract the substrings
449 matched by the parenthesis constructions in REGEXP. */)
450 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
451 {
452 return string_match_1 (regexp, string, start, 0);
453 }
454
455 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
456 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
457 Find the longest match, in accord with Posix regular expression rules.
458 Case is ignored if `case-fold-search' is non-nil in the current buffer.
459 If third arg START is non-nil, start search at that index in STRING.
460 For index of first char beyond the match, do (match-end 0).
461 `match-end' and `match-beginning' also give indices of substrings
462 matched by parenthesis constructs in the pattern. */)
463 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
464 {
465 return string_match_1 (regexp, string, start, 1);
466 }
467
468 /* Match REGEXP against STRING, searching all of STRING,
469 and return the index of the match, or negative on failure.
470 This does not clobber the match data. */
471
472 ptrdiff_t
473 fast_string_match (Lisp_Object regexp, Lisp_Object string)
474 {
475 ptrdiff_t val;
476 struct re_pattern_buffer *bufp;
477
478 bufp = compile_pattern (regexp, 0, Qnil,
479 0, STRING_MULTIBYTE (string));
480 immediate_quit = 1;
481 re_match_object = string;
482
483 val = re_search (bufp, SSDATA (string),
484 SBYTES (string), 0,
485 SBYTES (string), 0);
486 immediate_quit = 0;
487 return val;
488 }
489
490 /* Match REGEXP against STRING, searching all of STRING ignoring case,
491 and return the index of the match, or negative on failure.
492 This does not clobber the match data.
493 We assume that STRING contains single-byte characters. */
494
495 ptrdiff_t
496 fast_c_string_match_ignore_case (Lisp_Object regexp,
497 const char *string, ptrdiff_t len)
498 {
499 ptrdiff_t val;
500 struct re_pattern_buffer *bufp;
501
502 regexp = string_make_unibyte (regexp);
503 re_match_object = Qt;
504 bufp = compile_pattern (regexp, 0,
505 Vascii_canon_table, 0,
506 0);
507 immediate_quit = 1;
508 val = re_search (bufp, string, len, 0, len, 0);
509 immediate_quit = 0;
510 return val;
511 }
512
513 /* Like fast_string_match but ignore case. */
514
515 ptrdiff_t
516 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
517 {
518 ptrdiff_t val;
519 struct re_pattern_buffer *bufp;
520
521 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
522 0, STRING_MULTIBYTE (string));
523 immediate_quit = 1;
524 re_match_object = string;
525
526 val = re_search (bufp, SSDATA (string),
527 SBYTES (string), 0,
528 SBYTES (string), 0);
529 immediate_quit = 0;
530 return val;
531 }
532 \f
533 /* Match REGEXP against the characters after POS to LIMIT, and return
534 the number of matched characters. If STRING is non-nil, match
535 against the characters in it. In that case, POS and LIMIT are
536 indices into the string. This function doesn't modify the match
537 data. */
538
539 ptrdiff_t
540 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
541 ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
542 {
543 bool multibyte;
544 struct re_pattern_buffer *buf;
545 unsigned char *p1, *p2;
546 ptrdiff_t s1, s2;
547 ptrdiff_t len;
548
549 if (STRINGP (string))
550 {
551 if (pos_byte < 0)
552 pos_byte = string_char_to_byte (string, pos);
553 if (limit_byte < 0)
554 limit_byte = string_char_to_byte (string, limit);
555 p1 = NULL;
556 s1 = 0;
557 p2 = SDATA (string);
558 s2 = SBYTES (string);
559 re_match_object = string;
560 multibyte = STRING_MULTIBYTE (string);
561 }
562 else
563 {
564 if (pos_byte < 0)
565 pos_byte = CHAR_TO_BYTE (pos);
566 if (limit_byte < 0)
567 limit_byte = CHAR_TO_BYTE (limit);
568 pos_byte -= BEGV_BYTE;
569 limit_byte -= BEGV_BYTE;
570 p1 = BEGV_ADDR;
571 s1 = GPT_BYTE - BEGV_BYTE;
572 p2 = GAP_END_ADDR;
573 s2 = ZV_BYTE - GPT_BYTE;
574 if (s1 < 0)
575 {
576 p2 = p1;
577 s2 = ZV_BYTE - BEGV_BYTE;
578 s1 = 0;
579 }
580 if (s2 < 0)
581 {
582 s1 = ZV_BYTE - BEGV_BYTE;
583 s2 = 0;
584 }
585 re_match_object = Qnil;
586 multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
587 }
588
589 buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
590 immediate_quit = 1;
591 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
592 pos_byte, NULL, limit_byte);
593 immediate_quit = 0;
594
595 return len;
596 }
597
598 \f
599 /* The newline cache: remembering which sections of text have no newlines. */
600
601 /* If the user has requested the long scans caching, make sure it's on.
602 Otherwise, make sure it's off.
603 This is our cheezy way of associating an action with the change of
604 state of a buffer-local variable. */
605 static struct region_cache *
606 newline_cache_on_off (struct buffer *buf)
607 {
608 struct buffer *base_buf = buf;
609 bool indirect_p = false;
610
611 if (buf->base_buffer)
612 {
613 base_buf = buf->base_buffer;
614 indirect_p = true;
615 }
616
617 /* Don't turn on or off the cache in the base buffer, if the value
618 of cache-long-scans of the base buffer is inconsistent with that.
619 This is because doing so will just make the cache pure overhead,
620 since if we turn it on via indirect buffer, it will be
621 immediately turned off by its base buffer. */
622 if (NILP (BVAR (buf, cache_long_scans)))
623 {
624 if (!indirect_p
625 || NILP (BVAR (base_buf, cache_long_scans)))
626 {
627 /* It should be off. */
628 if (base_buf->newline_cache)
629 {
630 free_region_cache (base_buf->newline_cache);
631 base_buf->newline_cache = 0;
632 }
633 }
634 return NULL;
635 }
636 else
637 {
638 if (!indirect_p
639 || !NILP (BVAR (base_buf, cache_long_scans)))
640 {
641 /* It should be on. */
642 if (base_buf->newline_cache == 0)
643 base_buf->newline_cache = new_region_cache ();
644 }
645 return base_buf->newline_cache;
646 }
647 }
648
649 \f
650 /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
651
652 If COUNT is positive, search forwards; END must be >= START.
653 If COUNT is negative, search backwards for the -COUNTth instance;
654 END must be <= START.
655 If COUNT is zero, do anything you please; run rogue, for all I care.
656
657 If END is zero, use BEGV or ZV instead, as appropriate for the
658 direction indicated by COUNT.
659
660 If we find COUNT instances, set *SHORTAGE to zero, and return the
661 position past the COUNTth match. Note that for reverse motion
662 this is not the same as the usual convention for Emacs motion commands.
663
664 If we don't find COUNT instances before reaching END, set *SHORTAGE
665 to the number of newlines left unfound, and return END.
666
667 If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
668 to the returned character position.
669
670 If ALLOW_QUIT, set immediate_quit. That's good to do
671 except when inside redisplay. */
672
673 ptrdiff_t
674 find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
675 ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
676 ptrdiff_t *bytepos, bool allow_quit)
677 {
678 struct region_cache *newline_cache;
679 int direction;
680 struct buffer *cache_buffer;
681
682 if (count > 0)
683 {
684 direction = 1;
685 if (!end)
686 end = ZV, end_byte = ZV_BYTE;
687 }
688 else
689 {
690 direction = -1;
691 if (!end)
692 end = BEGV, end_byte = BEGV_BYTE;
693 }
694 if (end_byte == -1)
695 end_byte = CHAR_TO_BYTE (end);
696
697 newline_cache = newline_cache_on_off (current_buffer);
698 if (current_buffer->base_buffer)
699 cache_buffer = current_buffer->base_buffer;
700 else
701 cache_buffer = current_buffer;
702
703 if (shortage != 0)
704 *shortage = 0;
705
706 immediate_quit = allow_quit;
707
708 if (count > 0)
709 while (start != end)
710 {
711 /* Our innermost scanning loop is very simple; it doesn't know
712 about gaps, buffer ends, or the newline cache. ceiling is
713 the position of the last character before the next such
714 obstacle --- the last character the dumb search loop should
715 examine. */
716 ptrdiff_t tem, ceiling_byte = end_byte - 1;
717
718 /* If we're using the newline cache, consult it to see whether
719 we can avoid some scanning. */
720 if (newline_cache)
721 {
722 ptrdiff_t next_change;
723 int result = 1;
724
725 immediate_quit = 0;
726 while (start < end && result)
727 {
728 ptrdiff_t lim1;
729
730 result = region_cache_forward (cache_buffer, newline_cache,
731 start, &next_change);
732 if (result)
733 {
734 /* When the cache revalidation is deferred,
735 next-change might point beyond ZV, which will
736 cause assertion violation in CHAR_TO_BYTE below.
737 Limit next_change to ZV to avoid that. */
738 if (next_change > ZV)
739 next_change = ZV;
740 start = next_change;
741 lim1 = next_change = end;
742 }
743 else
744 lim1 = min (next_change, end);
745
746 /* The cache returned zero for this region; see if
747 this is because the region is known and includes
748 only newlines. While at that, count any newlines
749 we bump into, and exit if we found enough off them. */
750 start_byte = CHAR_TO_BYTE (start);
751 while (start < lim1
752 && FETCH_BYTE (start_byte) == '\n')
753 {
754 start_byte++;
755 start++;
756 if (--count == 0)
757 {
758 if (bytepos)
759 *bytepos = start_byte;
760 return start;
761 }
762 }
763 /* If we found a non-newline character before hitting
764 position where the cache will again return non-zero
765 (i.e. no newlines beyond that position), it means
766 this region is not yet known to the cache, and we
767 must resort to the "dumb loop" method. */
768 if (start < next_change && !result)
769 break;
770 result = 1;
771 }
772 if (start >= end)
773 {
774 start = end;
775 start_byte = end_byte;
776 break;
777 }
778 immediate_quit = allow_quit;
779
780 /* START should never be after END. */
781 if (start_byte > ceiling_byte)
782 start_byte = ceiling_byte;
783
784 /* Now the text after start is an unknown region, and
785 next_change is the position of the next known region. */
786 ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
787 }
788 else if (start_byte == -1)
789 start_byte = CHAR_TO_BYTE (start);
790
791 /* The dumb loop can only scan text stored in contiguous
792 bytes. BUFFER_CEILING_OF returns the last character
793 position that is contiguous, so the ceiling is the
794 position after that. */
795 tem = BUFFER_CEILING_OF (start_byte);
796 ceiling_byte = min (tem, ceiling_byte);
797
798 {
799 /* The termination address of the dumb loop. */
800 unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
801 ptrdiff_t lim_byte = ceiling_byte + 1;
802
803 /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
804 of the base, the cursor, and the next line. */
805 ptrdiff_t base = start_byte - lim_byte;
806 ptrdiff_t cursor, next;
807
808 for (cursor = base; cursor < 0; cursor = next)
809 {
810 /* The dumb loop. */
811 unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
812 next = nl ? nl - lim_addr : 0;
813
814 /* If we're using the newline cache, cache the fact that
815 the region we just traversed is free of newlines. */
816 if (newline_cache && cursor != next)
817 {
818 know_region_cache (cache_buffer, newline_cache,
819 BYTE_TO_CHAR (lim_byte + cursor),
820 BYTE_TO_CHAR (lim_byte + next));
821 /* know_region_cache can relocate buffer text. */
822 lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
823 }
824
825 if (! nl)
826 break;
827 next++;
828
829 if (--count == 0)
830 {
831 immediate_quit = 0;
832 if (bytepos)
833 *bytepos = lim_byte + next;
834 return BYTE_TO_CHAR (lim_byte + next);
835 }
836 }
837
838 start_byte = lim_byte;
839 start = BYTE_TO_CHAR (start_byte);
840 }
841 }
842 else
843 while (start > end)
844 {
845 /* The last character to check before the next obstacle. */
846 ptrdiff_t tem, ceiling_byte = end_byte;
847
848 /* Consult the newline cache, if appropriate. */
849 if (newline_cache)
850 {
851 ptrdiff_t next_change;
852 int result = 1;
853
854 immediate_quit = 0;
855 while (start > end && result)
856 {
857 ptrdiff_t lim1;
858
859 result = region_cache_backward (cache_buffer, newline_cache,
860 start, &next_change);
861 if (result)
862 {
863 start = next_change;
864 lim1 = next_change = end;
865 }
866 else
867 lim1 = max (next_change, end);
868 start_byte = CHAR_TO_BYTE (start);
869 while (start > lim1
870 && FETCH_BYTE (start_byte - 1) == '\n')
871 {
872 if (++count == 0)
873 {
874 if (bytepos)
875 *bytepos = start_byte;
876 return start;
877 }
878 start_byte--;
879 start--;
880 }
881 if (start > next_change && !result)
882 break;
883 result = 1;
884 }
885 if (start <= end)
886 {
887 start = end;
888 start_byte = end_byte;
889 break;
890 }
891 immediate_quit = allow_quit;
892
893 /* Start should never be at or before end. */
894 if (start_byte <= ceiling_byte)
895 start_byte = ceiling_byte + 1;
896
897 /* Now the text before start is an unknown region, and
898 next_change is the position of the next known region. */
899 ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
900 }
901 else if (start_byte == -1)
902 start_byte = CHAR_TO_BYTE (start);
903
904 /* Stop scanning before the gap. */
905 tem = BUFFER_FLOOR_OF (start_byte - 1);
906 ceiling_byte = max (tem, ceiling_byte);
907
908 {
909 /* The termination address of the dumb loop. */
910 unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
911
912 /* Offsets (relative to CEILING_ADDR and CEILING_BYTE) of
913 the base, the cursor, and the previous line. These
914 offsets are at least -1. */
915 ptrdiff_t base = start_byte - ceiling_byte;
916 ptrdiff_t cursor, prev;
917
918 for (cursor = base; 0 < cursor; cursor = prev)
919 {
920 unsigned char *nl = memrchr (ceiling_addr, '\n', cursor);
921 prev = nl ? nl - ceiling_addr : -1;
922
923 /* If we're looking for newlines, cache the fact that
924 this line's region is free of them. */
925 if (newline_cache && cursor != prev + 1)
926 {
927 know_region_cache (cache_buffer, newline_cache,
928 BYTE_TO_CHAR (ceiling_byte + prev + 1),
929 BYTE_TO_CHAR (ceiling_byte + cursor));
930 /* know_region_cache can relocate buffer text. */
931 ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
932 }
933
934 if (! nl)
935 break;
936
937 if (++count >= 0)
938 {
939 immediate_quit = 0;
940 if (bytepos)
941 *bytepos = ceiling_byte + prev + 1;
942 return BYTE_TO_CHAR (ceiling_byte + prev + 1);
943 }
944 }
945
946 start_byte = ceiling_byte;
947 start = BYTE_TO_CHAR (start_byte);
948 }
949 }
950
951 immediate_quit = 0;
952 if (shortage)
953 *shortage = count * direction;
954 if (bytepos)
955 {
956 *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
957 eassert (*bytepos == CHAR_TO_BYTE (start));
958 }
959 return start;
960 }
961 \f
962 /* Search for COUNT instances of a line boundary.
963 Start at START. If COUNT is negative, search backwards.
964
965 We report the resulting position by calling TEMP_SET_PT_BOTH.
966
967 If we find COUNT instances. we position after (always after,
968 even if scanning backwards) the COUNTth match, and return 0.
969
970 If we don't find COUNT instances before reaching the end of the
971 buffer (or the beginning, if scanning backwards), we return
972 the number of line boundaries left unfound, and position at
973 the limit we bumped up against.
974
975 If ALLOW_QUIT, set immediate_quit. That's good to do
976 except in special cases. */
977
978 ptrdiff_t
979 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
980 ptrdiff_t limit, ptrdiff_t limit_byte,
981 ptrdiff_t count, bool allow_quit)
982 {
983 ptrdiff_t charpos, bytepos, shortage;
984
985 charpos = find_newline (start, start_byte, limit, limit_byte,
986 count, &shortage, &bytepos, allow_quit);
987 if (shortage)
988 TEMP_SET_PT_BOTH (limit, limit_byte);
989 else
990 TEMP_SET_PT_BOTH (charpos, bytepos);
991 return shortage;
992 }
993
994 /* Like find_newline, but doesn't allow QUITting and doesn't return
995 SHORTAGE. */
996 ptrdiff_t
997 find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
998 ptrdiff_t cnt, ptrdiff_t *bytepos)
999 {
1000 return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
1001 }
1002
1003 /* Like find_newline, but returns position before the newline, not
1004 after, and only search up to TO.
1005 This isn't just find_newline_no_quit (...)-1, because you might hit TO. */
1006
1007 ptrdiff_t
1008 find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
1009 ptrdiff_t cnt, ptrdiff_t *bytepos)
1010 {
1011 ptrdiff_t shortage;
1012 ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &shortage, bytepos, 1);
1013
1014 if (shortage == 0)
1015 {
1016 if (bytepos)
1017 DEC_BOTH (pos, *bytepos);
1018 else
1019 pos--;
1020 }
1021 return pos;
1022 }
1023 \f
1024 /* Subroutines of Lisp buffer search functions. */
1025
1026 static Lisp_Object
1027 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
1028 Lisp_Object count, int direction, int RE, bool posix)
1029 {
1030 EMACS_INT np;
1031 EMACS_INT lim;
1032 ptrdiff_t lim_byte;
1033 EMACS_INT n = direction;
1034
1035 if (!NILP (count))
1036 {
1037 CHECK_NUMBER (count);
1038 n *= XINT (count);
1039 }
1040
1041 CHECK_STRING (string);
1042 if (NILP (bound))
1043 {
1044 if (n > 0)
1045 lim = ZV, lim_byte = ZV_BYTE;
1046 else
1047 lim = BEGV, lim_byte = BEGV_BYTE;
1048 }
1049 else
1050 {
1051 CHECK_NUMBER_COERCE_MARKER (bound);
1052 lim = XINT (bound);
1053 if (n > 0 ? lim < PT : lim > PT)
1054 error ("Invalid search bound (wrong side of point)");
1055 if (lim > ZV)
1056 lim = ZV, lim_byte = ZV_BYTE;
1057 else if (lim < BEGV)
1058 lim = BEGV, lim_byte = BEGV_BYTE;
1059 else
1060 lim_byte = CHAR_TO_BYTE (lim);
1061 }
1062
1063 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
1064 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
1065 BVAR (current_buffer, case_eqv_table));
1066
1067 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1068 (!NILP (BVAR (current_buffer, case_fold_search))
1069 ? BVAR (current_buffer, case_canon_table)
1070 : Qnil),
1071 (!NILP (BVAR (current_buffer, case_fold_search))
1072 ? BVAR (current_buffer, case_eqv_table)
1073 : Qnil),
1074 posix);
1075 if (np <= 0)
1076 {
1077 if (NILP (noerror))
1078 xsignal1 (Qsearch_failed, string);
1079
1080 if (!EQ (noerror, Qt))
1081 {
1082 eassert (BEGV <= lim && lim <= ZV);
1083 SET_PT_BOTH (lim, lim_byte);
1084 return Qnil;
1085 #if 0 /* This would be clean, but maybe programs depend on
1086 a value of nil here. */
1087 np = lim;
1088 #endif
1089 }
1090 else
1091 return Qnil;
1092 }
1093
1094 eassert (BEGV <= np && np <= ZV);
1095 SET_PT (np);
1096
1097 return make_number (np);
1098 }
1099 \f
1100 /* Return true if REGEXP it matches just one constant string. */
1101
1102 static bool
1103 trivial_regexp_p (Lisp_Object regexp)
1104 {
1105 ptrdiff_t len = SBYTES (regexp);
1106 unsigned char *s = SDATA (regexp);
1107 while (--len >= 0)
1108 {
1109 switch (*s++)
1110 {
1111 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1112 return 0;
1113 case '\\':
1114 if (--len < 0)
1115 return 0;
1116 switch (*s++)
1117 {
1118 case '|': case '(': case ')': case '`': case '\'': case 'b':
1119 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1120 case 'S': case '=': case '{': case '}': case '_':
1121 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1122 case '1': case '2': case '3': case '4': case '5':
1123 case '6': case '7': case '8': case '9':
1124 return 0;
1125 }
1126 }
1127 }
1128 return 1;
1129 }
1130
1131 /* Search for the n'th occurrence of STRING in the current buffer,
1132 starting at position POS and stopping at position LIM,
1133 treating STRING as a literal string if RE is false or as
1134 a regular expression if RE is true.
1135
1136 If N is positive, searching is forward and LIM must be greater than POS.
1137 If N is negative, searching is backward and LIM must be less than POS.
1138
1139 Returns -x if x occurrences remain to be found (x > 0),
1140 or else the position at the beginning of the Nth occurrence
1141 (if searching backward) or the end (if searching forward).
1142
1143 POSIX is nonzero if we want full backtracking (POSIX style)
1144 for this pattern. 0 means backtrack only enough to get a valid match. */
1145
1146 #define TRANSLATE(out, trt, d) \
1147 do \
1148 { \
1149 if (! NILP (trt)) \
1150 { \
1151 Lisp_Object temp; \
1152 temp = Faref (trt, make_number (d)); \
1153 if (INTEGERP (temp)) \
1154 out = XINT (temp); \
1155 else \
1156 out = d; \
1157 } \
1158 else \
1159 out = d; \
1160 } \
1161 while (0)
1162
1163 /* Only used in search_buffer, to record the end position of the match
1164 when searching regexps and SEARCH_REGS should not be changed
1165 (i.e. Vinhibit_changing_match_data is non-nil). */
1166 static struct re_registers search_regs_1;
1167
1168 static EMACS_INT
1169 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1170 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1171 int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1172 {
1173 ptrdiff_t len = SCHARS (string);
1174 ptrdiff_t len_byte = SBYTES (string);
1175 register ptrdiff_t i;
1176
1177 if (running_asynch_code)
1178 save_search_regs ();
1179
1180 /* Searching 0 times means don't move. */
1181 /* Null string is found at starting position. */
1182 if (len == 0 || n == 0)
1183 {
1184 set_search_regs (pos_byte, 0);
1185 return pos;
1186 }
1187
1188 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1189 {
1190 unsigned char *p1, *p2;
1191 ptrdiff_t s1, s2;
1192 struct re_pattern_buffer *bufp;
1193
1194 bufp = compile_pattern (string,
1195 (NILP (Vinhibit_changing_match_data)
1196 ? &search_regs : &search_regs_1),
1197 trt, posix,
1198 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1199
1200 immediate_quit = 1; /* Quit immediately if user types ^G,
1201 because letting this function finish
1202 can take too long. */
1203 QUIT; /* Do a pending quit right away,
1204 to avoid paradoxical behavior */
1205 /* Get pointers and sizes of the two strings
1206 that make up the visible portion of the buffer. */
1207
1208 p1 = BEGV_ADDR;
1209 s1 = GPT_BYTE - BEGV_BYTE;
1210 p2 = GAP_END_ADDR;
1211 s2 = ZV_BYTE - GPT_BYTE;
1212 if (s1 < 0)
1213 {
1214 p2 = p1;
1215 s2 = ZV_BYTE - BEGV_BYTE;
1216 s1 = 0;
1217 }
1218 if (s2 < 0)
1219 {
1220 s1 = ZV_BYTE - BEGV_BYTE;
1221 s2 = 0;
1222 }
1223 re_match_object = Qnil;
1224
1225 while (n < 0)
1226 {
1227 ptrdiff_t val;
1228
1229 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1230 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1231 (NILP (Vinhibit_changing_match_data)
1232 ? &search_regs : &search_regs_1),
1233 /* Don't allow match past current point */
1234 pos_byte - BEGV_BYTE);
1235 if (val == -2)
1236 {
1237 matcher_overflow ();
1238 }
1239 if (val >= 0)
1240 {
1241 if (NILP (Vinhibit_changing_match_data))
1242 {
1243 pos_byte = search_regs.start[0] + BEGV_BYTE;
1244 for (i = 0; i < search_regs.num_regs; i++)
1245 if (search_regs.start[i] >= 0)
1246 {
1247 search_regs.start[i]
1248 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1249 search_regs.end[i]
1250 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1251 }
1252 XSETBUFFER (last_thing_searched, current_buffer);
1253 /* Set pos to the new position. */
1254 pos = search_regs.start[0];
1255 }
1256 else
1257 {
1258 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1259 /* Set pos to the new position. */
1260 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1261 }
1262 }
1263 else
1264 {
1265 immediate_quit = 0;
1266 return (n);
1267 }
1268 n++;
1269 }
1270 while (n > 0)
1271 {
1272 ptrdiff_t val;
1273
1274 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1275 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1276 (NILP (Vinhibit_changing_match_data)
1277 ? &search_regs : &search_regs_1),
1278 lim_byte - BEGV_BYTE);
1279 if (val == -2)
1280 {
1281 matcher_overflow ();
1282 }
1283 if (val >= 0)
1284 {
1285 if (NILP (Vinhibit_changing_match_data))
1286 {
1287 pos_byte = search_regs.end[0] + BEGV_BYTE;
1288 for (i = 0; i < search_regs.num_regs; i++)
1289 if (search_regs.start[i] >= 0)
1290 {
1291 search_regs.start[i]
1292 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1293 search_regs.end[i]
1294 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1295 }
1296 XSETBUFFER (last_thing_searched, current_buffer);
1297 pos = search_regs.end[0];
1298 }
1299 else
1300 {
1301 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1302 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1303 }
1304 }
1305 else
1306 {
1307 immediate_quit = 0;
1308 return (0 - n);
1309 }
1310 n--;
1311 }
1312 immediate_quit = 0;
1313 return (pos);
1314 }
1315 else /* non-RE case */
1316 {
1317 unsigned char *raw_pattern, *pat;
1318 ptrdiff_t raw_pattern_size;
1319 ptrdiff_t raw_pattern_size_byte;
1320 unsigned char *patbuf;
1321 bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1322 unsigned char *base_pat;
1323 /* Set to positive if we find a non-ASCII char that need
1324 translation. Otherwise set to zero later. */
1325 int char_base = -1;
1326 bool boyer_moore_ok = 1;
1327
1328 /* MULTIBYTE says whether the text to be searched is multibyte.
1329 We must convert PATTERN to match that, or we will not really
1330 find things right. */
1331
1332 if (multibyte == STRING_MULTIBYTE (string))
1333 {
1334 raw_pattern = SDATA (string);
1335 raw_pattern_size = SCHARS (string);
1336 raw_pattern_size_byte = SBYTES (string);
1337 }
1338 else if (multibyte)
1339 {
1340 raw_pattern_size = SCHARS (string);
1341 raw_pattern_size_byte
1342 = count_size_as_multibyte (SDATA (string),
1343 raw_pattern_size);
1344 raw_pattern = alloca (raw_pattern_size_byte + 1);
1345 copy_text (SDATA (string), raw_pattern,
1346 SCHARS (string), 0, 1);
1347 }
1348 else
1349 {
1350 /* Converting multibyte to single-byte.
1351
1352 ??? Perhaps this conversion should be done in a special way
1353 by subtracting nonascii-insert-offset from each non-ASCII char,
1354 so that only the multibyte chars which really correspond to
1355 the chosen single-byte character set can possibly match. */
1356 raw_pattern_size = SCHARS (string);
1357 raw_pattern_size_byte = SCHARS (string);
1358 raw_pattern = alloca (raw_pattern_size + 1);
1359 copy_text (SDATA (string), raw_pattern,
1360 SBYTES (string), 1, 0);
1361 }
1362
1363 /* Copy and optionally translate the pattern. */
1364 len = raw_pattern_size;
1365 len_byte = raw_pattern_size_byte;
1366 patbuf = alloca (len * MAX_MULTIBYTE_LENGTH);
1367 pat = patbuf;
1368 base_pat = raw_pattern;
1369 if (multibyte)
1370 {
1371 /* Fill patbuf by translated characters in STRING while
1372 checking if we can use boyer-moore search. If TRT is
1373 non-nil, we can use boyer-moore search only if TRT can be
1374 represented by the byte array of 256 elements. For that,
1375 all non-ASCII case-equivalents of all case-sensitive
1376 characters in STRING must belong to the same character
1377 group (two characters belong to the same group iff their
1378 multibyte forms are the same except for the last byte;
1379 i.e. every 64 characters form a group; U+0000..U+003F,
1380 U+0040..U+007F, U+0080..U+00BF, ...). */
1381
1382 while (--len >= 0)
1383 {
1384 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1385 int c, translated, inverse;
1386 int in_charlen, charlen;
1387
1388 /* If we got here and the RE flag is set, it's because we're
1389 dealing with a regexp known to be trivial, so the backslash
1390 just quotes the next character. */
1391 if (RE && *base_pat == '\\')
1392 {
1393 len--;
1394 raw_pattern_size--;
1395 len_byte--;
1396 base_pat++;
1397 }
1398
1399 c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1400
1401 if (NILP (trt))
1402 {
1403 str = base_pat;
1404 charlen = in_charlen;
1405 }
1406 else
1407 {
1408 /* Translate the character. */
1409 TRANSLATE (translated, trt, c);
1410 charlen = CHAR_STRING (translated, str_base);
1411 str = str_base;
1412
1413 /* Check if C has any other case-equivalents. */
1414 TRANSLATE (inverse, inverse_trt, c);
1415 /* If so, check if we can use boyer-moore. */
1416 if (c != inverse && boyer_moore_ok)
1417 {
1418 /* Check if all equivalents belong to the same
1419 group of characters. Note that the check of C
1420 itself is done by the last iteration. */
1421 int this_char_base = -1;
1422
1423 while (boyer_moore_ok)
1424 {
1425 if (ASCII_BYTE_P (inverse))
1426 {
1427 if (this_char_base > 0)
1428 boyer_moore_ok = 0;
1429 else
1430 this_char_base = 0;
1431 }
1432 else if (CHAR_BYTE8_P (inverse))
1433 /* Boyer-moore search can't handle a
1434 translation of an eight-bit
1435 character. */
1436 boyer_moore_ok = 0;
1437 else if (this_char_base < 0)
1438 {
1439 this_char_base = inverse & ~0x3F;
1440 if (char_base < 0)
1441 char_base = this_char_base;
1442 else if (this_char_base != char_base)
1443 boyer_moore_ok = 0;
1444 }
1445 else if ((inverse & ~0x3F) != this_char_base)
1446 boyer_moore_ok = 0;
1447 if (c == inverse)
1448 break;
1449 TRANSLATE (inverse, inverse_trt, inverse);
1450 }
1451 }
1452 }
1453
1454 /* Store this character into the translated pattern. */
1455 memcpy (pat, str, charlen);
1456 pat += charlen;
1457 base_pat += in_charlen;
1458 len_byte -= in_charlen;
1459 }
1460
1461 /* If char_base is still negative we didn't find any translated
1462 non-ASCII characters. */
1463 if (char_base < 0)
1464 char_base = 0;
1465 }
1466 else
1467 {
1468 /* Unibyte buffer. */
1469 char_base = 0;
1470 while (--len >= 0)
1471 {
1472 int c, translated, inverse;
1473
1474 /* If we got here and the RE flag is set, it's because we're
1475 dealing with a regexp known to be trivial, so the backslash
1476 just quotes the next character. */
1477 if (RE && *base_pat == '\\')
1478 {
1479 len--;
1480 raw_pattern_size--;
1481 base_pat++;
1482 }
1483 c = *base_pat++;
1484 TRANSLATE (translated, trt, c);
1485 *pat++ = translated;
1486 /* Check that none of C's equivalents violates the
1487 assumptions of boyer_moore. */
1488 TRANSLATE (inverse, inverse_trt, c);
1489 while (1)
1490 {
1491 if (inverse >= 0200)
1492 {
1493 boyer_moore_ok = 0;
1494 break;
1495 }
1496 if (c == inverse)
1497 break;
1498 TRANSLATE (inverse, inverse_trt, inverse);
1499 }
1500 }
1501 }
1502
1503 len_byte = pat - patbuf;
1504 pat = base_pat = patbuf;
1505
1506 if (boyer_moore_ok)
1507 return boyer_moore (n, pat, len_byte, trt, inverse_trt,
1508 pos_byte, lim_byte,
1509 char_base);
1510 else
1511 return simple_search (n, pat, raw_pattern_size, len_byte, trt,
1512 pos, pos_byte, lim, lim_byte);
1513 }
1514 }
1515 \f
1516 /* Do a simple string search N times for the string PAT,
1517 whose length is LEN/LEN_BYTE,
1518 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1519 TRT is the translation table.
1520
1521 Return the character position where the match is found.
1522 Otherwise, if M matches remained to be found, return -M.
1523
1524 This kind of search works regardless of what is in PAT and
1525 regardless of what is in TRT. It is used in cases where
1526 boyer_moore cannot work. */
1527
1528 static EMACS_INT
1529 simple_search (EMACS_INT n, unsigned char *pat,
1530 ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1531 ptrdiff_t pos, ptrdiff_t pos_byte,
1532 ptrdiff_t lim, ptrdiff_t lim_byte)
1533 {
1534 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1535 bool forward = n > 0;
1536 /* Number of buffer bytes matched. Note that this may be different
1537 from len_byte in a multibyte buffer. */
1538 ptrdiff_t match_byte = PTRDIFF_MIN;
1539
1540 if (lim > pos && multibyte)
1541 while (n > 0)
1542 {
1543 while (1)
1544 {
1545 /* Try matching at position POS. */
1546 ptrdiff_t this_pos = pos;
1547 ptrdiff_t this_pos_byte = pos_byte;
1548 ptrdiff_t this_len = len;
1549 unsigned char *p = pat;
1550 if (pos + len > lim || pos_byte + len_byte > lim_byte)
1551 goto stop;
1552
1553 while (this_len > 0)
1554 {
1555 int charlen, buf_charlen;
1556 int pat_ch, buf_ch;
1557
1558 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1559 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1560 buf_charlen);
1561 TRANSLATE (buf_ch, trt, buf_ch);
1562
1563 if (buf_ch != pat_ch)
1564 break;
1565
1566 this_len--;
1567 p += charlen;
1568
1569 this_pos_byte += buf_charlen;
1570 this_pos++;
1571 }
1572
1573 if (this_len == 0)
1574 {
1575 match_byte = this_pos_byte - pos_byte;
1576 pos += len;
1577 pos_byte += match_byte;
1578 break;
1579 }
1580
1581 INC_BOTH (pos, pos_byte);
1582 }
1583
1584 n--;
1585 }
1586 else if (lim > pos)
1587 while (n > 0)
1588 {
1589 while (1)
1590 {
1591 /* Try matching at position POS. */
1592 ptrdiff_t this_pos = pos;
1593 ptrdiff_t this_len = len;
1594 unsigned char *p = pat;
1595
1596 if (pos + len > lim)
1597 goto stop;
1598
1599 while (this_len > 0)
1600 {
1601 int pat_ch = *p++;
1602 int buf_ch = FETCH_BYTE (this_pos);
1603 TRANSLATE (buf_ch, trt, buf_ch);
1604
1605 if (buf_ch != pat_ch)
1606 break;
1607
1608 this_len--;
1609 this_pos++;
1610 }
1611
1612 if (this_len == 0)
1613 {
1614 match_byte = len;
1615 pos += len;
1616 break;
1617 }
1618
1619 pos++;
1620 }
1621
1622 n--;
1623 }
1624 /* Backwards search. */
1625 else if (lim < pos && multibyte)
1626 while (n < 0)
1627 {
1628 while (1)
1629 {
1630 /* Try matching at position POS. */
1631 ptrdiff_t this_pos = pos;
1632 ptrdiff_t this_pos_byte = pos_byte;
1633 ptrdiff_t this_len = len;
1634 const unsigned char *p = pat + len_byte;
1635
1636 if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1637 goto stop;
1638
1639 while (this_len > 0)
1640 {
1641 int pat_ch, buf_ch;
1642
1643 DEC_BOTH (this_pos, this_pos_byte);
1644 PREV_CHAR_BOUNDARY (p, pat);
1645 pat_ch = STRING_CHAR (p);
1646 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1647 TRANSLATE (buf_ch, trt, buf_ch);
1648
1649 if (buf_ch != pat_ch)
1650 break;
1651
1652 this_len--;
1653 }
1654
1655 if (this_len == 0)
1656 {
1657 match_byte = pos_byte - this_pos_byte;
1658 pos = this_pos;
1659 pos_byte = this_pos_byte;
1660 break;
1661 }
1662
1663 DEC_BOTH (pos, pos_byte);
1664 }
1665
1666 n++;
1667 }
1668 else if (lim < pos)
1669 while (n < 0)
1670 {
1671 while (1)
1672 {
1673 /* Try matching at position POS. */
1674 ptrdiff_t this_pos = pos - len;
1675 ptrdiff_t this_len = len;
1676 unsigned char *p = pat;
1677
1678 if (this_pos < lim)
1679 goto stop;
1680
1681 while (this_len > 0)
1682 {
1683 int pat_ch = *p++;
1684 int buf_ch = FETCH_BYTE (this_pos);
1685 TRANSLATE (buf_ch, trt, buf_ch);
1686
1687 if (buf_ch != pat_ch)
1688 break;
1689 this_len--;
1690 this_pos++;
1691 }
1692
1693 if (this_len == 0)
1694 {
1695 match_byte = len;
1696 pos -= len;
1697 break;
1698 }
1699
1700 pos--;
1701 }
1702
1703 n++;
1704 }
1705
1706 stop:
1707 if (n == 0)
1708 {
1709 eassert (match_byte != PTRDIFF_MIN);
1710 if (forward)
1711 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1712 else
1713 set_search_regs (multibyte ? pos_byte : pos, match_byte);
1714
1715 return pos;
1716 }
1717 else if (n > 0)
1718 return -n;
1719 else
1720 return n;
1721 }
1722 \f
1723 /* Do Boyer-Moore search N times for the string BASE_PAT,
1724 whose length is LEN_BYTE,
1725 from buffer position POS_BYTE until LIM_BYTE.
1726 DIRECTION says which direction we search in.
1727 TRT and INVERSE_TRT are translation tables.
1728 Characters in PAT are already translated by TRT.
1729
1730 This kind of search works if all the characters in BASE_PAT that
1731 have nontrivial translation are the same aside from the last byte.
1732 This makes it possible to translate just the last byte of a
1733 character, and do so after just a simple test of the context.
1734 CHAR_BASE is nonzero if there is such a non-ASCII character.
1735
1736 If that criterion is not satisfied, do not call this function. */
1737
1738 static EMACS_INT
1739 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1740 ptrdiff_t len_byte,
1741 Lisp_Object trt, Lisp_Object inverse_trt,
1742 ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1743 int char_base)
1744 {
1745 int direction = ((n > 0) ? 1 : -1);
1746 register ptrdiff_t dirlen;
1747 ptrdiff_t limit;
1748 int stride_for_teases = 0;
1749 int BM_tab[0400];
1750 register unsigned char *cursor, *p_limit;
1751 register ptrdiff_t i;
1752 register int j;
1753 unsigned char *pat, *pat_end;
1754 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1755
1756 unsigned char simple_translate[0400];
1757 /* These are set to the preceding bytes of a byte to be translated
1758 if char_base is nonzero. As the maximum byte length of a
1759 multibyte character is 5, we have to check at most four previous
1760 bytes. */
1761 int translate_prev_byte1 = 0;
1762 int translate_prev_byte2 = 0;
1763 int translate_prev_byte3 = 0;
1764
1765 /* The general approach is that we are going to maintain that we know
1766 the first (closest to the present position, in whatever direction
1767 we're searching) character that could possibly be the last
1768 (furthest from present position) character of a valid match. We
1769 advance the state of our knowledge by looking at that character
1770 and seeing whether it indeed matches the last character of the
1771 pattern. If it does, we take a closer look. If it does not, we
1772 move our pointer (to putative last characters) as far as is
1773 logically possible. This amount of movement, which I call a
1774 stride, will be the length of the pattern if the actual character
1775 appears nowhere in the pattern, otherwise it will be the distance
1776 from the last occurrence of that character to the end of the
1777 pattern. If the amount is zero we have a possible match. */
1778
1779 /* Here we make a "mickey mouse" BM table. The stride of the search
1780 is determined only by the last character of the putative match.
1781 If that character does not match, we will stride the proper
1782 distance to propose a match that superimposes it on the last
1783 instance of a character that matches it (per trt), or misses
1784 it entirely if there is none. */
1785
1786 dirlen = len_byte * direction;
1787
1788 /* Record position after the end of the pattern. */
1789 pat_end = base_pat + len_byte;
1790 /* BASE_PAT points to a character that we start scanning from.
1791 It is the first character in a forward search,
1792 the last character in a backward search. */
1793 if (direction < 0)
1794 base_pat = pat_end - 1;
1795
1796 /* A character that does not appear in the pattern induces a
1797 stride equal to the pattern length. */
1798 for (i = 0; i < 0400; i++)
1799 BM_tab[i] = dirlen;
1800
1801 /* We use this for translation, instead of TRT itself.
1802 We fill this in to handle the characters that actually
1803 occur in the pattern. Others don't matter anyway! */
1804 for (i = 0; i < 0400; i++)
1805 simple_translate[i] = i;
1806
1807 if (char_base)
1808 {
1809 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
1810 byte following them are the target of translation. */
1811 unsigned char str[MAX_MULTIBYTE_LENGTH];
1812 int cblen = CHAR_STRING (char_base, str);
1813
1814 translate_prev_byte1 = str[cblen - 2];
1815 if (cblen > 2)
1816 {
1817 translate_prev_byte2 = str[cblen - 3];
1818 if (cblen > 3)
1819 translate_prev_byte3 = str[cblen - 4];
1820 }
1821 }
1822
1823 i = 0;
1824 while (i != dirlen)
1825 {
1826 unsigned char *ptr = base_pat + i;
1827 i += direction;
1828 if (! NILP (trt))
1829 {
1830 /* If the byte currently looking at is the last of a
1831 character to check case-equivalents, set CH to that
1832 character. An ASCII character and a non-ASCII character
1833 matching with CHAR_BASE are to be checked. */
1834 int ch = -1;
1835
1836 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1837 ch = *ptr;
1838 else if (char_base
1839 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1840 {
1841 unsigned char *charstart = ptr - 1;
1842
1843 while (! (CHAR_HEAD_P (*charstart)))
1844 charstart--;
1845 ch = STRING_CHAR (charstart);
1846 if (char_base != (ch & ~0x3F))
1847 ch = -1;
1848 }
1849
1850 if (ch >= 0200 && multibyte)
1851 j = (ch & 0x3F) | 0200;
1852 else
1853 j = *ptr;
1854
1855 if (i == dirlen)
1856 stride_for_teases = BM_tab[j];
1857
1858 BM_tab[j] = dirlen - i;
1859 /* A translation table is accompanied by its inverse -- see
1860 comment following downcase_table for details. */
1861 if (ch >= 0)
1862 {
1863 int starting_ch = ch;
1864 int starting_j = j;
1865
1866 while (1)
1867 {
1868 TRANSLATE (ch, inverse_trt, ch);
1869 if (ch >= 0200 && multibyte)
1870 j = (ch & 0x3F) | 0200;
1871 else
1872 j = ch;
1873
1874 /* For all the characters that map into CH,
1875 set up simple_translate to map the last byte
1876 into STARTING_J. */
1877 simple_translate[j] = starting_j;
1878 if (ch == starting_ch)
1879 break;
1880 BM_tab[j] = dirlen - i;
1881 }
1882 }
1883 }
1884 else
1885 {
1886 j = *ptr;
1887
1888 if (i == dirlen)
1889 stride_for_teases = BM_tab[j];
1890 BM_tab[j] = dirlen - i;
1891 }
1892 /* stride_for_teases tells how much to stride if we get a
1893 match on the far character but are subsequently
1894 disappointed, by recording what the stride would have been
1895 for that character if the last character had been
1896 different. */
1897 }
1898 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1899 /* loop invariant - POS_BYTE points at where last char (first
1900 char if reverse) of pattern would align in a possible match. */
1901 while (n != 0)
1902 {
1903 ptrdiff_t tail_end;
1904 unsigned char *tail_end_ptr;
1905
1906 /* It's been reported that some (broken) compiler thinks that
1907 Boolean expressions in an arithmetic context are unsigned.
1908 Using an explicit ?1:0 prevents this. */
1909 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1910 < 0)
1911 return (n * (0 - direction));
1912 /* First we do the part we can by pointers (maybe nothing) */
1913 QUIT;
1914 pat = base_pat;
1915 limit = pos_byte - dirlen + direction;
1916 if (direction > 0)
1917 {
1918 limit = BUFFER_CEILING_OF (limit);
1919 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1920 can take on without hitting edge of buffer or the gap. */
1921 limit = min (limit, pos_byte + 20000);
1922 limit = min (limit, lim_byte - 1);
1923 }
1924 else
1925 {
1926 limit = BUFFER_FLOOR_OF (limit);
1927 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1928 can take on without hitting edge of buffer or the gap. */
1929 limit = max (limit, pos_byte - 20000);
1930 limit = max (limit, lim_byte);
1931 }
1932 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1933 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1934
1935 if ((limit - pos_byte) * direction > 20)
1936 {
1937 unsigned char *p2;
1938
1939 p_limit = BYTE_POS_ADDR (limit);
1940 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1941 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
1942 while (1) /* use one cursor setting as long as i can */
1943 {
1944 if (direction > 0) /* worth duplicating */
1945 {
1946 while (cursor <= p_limit)
1947 {
1948 if (BM_tab[*cursor] == 0)
1949 goto hit;
1950 cursor += BM_tab[*cursor];
1951 }
1952 }
1953 else
1954 {
1955 while (cursor >= p_limit)
1956 {
1957 if (BM_tab[*cursor] == 0)
1958 goto hit;
1959 cursor += BM_tab[*cursor];
1960 }
1961 }
1962 /* If you are here, cursor is beyond the end of the
1963 searched region. You fail to match within the
1964 permitted region and would otherwise try a character
1965 beyond that region. */
1966 break;
1967
1968 hit:
1969 i = dirlen - direction;
1970 if (! NILP (trt))
1971 {
1972 while ((i -= direction) + direction != 0)
1973 {
1974 int ch;
1975 cursor -= direction;
1976 /* Translate only the last byte of a character. */
1977 if (! multibyte
1978 || ((cursor == tail_end_ptr
1979 || CHAR_HEAD_P (cursor[1]))
1980 && (CHAR_HEAD_P (cursor[0])
1981 /* Check if this is the last byte of
1982 a translatable character. */
1983 || (translate_prev_byte1 == cursor[-1]
1984 && (CHAR_HEAD_P (translate_prev_byte1)
1985 || (translate_prev_byte2 == cursor[-2]
1986 && (CHAR_HEAD_P (translate_prev_byte2)
1987 || (translate_prev_byte3 == cursor[-3]))))))))
1988 ch = simple_translate[*cursor];
1989 else
1990 ch = *cursor;
1991 if (pat[i] != ch)
1992 break;
1993 }
1994 }
1995 else
1996 {
1997 while ((i -= direction) + direction != 0)
1998 {
1999 cursor -= direction;
2000 if (pat[i] != *cursor)
2001 break;
2002 }
2003 }
2004 cursor += dirlen - i - direction; /* fix cursor */
2005 if (i + direction == 0)
2006 {
2007 ptrdiff_t position, start, end;
2008
2009 cursor -= direction;
2010
2011 position = pos_byte + cursor - p2 + ((direction > 0)
2012 ? 1 - len_byte : 0);
2013 set_search_regs (position, len_byte);
2014
2015 if (NILP (Vinhibit_changing_match_data))
2016 {
2017 start = search_regs.start[0];
2018 end = search_regs.end[0];
2019 }
2020 else
2021 /* If Vinhibit_changing_match_data is non-nil,
2022 search_regs will not be changed. So let's
2023 compute start and end here. */
2024 {
2025 start = BYTE_TO_CHAR (position);
2026 end = BYTE_TO_CHAR (position + len_byte);
2027 }
2028
2029 if ((n -= direction) != 0)
2030 cursor += dirlen; /* to resume search */
2031 else
2032 return direction > 0 ? end : start;
2033 }
2034 else
2035 cursor += stride_for_teases; /* <sigh> we lose - */
2036 }
2037 pos_byte += cursor - p2;
2038 }
2039 else
2040 /* Now we'll pick up a clump that has to be done the hard
2041 way because it covers a discontinuity. */
2042 {
2043 limit = ((direction > 0)
2044 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2045 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2046 limit = ((direction > 0)
2047 ? min (limit + len_byte, lim_byte - 1)
2048 : max (limit - len_byte, lim_byte));
2049 /* LIMIT is now the last value POS_BYTE can have
2050 and still be valid for a possible match. */
2051 while (1)
2052 {
2053 /* This loop can be coded for space rather than
2054 speed because it will usually run only once.
2055 (the reach is at most len + 21, and typically
2056 does not exceed len). */
2057 while ((limit - pos_byte) * direction >= 0)
2058 {
2059 int ch = FETCH_BYTE (pos_byte);
2060 if (BM_tab[ch] == 0)
2061 goto hit2;
2062 pos_byte += BM_tab[ch];
2063 }
2064 break; /* ran off the end */
2065
2066 hit2:
2067 /* Found what might be a match. */
2068 i = dirlen - direction;
2069 while ((i -= direction) + direction != 0)
2070 {
2071 int ch;
2072 unsigned char *ptr;
2073 pos_byte -= direction;
2074 ptr = BYTE_POS_ADDR (pos_byte);
2075 /* Translate only the last byte of a character. */
2076 if (! multibyte
2077 || ((ptr == tail_end_ptr
2078 || CHAR_HEAD_P (ptr[1]))
2079 && (CHAR_HEAD_P (ptr[0])
2080 /* Check if this is the last byte of a
2081 translatable character. */
2082 || (translate_prev_byte1 == ptr[-1]
2083 && (CHAR_HEAD_P (translate_prev_byte1)
2084 || (translate_prev_byte2 == ptr[-2]
2085 && (CHAR_HEAD_P (translate_prev_byte2)
2086 || translate_prev_byte3 == ptr[-3])))))))
2087 ch = simple_translate[*ptr];
2088 else
2089 ch = *ptr;
2090 if (pat[i] != ch)
2091 break;
2092 }
2093 /* Above loop has moved POS_BYTE part or all the way
2094 back to the first pos (last pos if reverse).
2095 Set it once again at the last (first if reverse) char. */
2096 pos_byte += dirlen - i - direction;
2097 if (i + direction == 0)
2098 {
2099 ptrdiff_t position, start, end;
2100 pos_byte -= direction;
2101
2102 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2103 set_search_regs (position, len_byte);
2104
2105 if (NILP (Vinhibit_changing_match_data))
2106 {
2107 start = search_regs.start[0];
2108 end = search_regs.end[0];
2109 }
2110 else
2111 /* If Vinhibit_changing_match_data is non-nil,
2112 search_regs will not be changed. So let's
2113 compute start and end here. */
2114 {
2115 start = BYTE_TO_CHAR (position);
2116 end = BYTE_TO_CHAR (position + len_byte);
2117 }
2118
2119 if ((n -= direction) != 0)
2120 pos_byte += dirlen; /* to resume search */
2121 else
2122 return direction > 0 ? end : start;
2123 }
2124 else
2125 pos_byte += stride_for_teases;
2126 }
2127 }
2128 /* We have done one clump. Can we continue? */
2129 if ((lim_byte - pos_byte) * direction < 0)
2130 return ((0 - n) * direction);
2131 }
2132 return BYTE_TO_CHAR (pos_byte);
2133 }
2134
2135 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2136 for the overall match just found in the current buffer.
2137 Also clear out the match data for registers 1 and up. */
2138
2139 static void
2140 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2141 {
2142 ptrdiff_t i;
2143
2144 if (!NILP (Vinhibit_changing_match_data))
2145 return;
2146
2147 /* Make sure we have registers in which to store
2148 the match position. */
2149 if (search_regs.num_regs == 0)
2150 {
2151 search_regs.start = xmalloc (2 * sizeof (regoff_t));
2152 search_regs.end = xmalloc (2 * sizeof (regoff_t));
2153 search_regs.num_regs = 2;
2154 }
2155
2156 /* Clear out the other registers. */
2157 for (i = 1; i < search_regs.num_regs; i++)
2158 {
2159 search_regs.start[i] = -1;
2160 search_regs.end[i] = -1;
2161 }
2162
2163 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2164 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2165 XSETBUFFER (last_thing_searched, current_buffer);
2166 }
2167 \f
2168 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2169 "MSearch backward: ",
2170 doc: /* Search backward from point for STRING.
2171 Set point to the beginning of the occurrence found, and return point.
2172 An optional second argument bounds the search; it is a buffer position.
2173 The match found must not extend before that position.
2174 Optional third argument, if t, means if fail just return nil (no error).
2175 If not nil and not t, position at limit of search and return nil.
2176 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2177 successive occurrences. If COUNT is negative, search forward,
2178 instead of backward, for -COUNT occurrences.
2179
2180 Search case-sensitivity is determined by the value of the variable
2181 `case-fold-search', which see.
2182
2183 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2184 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2185 {
2186 return search_command (string, bound, noerror, count, -1, 0, 0);
2187 }
2188
2189 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2190 doc: /* Search forward from point for STRING.
2191 Set point to the end of the occurrence found, and return point.
2192 An optional second argument bounds the search; it is a buffer position.
2193 The match found must not extend after that position. A value of nil is
2194 equivalent to (point-max).
2195 Optional third argument, if t, means if fail just return nil (no error).
2196 If not nil and not t, move to limit of search and return nil.
2197 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2198 successive occurrences. If COUNT is negative, search backward,
2199 instead of forward, for -COUNT occurrences.
2200
2201 Search case-sensitivity is determined by the value of the variable
2202 `case-fold-search', which see.
2203
2204 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2205 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2206 {
2207 return search_command (string, bound, noerror, count, 1, 0, 0);
2208 }
2209
2210 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2211 "sRE search backward: ",
2212 doc: /* Search backward from point for match for regular expression REGEXP.
2213 Set point to the beginning of the match, and return point.
2214 The match found is the one starting last in the buffer
2215 and yet ending before the origin of the search.
2216 An optional second argument bounds the search; it is a buffer position.
2217 The match found must start at or after that position.
2218 Optional third argument, if t, means if fail just return nil (no error).
2219 If not nil and not t, move to limit of search and return nil.
2220 Optional fourth argument is repeat count--search for successive occurrences.
2221
2222 Search case-sensitivity is determined by the value of the variable
2223 `case-fold-search', which see.
2224
2225 See also the functions `match-beginning', `match-end', `match-string',
2226 and `replace-match'. */)
2227 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2228 {
2229 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2230 }
2231
2232 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2233 "sRE search: ",
2234 doc: /* Search forward from point for regular expression REGEXP.
2235 Set point to the end of the occurrence found, and return point.
2236 An optional second argument bounds the search; it is a buffer position.
2237 The match found must not extend after that position.
2238 Optional third argument, if t, means if fail just return nil (no error).
2239 If not nil and not t, move to limit of search and return nil.
2240 Optional fourth argument is repeat count--search for successive occurrences.
2241
2242 Search case-sensitivity is determined by the value of the variable
2243 `case-fold-search', which see.
2244
2245 See also the functions `match-beginning', `match-end', `match-string',
2246 and `replace-match'. */)
2247 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2248 {
2249 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2250 }
2251
2252 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2253 "sPosix search backward: ",
2254 doc: /* Search backward from point for match for regular expression REGEXP.
2255 Find the longest match in accord with Posix regular expression rules.
2256 Set point to the beginning of the match, and return point.
2257 The match found is the one starting last in the buffer
2258 and yet ending before the origin of the search.
2259 An optional second argument bounds the search; it is a buffer position.
2260 The match found must start at or after that position.
2261 Optional third argument, if t, means if fail just return nil (no error).
2262 If not nil and not t, move to limit of search and return nil.
2263 Optional fourth argument is repeat count--search for successive occurrences.
2264
2265 Search case-sensitivity is determined by the value of the variable
2266 `case-fold-search', which see.
2267
2268 See also the functions `match-beginning', `match-end', `match-string',
2269 and `replace-match'. */)
2270 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2271 {
2272 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2273 }
2274
2275 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2276 "sPosix search: ",
2277 doc: /* Search forward from point for regular expression REGEXP.
2278 Find the longest match in accord with Posix regular expression rules.
2279 Set point to the end of the occurrence found, and return point.
2280 An optional second argument bounds the search; it is a buffer position.
2281 The match found must not extend after that position.
2282 Optional third argument, if t, means if fail just return nil (no error).
2283 If not nil and not t, move to limit of search and return nil.
2284 Optional fourth argument is repeat count--search for successive occurrences.
2285
2286 Search case-sensitivity is determined by the value of the variable
2287 `case-fold-search', which see.
2288
2289 See also the functions `match-beginning', `match-end', `match-string',
2290 and `replace-match'. */)
2291 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2292 {
2293 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2294 }
2295 \f
2296 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2297 doc: /* Replace text matched by last search with NEWTEXT.
2298 Leave point at the end of the replacement text.
2299
2300 If optional second arg FIXEDCASE is non-nil, do not alter the case of
2301 the replacement text. Otherwise, maybe capitalize the whole text, or
2302 maybe just word initials, based on the replaced text. If the replaced
2303 text has only capital letters and has at least one multiletter word,
2304 convert NEWTEXT to all caps. Otherwise if all words are capitalized
2305 in the replaced text, capitalize each word in NEWTEXT.
2306
2307 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2308 Otherwise treat `\\' as special:
2309 `\\&' in NEWTEXT means substitute original matched text.
2310 `\\N' means substitute what matched the Nth `\\(...\\)'.
2311 If Nth parens didn't match, substitute nothing.
2312 `\\\\' means insert one `\\'.
2313 `\\?' is treated literally
2314 (for compatibility with `query-replace-regexp').
2315 Any other character following `\\' signals an error.
2316 Case conversion does not apply to these substitutions.
2317
2318 If optional fourth argument STRING is non-nil, it should be a string
2319 to act on; this should be the string on which the previous match was
2320 done via `string-match'. In this case, `replace-match' creates and
2321 returns a new string, made by copying STRING and replacing the part of
2322 STRING that was matched (the original STRING itself is not altered).
2323
2324 The optional fifth argument SUBEXP specifies a subexpression;
2325 it says to replace just that subexpression with NEWTEXT,
2326 rather than replacing the entire matched text.
2327 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2328 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2329 NEWTEXT in place of subexp N.
2330 This is useful only after a regular expression search or match,
2331 since only regular expressions have distinguished subexpressions. */)
2332 (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2333 {
2334 enum { nochange, all_caps, cap_initial } case_action;
2335 ptrdiff_t pos, pos_byte;
2336 bool some_multiletter_word;
2337 bool some_lowercase;
2338 bool some_uppercase;
2339 bool some_nonuppercase_initial;
2340 int c, prevc;
2341 ptrdiff_t sub;
2342 ptrdiff_t opoint, newpoint;
2343
2344 CHECK_STRING (newtext);
2345
2346 if (! NILP (string))
2347 CHECK_STRING (string);
2348
2349 case_action = nochange; /* We tried an initialization */
2350 /* but some C compilers blew it */
2351
2352 if (search_regs.num_regs <= 0)
2353 error ("`replace-match' called before any match found");
2354
2355 if (NILP (subexp))
2356 sub = 0;
2357 else
2358 {
2359 CHECK_NUMBER (subexp);
2360 if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs))
2361 args_out_of_range (subexp, make_number (search_regs.num_regs));
2362 sub = XINT (subexp);
2363 }
2364
2365 if (NILP (string))
2366 {
2367 if (search_regs.start[sub] < BEGV
2368 || search_regs.start[sub] > search_regs.end[sub]
2369 || search_regs.end[sub] > ZV)
2370 args_out_of_range (make_number (search_regs.start[sub]),
2371 make_number (search_regs.end[sub]));
2372 }
2373 else
2374 {
2375 if (search_regs.start[sub] < 0
2376 || search_regs.start[sub] > search_regs.end[sub]
2377 || search_regs.end[sub] > SCHARS (string))
2378 args_out_of_range (make_number (search_regs.start[sub]),
2379 make_number (search_regs.end[sub]));
2380 }
2381
2382 if (NILP (fixedcase))
2383 {
2384 /* Decide how to casify by examining the matched text. */
2385 ptrdiff_t last;
2386
2387 pos = search_regs.start[sub];
2388 last = search_regs.end[sub];
2389
2390 if (NILP (string))
2391 pos_byte = CHAR_TO_BYTE (pos);
2392 else
2393 pos_byte = string_char_to_byte (string, pos);
2394
2395 prevc = '\n';
2396 case_action = all_caps;
2397
2398 /* some_multiletter_word is set nonzero if any original word
2399 is more than one letter long. */
2400 some_multiletter_word = 0;
2401 some_lowercase = 0;
2402 some_nonuppercase_initial = 0;
2403 some_uppercase = 0;
2404
2405 while (pos < last)
2406 {
2407 if (NILP (string))
2408 {
2409 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2410 INC_BOTH (pos, pos_byte);
2411 }
2412 else
2413 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2414
2415 if (lowercasep (c))
2416 {
2417 /* Cannot be all caps if any original char is lower case */
2418
2419 some_lowercase = 1;
2420 if (SYNTAX (prevc) != Sword)
2421 some_nonuppercase_initial = 1;
2422 else
2423 some_multiletter_word = 1;
2424 }
2425 else if (uppercasep (c))
2426 {
2427 some_uppercase = 1;
2428 if (SYNTAX (prevc) != Sword)
2429 ;
2430 else
2431 some_multiletter_word = 1;
2432 }
2433 else
2434 {
2435 /* If the initial is a caseless word constituent,
2436 treat that like a lowercase initial. */
2437 if (SYNTAX (prevc) != Sword)
2438 some_nonuppercase_initial = 1;
2439 }
2440
2441 prevc = c;
2442 }
2443
2444 /* Convert to all caps if the old text is all caps
2445 and has at least one multiletter word. */
2446 if (! some_lowercase && some_multiletter_word)
2447 case_action = all_caps;
2448 /* Capitalize each word, if the old text has all capitalized words. */
2449 else if (!some_nonuppercase_initial && some_multiletter_word)
2450 case_action = cap_initial;
2451 else if (!some_nonuppercase_initial && some_uppercase)
2452 /* Should x -> yz, operating on X, give Yz or YZ?
2453 We'll assume the latter. */
2454 case_action = all_caps;
2455 else
2456 case_action = nochange;
2457 }
2458
2459 /* Do replacement in a string. */
2460 if (!NILP (string))
2461 {
2462 Lisp_Object before, after;
2463
2464 before = Fsubstring (string, make_number (0),
2465 make_number (search_regs.start[sub]));
2466 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2467
2468 /* Substitute parts of the match into NEWTEXT
2469 if desired. */
2470 if (NILP (literal))
2471 {
2472 ptrdiff_t lastpos = 0;
2473 ptrdiff_t lastpos_byte = 0;
2474 /* We build up the substituted string in ACCUM. */
2475 Lisp_Object accum;
2476 Lisp_Object middle;
2477 ptrdiff_t length = SBYTES (newtext);
2478
2479 accum = Qnil;
2480
2481 for (pos_byte = 0, pos = 0; pos_byte < length;)
2482 {
2483 ptrdiff_t substart = -1;
2484 ptrdiff_t subend = 0;
2485 bool delbackslash = 0;
2486
2487 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2488
2489 if (c == '\\')
2490 {
2491 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2492
2493 if (c == '&')
2494 {
2495 substart = search_regs.start[sub];
2496 subend = search_regs.end[sub];
2497 }
2498 else if (c >= '1' && c <= '9')
2499 {
2500 if (c - '0' < search_regs.num_regs
2501 && search_regs.start[c - '0'] >= 0)
2502 {
2503 substart = search_regs.start[c - '0'];
2504 subend = search_regs.end[c - '0'];
2505 }
2506 else
2507 {
2508 /* If that subexp did not match,
2509 replace \\N with nothing. */
2510 substart = 0;
2511 subend = 0;
2512 }
2513 }
2514 else if (c == '\\')
2515 delbackslash = 1;
2516 else if (c != '?')
2517 error ("Invalid use of `\\' in replacement text");
2518 }
2519 if (substart >= 0)
2520 {
2521 if (pos - 2 != lastpos)
2522 middle = substring_both (newtext, lastpos,
2523 lastpos_byte,
2524 pos - 2, pos_byte - 2);
2525 else
2526 middle = Qnil;
2527 accum = concat3 (accum, middle,
2528 Fsubstring (string,
2529 make_number (substart),
2530 make_number (subend)));
2531 lastpos = pos;
2532 lastpos_byte = pos_byte;
2533 }
2534 else if (delbackslash)
2535 {
2536 middle = substring_both (newtext, lastpos,
2537 lastpos_byte,
2538 pos - 1, pos_byte - 1);
2539
2540 accum = concat2 (accum, middle);
2541 lastpos = pos;
2542 lastpos_byte = pos_byte;
2543 }
2544 }
2545
2546 if (pos != lastpos)
2547 middle = substring_both (newtext, lastpos,
2548 lastpos_byte,
2549 pos, pos_byte);
2550 else
2551 middle = Qnil;
2552
2553 newtext = concat2 (accum, middle);
2554 }
2555
2556 /* Do case substitution in NEWTEXT if desired. */
2557 if (case_action == all_caps)
2558 newtext = Fupcase (newtext);
2559 else if (case_action == cap_initial)
2560 newtext = Fupcase_initials (newtext);
2561
2562 return concat3 (before, newtext, after);
2563 }
2564
2565 /* Record point, then move (quietly) to the start of the match. */
2566 if (PT >= search_regs.end[sub])
2567 opoint = PT - ZV;
2568 else if (PT > search_regs.start[sub])
2569 opoint = search_regs.end[sub] - ZV;
2570 else
2571 opoint = PT;
2572
2573 /* If we want non-literal replacement,
2574 perform substitution on the replacement string. */
2575 if (NILP (literal))
2576 {
2577 ptrdiff_t length = SBYTES (newtext);
2578 unsigned char *substed;
2579 ptrdiff_t substed_alloc_size, substed_len;
2580 bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2581 bool str_multibyte = STRING_MULTIBYTE (newtext);
2582 bool really_changed = 0;
2583
2584 substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
2585 ? length * 2 + 100
2586 : STRING_BYTES_BOUND);
2587 substed = xmalloc (substed_alloc_size);
2588 substed_len = 0;
2589
2590 /* Go thru NEWTEXT, producing the actual text to insert in
2591 SUBSTED while adjusting multibyteness to that of the current
2592 buffer. */
2593
2594 for (pos_byte = 0, pos = 0; pos_byte < length;)
2595 {
2596 unsigned char str[MAX_MULTIBYTE_LENGTH];
2597 const unsigned char *add_stuff = NULL;
2598 ptrdiff_t add_len = 0;
2599 ptrdiff_t idx = -1;
2600
2601 if (str_multibyte)
2602 {
2603 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2604 if (!buf_multibyte)
2605 c = multibyte_char_to_unibyte (c);
2606 }
2607 else
2608 {
2609 /* Note that we don't have to increment POS. */
2610 c = SREF (newtext, pos_byte++);
2611 if (buf_multibyte)
2612 MAKE_CHAR_MULTIBYTE (c);
2613 }
2614
2615 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2616 or set IDX to a match index, which means put that part
2617 of the buffer text into SUBSTED. */
2618
2619 if (c == '\\')
2620 {
2621 really_changed = 1;
2622
2623 if (str_multibyte)
2624 {
2625 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2626 pos, pos_byte);
2627 if (!buf_multibyte && !ASCII_CHAR_P (c))
2628 c = multibyte_char_to_unibyte (c);
2629 }
2630 else
2631 {
2632 c = SREF (newtext, pos_byte++);
2633 if (buf_multibyte)
2634 MAKE_CHAR_MULTIBYTE (c);
2635 }
2636
2637 if (c == '&')
2638 idx = sub;
2639 else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs)
2640 {
2641 if (search_regs.start[c - '0'] >= 1)
2642 idx = c - '0';
2643 }
2644 else if (c == '\\')
2645 add_len = 1, add_stuff = (unsigned char *) "\\";
2646 else
2647 {
2648 xfree (substed);
2649 error ("Invalid use of `\\' in replacement text");
2650 }
2651 }
2652 else
2653 {
2654 add_len = CHAR_STRING (c, str);
2655 add_stuff = str;
2656 }
2657
2658 /* If we want to copy part of a previous match,
2659 set up ADD_STUFF and ADD_LEN to point to it. */
2660 if (idx >= 0)
2661 {
2662 ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2663 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2664 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2665 move_gap_both (search_regs.start[idx], begbyte);
2666 add_stuff = BYTE_POS_ADDR (begbyte);
2667 }
2668
2669 /* Now the stuff we want to add to SUBSTED
2670 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2671
2672 /* Make sure SUBSTED is big enough. */
2673 if (substed_alloc_size - substed_len < add_len)
2674 substed =
2675 xpalloc (substed, &substed_alloc_size,
2676 add_len - (substed_alloc_size - substed_len),
2677 STRING_BYTES_BOUND, 1);
2678
2679 /* Now add to the end of SUBSTED. */
2680 if (add_stuff)
2681 {
2682 memcpy (substed + substed_len, add_stuff, add_len);
2683 substed_len += add_len;
2684 }
2685 }
2686
2687 if (really_changed)
2688 {
2689 if (buf_multibyte)
2690 {
2691 ptrdiff_t nchars =
2692 multibyte_chars_in_text (substed, substed_len);
2693
2694 newtext = make_multibyte_string ((char *) substed, nchars,
2695 substed_len);
2696 }
2697 else
2698 newtext = make_unibyte_string ((char *) substed, substed_len);
2699 }
2700 xfree (substed);
2701 }
2702
2703 /* Replace the old text with the new in the cleanest possible way. */
2704 replace_range (search_regs.start[sub], search_regs.end[sub],
2705 newtext, 1, 0, 1);
2706 newpoint = search_regs.start[sub] + SCHARS (newtext);
2707
2708 if (case_action == all_caps)
2709 Fupcase_region (make_number (search_regs.start[sub]),
2710 make_number (newpoint));
2711 else if (case_action == cap_initial)
2712 Fupcase_initials_region (make_number (search_regs.start[sub]),
2713 make_number (newpoint));
2714
2715 /* Adjust search data for this change. */
2716 {
2717 ptrdiff_t oldend = search_regs.end[sub];
2718 ptrdiff_t oldstart = search_regs.start[sub];
2719 ptrdiff_t change = newpoint - search_regs.end[sub];
2720 ptrdiff_t i;
2721
2722 for (i = 0; i < search_regs.num_regs; i++)
2723 {
2724 if (search_regs.start[i] >= oldend)
2725 search_regs.start[i] += change;
2726 else if (search_regs.start[i] > oldstart)
2727 search_regs.start[i] = oldstart;
2728 if (search_regs.end[i] >= oldend)
2729 search_regs.end[i] += change;
2730 else if (search_regs.end[i] > oldstart)
2731 search_regs.end[i] = oldstart;
2732 }
2733 }
2734
2735 /* Put point back where it was in the text. */
2736 if (opoint <= 0)
2737 TEMP_SET_PT (opoint + ZV);
2738 else
2739 TEMP_SET_PT (opoint);
2740
2741 /* Now move point "officially" to the start of the inserted replacement. */
2742 move_if_not_intangible (newpoint);
2743
2744 return Qnil;
2745 }
2746 \f
2747 static Lisp_Object
2748 match_limit (Lisp_Object num, bool beginningp)
2749 {
2750 EMACS_INT n;
2751
2752 CHECK_NUMBER (num);
2753 n = XINT (num);
2754 if (n < 0)
2755 args_out_of_range (num, make_number (0));
2756 if (search_regs.num_regs <= 0)
2757 error ("No match data, because no search succeeded");
2758 if (n >= search_regs.num_regs
2759 || search_regs.start[n] < 0)
2760 return Qnil;
2761 return (make_number ((beginningp) ? search_regs.start[n]
2762 : search_regs.end[n]));
2763 }
2764
2765 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2766 doc: /* Return position of start of text matched by last search.
2767 SUBEXP, a number, specifies which parenthesized expression in the last
2768 regexp.
2769 Value is nil if SUBEXPth pair didn't match, or there were less than
2770 SUBEXP pairs.
2771 Zero means the entire text matched by the whole regexp or whole string. */)
2772 (Lisp_Object subexp)
2773 {
2774 return match_limit (subexp, 1);
2775 }
2776
2777 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2778 doc: /* Return position of end of text matched by last search.
2779 SUBEXP, a number, specifies which parenthesized expression in the last
2780 regexp.
2781 Value is nil if SUBEXPth pair didn't match, or there were less than
2782 SUBEXP pairs.
2783 Zero means the entire text matched by the whole regexp or whole string. */)
2784 (Lisp_Object subexp)
2785 {
2786 return match_limit (subexp, 0);
2787 }
2788
2789 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2790 doc: /* Return a list containing all info on what the last search matched.
2791 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2792 All the elements are markers or nil (nil if the Nth pair didn't match)
2793 if the last match was on a buffer; integers or nil if a string was matched.
2794 Use `set-match-data' to reinstate the data in this list.
2795
2796 If INTEGERS (the optional first argument) is non-nil, always use
2797 integers \(rather than markers) to represent buffer positions. In
2798 this case, and if the last match was in a buffer, the buffer will get
2799 stored as one additional element at the end of the list.
2800
2801 If REUSE is a list, reuse it as part of the value. If REUSE is long
2802 enough to hold all the values, and if INTEGERS is non-nil, no consing
2803 is done.
2804
2805 If optional third arg RESEAT is non-nil, any previous markers on the
2806 REUSE list will be modified to point to nowhere.
2807
2808 Return value is undefined if the last search failed. */)
2809 (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2810 {
2811 Lisp_Object tail, prev;
2812 Lisp_Object *data;
2813 ptrdiff_t i, len;
2814
2815 if (!NILP (reseat))
2816 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2817 if (MARKERP (XCAR (tail)))
2818 {
2819 unchain_marker (XMARKER (XCAR (tail)));
2820 XSETCAR (tail, Qnil);
2821 }
2822
2823 if (NILP (last_thing_searched))
2824 return Qnil;
2825
2826 prev = Qnil;
2827
2828 data = alloca ((2 * search_regs.num_regs + 1) * sizeof *data);
2829
2830 len = 0;
2831 for (i = 0; i < search_regs.num_regs; i++)
2832 {
2833 ptrdiff_t start = search_regs.start[i];
2834 if (start >= 0)
2835 {
2836 if (EQ (last_thing_searched, Qt)
2837 || ! NILP (integers))
2838 {
2839 XSETFASTINT (data[2 * i], start);
2840 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2841 }
2842 else if (BUFFERP (last_thing_searched))
2843 {
2844 data[2 * i] = Fmake_marker ();
2845 Fset_marker (data[2 * i],
2846 make_number (start),
2847 last_thing_searched);
2848 data[2 * i + 1] = Fmake_marker ();
2849 Fset_marker (data[2 * i + 1],
2850 make_number (search_regs.end[i]),
2851 last_thing_searched);
2852 }
2853 else
2854 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2855 emacs_abort ();
2856
2857 len = 2 * i + 2;
2858 }
2859 else
2860 data[2 * i] = data[2 * i + 1] = Qnil;
2861 }
2862
2863 if (BUFFERP (last_thing_searched) && !NILP (integers))
2864 {
2865 data[len] = last_thing_searched;
2866 len++;
2867 }
2868
2869 /* If REUSE is not usable, cons up the values and return them. */
2870 if (! CONSP (reuse))
2871 return Flist (len, data);
2872
2873 /* If REUSE is a list, store as many value elements as will fit
2874 into the elements of REUSE. */
2875 for (i = 0, tail = reuse; CONSP (tail);
2876 i++, tail = XCDR (tail))
2877 {
2878 if (i < len)
2879 XSETCAR (tail, data[i]);
2880 else
2881 XSETCAR (tail, Qnil);
2882 prev = tail;
2883 }
2884
2885 /* If we couldn't fit all value elements into REUSE,
2886 cons up the rest of them and add them to the end of REUSE. */
2887 if (i < len)
2888 XSETCDR (prev, Flist (len - i, data + i));
2889
2890 return reuse;
2891 }
2892
2893 /* We used to have an internal use variant of `reseat' described as:
2894
2895 If RESEAT is `evaporate', put the markers back on the free list
2896 immediately. No other references to the markers must exist in this
2897 case, so it is used only internally on the unwind stack and
2898 save-match-data from Lisp.
2899
2900 But it was ill-conceived: those supposedly-internal markers get exposed via
2901 the undo-list, so freeing them here is unsafe. */
2902
2903 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2904 doc: /* Set internal data on last search match from elements of LIST.
2905 LIST should have been created by calling `match-data' previously.
2906
2907 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2908 (register Lisp_Object list, Lisp_Object reseat)
2909 {
2910 ptrdiff_t i;
2911 register Lisp_Object marker;
2912
2913 if (running_asynch_code)
2914 save_search_regs ();
2915
2916 CHECK_LIST (list);
2917
2918 /* Unless we find a marker with a buffer or an explicit buffer
2919 in LIST, assume that this match data came from a string. */
2920 last_thing_searched = Qt;
2921
2922 /* Allocate registers if they don't already exist. */
2923 {
2924 EMACS_INT length = XFASTINT (Flength (list)) / 2;
2925
2926 if (length > search_regs.num_regs)
2927 {
2928 ptrdiff_t num_regs = search_regs.num_regs;
2929 if (PTRDIFF_MAX < length)
2930 memory_full (SIZE_MAX);
2931 search_regs.start =
2932 xpalloc (search_regs.start, &num_regs, length - num_regs,
2933 min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t));
2934 search_regs.end =
2935 xrealloc (search_regs.end, num_regs * sizeof (regoff_t));
2936
2937 for (i = search_regs.num_regs; i < num_regs; i++)
2938 search_regs.start[i] = -1;
2939
2940 search_regs.num_regs = num_regs;
2941 }
2942
2943 for (i = 0; CONSP (list); i++)
2944 {
2945 marker = XCAR (list);
2946 if (BUFFERP (marker))
2947 {
2948 last_thing_searched = marker;
2949 break;
2950 }
2951 if (i >= length)
2952 break;
2953 if (NILP (marker))
2954 {
2955 search_regs.start[i] = -1;
2956 list = XCDR (list);
2957 }
2958 else
2959 {
2960 Lisp_Object from;
2961 Lisp_Object m;
2962
2963 m = marker;
2964 if (MARKERP (marker))
2965 {
2966 if (XMARKER (marker)->buffer == 0)
2967 XSETFASTINT (marker, 0);
2968 else
2969 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2970 }
2971
2972 CHECK_NUMBER_COERCE_MARKER (marker);
2973 from = marker;
2974
2975 if (!NILP (reseat) && MARKERP (m))
2976 {
2977 unchain_marker (XMARKER (m));
2978 XSETCAR (list, Qnil);
2979 }
2980
2981 if ((list = XCDR (list), !CONSP (list)))
2982 break;
2983
2984 m = marker = XCAR (list);
2985
2986 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2987 XSETFASTINT (marker, 0);
2988
2989 CHECK_NUMBER_COERCE_MARKER (marker);
2990 if ((XINT (from) < 0
2991 ? TYPE_MINIMUM (regoff_t) <= XINT (from)
2992 : XINT (from) <= TYPE_MAXIMUM (regoff_t))
2993 && (XINT (marker) < 0
2994 ? TYPE_MINIMUM (regoff_t) <= XINT (marker)
2995 : XINT (marker) <= TYPE_MAXIMUM (regoff_t)))
2996 {
2997 search_regs.start[i] = XINT (from);
2998 search_regs.end[i] = XINT (marker);
2999 }
3000 else
3001 {
3002 search_regs.start[i] = -1;
3003 }
3004
3005 if (!NILP (reseat) && MARKERP (m))
3006 {
3007 unchain_marker (XMARKER (m));
3008 XSETCAR (list, Qnil);
3009 }
3010 }
3011 list = XCDR (list);
3012 }
3013
3014 for (; i < search_regs.num_regs; i++)
3015 search_regs.start[i] = -1;
3016 }
3017
3018 return Qnil;
3019 }
3020
3021 /* If true the match data have been saved in saved_search_regs
3022 during the execution of a sentinel or filter. */
3023 static bool search_regs_saved;
3024 static struct re_registers saved_search_regs;
3025 static Lisp_Object saved_last_thing_searched;
3026
3027 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3028 if asynchronous code (filter or sentinel) is running. */
3029 static void
3030 save_search_regs (void)
3031 {
3032 if (!search_regs_saved)
3033 {
3034 saved_search_regs.num_regs = search_regs.num_regs;
3035 saved_search_regs.start = search_regs.start;
3036 saved_search_regs.end = search_regs.end;
3037 saved_last_thing_searched = last_thing_searched;
3038 last_thing_searched = Qnil;
3039 search_regs.num_regs = 0;
3040 search_regs.start = 0;
3041 search_regs.end = 0;
3042
3043 search_regs_saved = 1;
3044 }
3045 }
3046
3047 /* Called upon exit from filters and sentinels. */
3048 void
3049 restore_search_regs (void)
3050 {
3051 if (search_regs_saved)
3052 {
3053 if (search_regs.num_regs > 0)
3054 {
3055 xfree (search_regs.start);
3056 xfree (search_regs.end);
3057 }
3058 search_regs.num_regs = saved_search_regs.num_regs;
3059 search_regs.start = saved_search_regs.start;
3060 search_regs.end = saved_search_regs.end;
3061 last_thing_searched = saved_last_thing_searched;
3062 saved_last_thing_searched = Qnil;
3063 search_regs_saved = 0;
3064 }
3065 }
3066
3067 static void
3068 unwind_set_match_data (Lisp_Object list)
3069 {
3070 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3071 Fset_match_data (list, Qt);
3072 }
3073
3074 /* Called to unwind protect the match data. */
3075 void
3076 record_unwind_save_match_data (void)
3077 {
3078 record_unwind_protect (unwind_set_match_data,
3079 Fmatch_data (Qnil, Qnil, Qnil));
3080 }
3081
3082 /* Quote a string to deactivate reg-expr chars */
3083
3084 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3085 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3086 (Lisp_Object string)
3087 {
3088 char *in, *out, *end;
3089 char *temp;
3090 ptrdiff_t backslashes_added = 0;
3091
3092 CHECK_STRING (string);
3093
3094 temp = alloca (SBYTES (string) * 2);
3095
3096 /* Now copy the data into the new string, inserting escapes. */
3097
3098 in = SSDATA (string);
3099 end = in + SBYTES (string);
3100 out = temp;
3101
3102 for (; in != end; in++)
3103 {
3104 if (*in == '['
3105 || *in == '*' || *in == '.' || *in == '\\'
3106 || *in == '?' || *in == '+'
3107 || *in == '^' || *in == '$')
3108 *out++ = '\\', backslashes_added++;
3109 *out++ = *in;
3110 }
3111
3112 return make_specified_string (temp,
3113 SCHARS (string) + backslashes_added,
3114 out - temp,
3115 STRING_MULTIBYTE (string));
3116 }
3117
3118 /* Like find_newline, but doesn't use the cache, and only searches forward. */
3119 static ptrdiff_t
3120 find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
3121 ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
3122 ptrdiff_t *bytepos, bool allow_quit)
3123 {
3124 if (count > 0)
3125 {
3126 if (!end)
3127 end = ZV, end_byte = ZV_BYTE;
3128 }
3129 else
3130 {
3131 if (!end)
3132 end = BEGV, end_byte = BEGV_BYTE;
3133 }
3134 if (end_byte == -1)
3135 end_byte = CHAR_TO_BYTE (end);
3136
3137 if (shortage != 0)
3138 *shortage = 0;
3139
3140 immediate_quit = allow_quit;
3141
3142 if (count > 0)
3143 while (start != end)
3144 {
3145 /* Our innermost scanning loop is very simple; it doesn't know
3146 about gaps, buffer ends, or the newline cache. ceiling is
3147 the position of the last character before the next such
3148 obstacle --- the last character the dumb search loop should
3149 examine. */
3150 ptrdiff_t tem, ceiling_byte = end_byte - 1;
3151
3152 if (start_byte == -1)
3153 start_byte = CHAR_TO_BYTE (start);
3154
3155 /* The dumb loop can only scan text stored in contiguous
3156 bytes. BUFFER_CEILING_OF returns the last character
3157 position that is contiguous, so the ceiling is the
3158 position after that. */
3159 tem = BUFFER_CEILING_OF (start_byte);
3160 ceiling_byte = min (tem, ceiling_byte);
3161
3162 {
3163 /* The termination address of the dumb loop. */
3164 unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
3165 ptrdiff_t lim_byte = ceiling_byte + 1;
3166
3167 /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
3168 of the base, the cursor, and the next line. */
3169 ptrdiff_t base = start_byte - lim_byte;
3170 ptrdiff_t cursor, next;
3171
3172 for (cursor = base; cursor < 0; cursor = next)
3173 {
3174 /* The dumb loop. */
3175 unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
3176 next = nl ? nl - lim_addr : 0;
3177
3178 if (! nl)
3179 break;
3180 next++;
3181
3182 if (--count == 0)
3183 {
3184 immediate_quit = 0;
3185 if (bytepos)
3186 *bytepos = lim_byte + next;
3187 return BYTE_TO_CHAR (lim_byte + next);
3188 }
3189 }
3190
3191 start_byte = lim_byte;
3192 start = BYTE_TO_CHAR (start_byte);
3193 }
3194 }
3195
3196 immediate_quit = 0;
3197 if (shortage)
3198 *shortage = count;
3199 if (bytepos)
3200 {
3201 *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
3202 eassert (*bytepos == CHAR_TO_BYTE (start));
3203 }
3204 return start;
3205 }
3206
3207 DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
3208 0, 1, 0,
3209 doc: /* Check the newline cache of BUFFER against buffer contents.
3210
3211 BUFFER defaults to the current buffer.
3212
3213 Value is an array of 2 sub-arrays of buffer positions for newlines,
3214 the first based on the cache, the second based on actually scanning
3215 the buffer. If the buffer doesn't have a cache, the value is nil. */)
3216 (Lisp_Object buffer)
3217 {
3218 struct buffer *buf, *old = NULL;
3219 ptrdiff_t shortage, nl_count_cache, nl_count_buf;
3220 Lisp_Object cache_newlines, buf_newlines, val;
3221 ptrdiff_t from, found, i;
3222
3223 if (NILP (buffer))
3224 buf = current_buffer;
3225 else
3226 {
3227 CHECK_BUFFER (buffer);
3228 buf = XBUFFER (buffer);
3229 old = current_buffer;
3230 }
3231 if (buf->base_buffer)
3232 buf = buf->base_buffer;
3233
3234 /* If the buffer doesn't have a newline cache, return nil. */
3235 if (NILP (BVAR (buf, cache_long_scans))
3236 || buf->newline_cache == NULL)
3237 return Qnil;
3238
3239 /* find_newline can only work on the current buffer. */
3240 if (old != NULL)
3241 set_buffer_internal_1 (buf);
3242
3243 /* How many newlines are there according to the cache? */
3244 find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3245 TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
3246 nl_count_cache = TYPE_MAXIMUM (ptrdiff_t) - shortage;
3247
3248 /* Create vector and populate it. */
3249 cache_newlines = make_uninit_vector (nl_count_cache);
3250
3251 if (nl_count_cache)
3252 {
3253 for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3254 {
3255 ptrdiff_t from_byte = CHAR_TO_BYTE (from);
3256
3257 found = find_newline (from, from_byte, 0, -1, 1, &shortage,
3258 NULL, true);
3259 if (shortage != 0 || i >= nl_count_cache)
3260 break;
3261 ASET (cache_newlines, i, make_number (found - 1));
3262 }
3263 /* Fill the rest of slots with an invalid position. */
3264 for ( ; i < nl_count_cache; i++)
3265 ASET (cache_newlines, i, make_number (-1));
3266 }
3267
3268 /* Now do the same, but without using the cache. */
3269 find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3270 TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
3271 nl_count_buf = TYPE_MAXIMUM (ptrdiff_t) - shortage;
3272 buf_newlines = make_uninit_vector (nl_count_buf);
3273 if (nl_count_buf)
3274 {
3275 for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3276 {
3277 ptrdiff_t from_byte = CHAR_TO_BYTE (from);
3278
3279 found = find_newline1 (from, from_byte, 0, -1, 1, &shortage,
3280 NULL, true);
3281 if (shortage != 0 || i >= nl_count_buf)
3282 break;
3283 ASET (buf_newlines, i, make_number (found - 1));
3284 }
3285 for ( ; i < nl_count_buf; i++)
3286 ASET (buf_newlines, i, make_number (-1));
3287 }
3288
3289 /* Construct the value and return it. */
3290 val = make_uninit_vector (2);
3291 ASET (val, 0, cache_newlines);
3292 ASET (val, 1, buf_newlines);
3293
3294 if (old != NULL)
3295 set_buffer_internal_1 (old);
3296 return val;
3297 }
3298 \f
3299 void
3300 syms_of_search (void)
3301 {
3302 register int i;
3303
3304 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3305 {
3306 searchbufs[i].buf.allocated = 100;
3307 searchbufs[i].buf.buffer = xmalloc (100);
3308 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3309 searchbufs[i].regexp = Qnil;
3310 searchbufs[i].whitespace_regexp = Qnil;
3311 searchbufs[i].syntax_table = Qnil;
3312 staticpro (&searchbufs[i].regexp);
3313 staticpro (&searchbufs[i].whitespace_regexp);
3314 staticpro (&searchbufs[i].syntax_table);
3315 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3316 }
3317 searchbuf_head = &searchbufs[0];
3318
3319 DEFSYM (Qsearch_failed, "search-failed");
3320 DEFSYM (Qinvalid_regexp, "invalid-regexp");
3321
3322 Fput (Qsearch_failed, Qerror_conditions,
3323 listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror));
3324 Fput (Qsearch_failed, Qerror_message,
3325 build_pure_c_string ("Search failed"));
3326
3327 Fput (Qinvalid_regexp, Qerror_conditions,
3328 listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror));
3329 Fput (Qinvalid_regexp, Qerror_message,
3330 build_pure_c_string ("Invalid regexp"));
3331
3332 last_thing_searched = Qnil;
3333 staticpro (&last_thing_searched);
3334
3335 saved_last_thing_searched = Qnil;
3336 staticpro (&saved_last_thing_searched);
3337
3338 DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3339 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3340 Some commands use this for user-specified regexps.
3341 Spaces that occur inside character classes or repetition operators
3342 or other such regexp constructs are not replaced with this.
3343 A value of nil (which is the normal value) means treat spaces literally. */);
3344 Vsearch_spaces_regexp = Qnil;
3345
3346 DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3347 doc: /* Internal use only.
3348 If non-nil, the primitive searching and matching functions
3349 such as `looking-at', `string-match', `re-search-forward', etc.,
3350 do not set the match data. The proper way to use this variable
3351 is to bind it with `let' around a small expression. */);
3352 Vinhibit_changing_match_data = Qnil;
3353
3354 defsubr (&Slooking_at);
3355 defsubr (&Sposix_looking_at);
3356 defsubr (&Sstring_match);
3357 defsubr (&Sposix_string_match);
3358 defsubr (&Ssearch_forward);
3359 defsubr (&Ssearch_backward);
3360 defsubr (&Sre_search_forward);
3361 defsubr (&Sre_search_backward);
3362 defsubr (&Sposix_search_forward);
3363 defsubr (&Sposix_search_backward);
3364 defsubr (&Sreplace_match);
3365 defsubr (&Smatch_beginning);
3366 defsubr (&Smatch_end);
3367 defsubr (&Smatch_data);
3368 defsubr (&Sset_match_data);
3369 defsubr (&Sregexp_quote);
3370 defsubr (&Snewline_cache_check);
3371 }