code.delx.au - gnu-emacs/blob - src/syntax.c

   1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
   2    Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2016 Free
   3    Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or (at
  10 your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20
  21 #include <config.h>
  22
  23 #include <sys/types.h>
  24
  25 #include "lisp.h"
  26 #include "character.h"
  27 #include "buffer.h"
  28 #include "regex.h"
  29 #include "syntax.h"
  30 #include "intervals.h"
  31 #include "category.h"
  32
/* Within this file, syntax lookups go through the three macros below.
   Each one calls the matching syntax_property* function with 1 as its
   second argument; per the original note, this makes the lookup take
   its data from gl_state.  NOTE(review): the syntax_property* functions
   are declared outside this chunk (presumably in syntax.h) -- confirm
   the meaning of that argument there.  */
#define SYNTAX(c) syntax_property (c, 1)
#define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
#define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
  37
  38 /* Eight single-bit flags have the following meanings:
  39   1. This character is the first of a two-character comment-start sequence.
  40   2. This character is the second of a two-character comment-start sequence.
  41   3. This character is the first of a two-character comment-end sequence.
  42   4. This character is the second of a two-character comment-end sequence.
  43   5. This character is a prefix, for backward-prefix-chars.
  44   6. The char is part of a delimiter for comments of style "b".
  45   7. This character is part of a nestable comment sequence.
  46   8. The char is part of a delimiter for comments of style "c".
  47   Note that any two-character sequence whose first character has flag 1
  48   and whose second character has flag 2 will be interpreted as a comment start.
  49
  50   Bits 6 and 8 discriminate among different comment styles.
  51   Languages such as C++ allow two orthogonal syntax start/end pairs
  52   and bit 6 determines whether a comment-end or Scommentend
  53   ends style a or b.  Comment markers can start style a, b, c, or bc.
  54   Style a is always the default.
  55   For 2-char comment markers, the style b flag is looked up only on the second
  56   char of the comment marker and on the first char of the comment ender.
  57   For style c (like the nested flag), the flag can be placed on any of
  58   the chars.  */
  59
  60 /* These functions extract specific flags from an integer
  61    that holds the syntax code and the flags.  */
  62
  63 static bool
  64 SYNTAX_FLAGS_COMSTART_FIRST (int flags)
  65 {
  66   return (flags >> 16) & 1;
  67 }
  68 static bool
  69 SYNTAX_FLAGS_COMSTART_SECOND (int flags)
  70 {
  71   return (flags >> 17) & 1;
  72 }
  73 static bool
  74 SYNTAX_FLAGS_COMEND_FIRST (int flags)
  75 {
  76   return (flags >> 18) & 1;
  77 }
  78 static bool
  79 SYNTAX_FLAGS_COMEND_SECOND (int flags)
  80 {
  81   return (flags >> 19) & 1;
  82 }
  83 static bool
  84 SYNTAX_FLAGS_COMSTARTEND_FIRST (int flags)
  85 {
  86   return (flags & 0x50000) != 0;
  87 }
  88 static bool
  89 SYNTAX_FLAGS_PREFIX (int flags)
  90 {
  91   return (flags >> 20) & 1;
  92 }
  93 static bool
  94 SYNTAX_FLAGS_COMMENT_STYLEB (int flags)
  95 {
  96   return (flags >> 21) & 1;
  97 }
  98 static bool
  99 SYNTAX_FLAGS_COMMENT_STYLEC (int flags)
 100 {
 101   return (flags >> 23) & 1;
 102 }
 103 static int
 104 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags)
 105 {
 106   return (flags >> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
 107 }
 108 static bool
 109 SYNTAX_FLAGS_COMMENT_NESTED (int flags)
 110 {
 111   return (flags >> 22) & 1;
 112 }
 113
 114 /* FLAGS should be the flags of the main char of the comment marker, e.g.
 115    the second for comstart and the first for comend.  */
 116 static int
 117 SYNTAX_FLAGS_COMMENT_STYLE (int flags, int other_flags)
 118 {
 119   return (SYNTAX_FLAGS_COMMENT_STYLEB (flags)
 120           | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags)
 121           | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags));
 122 }
 123
 124 /* Extract a particular flag for a given character.  */
 125
 126 static bool
 127 SYNTAX_COMEND_FIRST (int c)
 128 {
 129   return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c));
 130 }
 131
/* These constants are used in place of a comment style and of a
   string-ender char, to distinguish comments/strings started by the
   comment_fence and string_fence syntax codes.  Both values are above
   255.  */

enum
  {
    ST_COMMENT_STYLE = 256 + 1, /* Stands for a comment-fence delimiter.  */
    ST_STRING_STYLE = 256 + 2   /* Stands for a string-fence delimiter.  */
  };
 141
/* This is the internal form of the parse state used in
   parse-partial-sexp.  Pointers to it are taken by scan_sexps_forward
   and internalize_parse_state (see their prototypes in this file).  */

struct lisp_parse_state
  {
    EMACS_INT depth;    /* Depth at end of parsing.  */
    int instring;  /* -1 if not within string, else desired terminator.  */
    EMACS_INT incomment; /* -1 if in unnestable comment, else comment
                            nesting.  */
    int comstyle;  /* Comment style a=0, or b=1, or ST_COMMENT_STYLE.  */
    bool quoted;   /* True if just after an escape char at end of parsing.  */
    EMACS_INT mindepth; /* Minimum depth seen while scanning.  */
    /* Char number of most recent start-of-expression at current level.  */
    ptrdiff_t thislevelstart;
    /* Char number of start of containing expression.  */
    ptrdiff_t prevlevelstart;
    ptrdiff_t location;      /* Char number at which parsing stopped.  */
    ptrdiff_t location_byte; /* Corresponding byte position.  */
    ptrdiff_t comstr_start;  /* Position of last comment/string starter.  */
    Lisp_Object levelstarts; /* Char numbers of starts-of-expression
                                of levels (starting from outermost).  */
    int prev_syntax; /* Syntax of previous position scanned, when
                        that position (potentially) holds the first char
                        of a 2-char construct, i.e. comment delimiter
                        or Sescape, etc.  Smax otherwise.  */
  };
 166 \f
/* These variables are a cache for finding the start of a defun; they
   are consulted and refreshed by find_defun_start.
   find_start_pos is the place for which the defun start was found.
   find_start_value is the defun start position found for it.
   find_start_value_byte is the corresponding byte position.
   find_start_buffer is the buffer it was found in.
   find_start_begv is the BEGV value when it was found.
   find_start_modiff is the value of MODIFF when it was found.  */

static ptrdiff_t find_start_pos;
static ptrdiff_t find_start_value;
static ptrdiff_t find_start_value_byte;
static struct buffer *find_start_buffer;
static ptrdiff_t find_start_begv;
static EMACS_INT find_start_modiff;
 181
 182
/* Prototypes for file-local (static) functions, so that they can be
   called before their definitions appear.  */
static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
static void scan_sexps_forward (struct lisp_parse_state *,
                                ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
                                bool, int);
static void internalize_parse_state (Lisp_Object, struct lisp_parse_state *);
static bool in_classes (int, Lisp_Object);
static void parse_sexp_propertize (ptrdiff_t charpos);
 192
/* Store VAL in the syntax_table_ slot of buffer B.
   This setter is used only in this file, so it can be private.  */
static void
bset_syntax_table (struct buffer *b, Lisp_Object val)
{
  b->syntax_table_ = val;
}
 199 \f
 200 /* Whether the syntax of the character C has the prefix flag set.  */
 201 bool
 202 syntax_prefix_flag_p (int c)
 203 {
 204   return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c));
 205 }
 206
/* Global state of syntax parser.  The setup and update routines in this
   file (SETUP_SYNTAX_TABLE, update_syntax_table, ...) read and write
   it.  */
struct gl_state_s gl_state;

/* 1 + max-number of intervals to scan to property-change.
   update_syntax_table stops widening its range once its step counter
   reaches this value.  */
enum { INTERVALS_AT_ONCE = 10 };
 211
/* Set the syntax entry VAL for char C in table TABLE.
   This is a thin wrapper around CHAR_TABLE_SET.  */

static void
SET_RAW_SYNTAX_ENTRY (Lisp_Object table, int c, Lisp_Object val)
{
  CHAR_TABLE_SET (table, c, val);
}
 219
/* Set the syntax entry VAL for char-range RANGE in table TABLE.
   RANGE is a cons (FROM . TO) specifying the range of characters.
   The work is delegated to Fset_char_table_range; its return value is
   discarded.  */

static void
SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table, Lisp_Object range,
                            Lisp_Object val)
{
  Fset_char_table_range (table, range, val);
}
 229
 230 /* Extract the information from the entry for character C
 231    in the current syntax table.  */
 232
 233 static Lisp_Object
 234 SYNTAX_MATCH (int c)
 235 {
 236   Lisp_Object ent = SYNTAX_ENTRY (c);
 237   return CONSP (ent) ? XCDR (ent) : Qnil;
 238 }
 239
 240 /* This should be called with FROM at the start of forward
 241    search, or after the last position of the backward search.  It
 242    makes sure that the first char is picked up with correct table, so
 243    one does not need to call UPDATE_SYNTAX_TABLE immediately after the
 244    call.
 245    Sign of COUNT gives the direction of the search.
 246  */
 247
 248 static void
 249 SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
 250 {
 251   SETUP_BUFFER_SYNTAX_TABLE ();
 252   gl_state.b_property = BEGV;
 253   gl_state.e_property = ZV + 1;
 254   gl_state.object = Qnil;
 255   gl_state.offset = 0;
 256   if (parse_sexp_lookup_properties)
 257     {
 258       if (count > 0)
 259         update_syntax_table_forward (from, true, Qnil);
 260       else if (from > BEGV)
 261         {
 262           update_syntax_table (from - 1, count, true, Qnil);
 263           parse_sexp_propertize (from - 1);
 264         }
 265     }
 266 }
 267
 268 /* Same as above, but in OBJECT.  If OBJECT is nil, use current buffer.
 269    If it is t (which is only used in fast_c_string_match_ignore_case),
 270    ignore properties altogether.
 271
 272    This is meant for regex.c to use.  For buffers, regex.c passes arguments
 273    to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
 274    So if it is a buffer, we set the offset field to BEGV.  */
 275
 276 void
 277 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
 278                                ptrdiff_t from, ptrdiff_t count)
 279 {
 280   SETUP_BUFFER_SYNTAX_TABLE ();
 281   gl_state.object = object;
 282   if (BUFFERP (gl_state.object))
 283     {
 284       struct buffer *buf = XBUFFER (gl_state.object);
 285       gl_state.b_property = 1;
 286       gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;
 287       gl_state.offset = BUF_BEGV (buf) - 1;
 288     }
 289   else if (NILP (gl_state.object))
 290     {
 291       gl_state.b_property = 1;
 292       gl_state.e_property = ZV - BEGV + 1;
 293       gl_state.offset = BEGV - 1;
 294     }
 295   else if (EQ (gl_state.object, Qt))
 296     {
 297       gl_state.b_property = 0;
 298       gl_state.e_property = PTRDIFF_MAX;
 299       gl_state.offset = 0;
 300     }
 301   else
 302     {
 303       gl_state.b_property = 0;
 304       gl_state.e_property = 1 + SCHARS (gl_state.object);
 305       gl_state.offset = 0;
 306     }
 307   if (parse_sexp_lookup_properties)
 308     update_syntax_table (from + gl_state.offset - (count <= 0),
 309                          count, 1, gl_state.object);
 310 }
 311
/* Update gl_state to an appropriate interval which contains CHARPOS.  The
   sign of COUNT gives the relative position of CHARPOS wrt the previously
   valid interval.  If INIT, only the [be]_property fields of gl_state are
   valid at start; the rest is filled in based on OBJECT.

   `gl_state.*_i' are the intervals, and CHARPOS is further in the search
   direction than the intervals - or in an interval.  We update the
   current syntax-table based on the `syntax-table' property of this
   interval, and update the interval to start further than CHARPOS - or
   be NULL.  We also update the limit in the search direction
   (e_property when COUNT > 0, b_property otherwise) to be the next
   value of charpos at which to call this subroutine again - or be
   before/after the start/end of OBJECT.

   Signals an error (via `error') when the cached interval is missing or
   when CHARPOS lies on the wrong side of it for the given COUNT.  */

void
update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
                     Lisp_Object object)
{
  Lisp_Object tmp_table;        /* `syntax-table' property of interval I.  */
  int cnt = 0;                  /* Steps taken by the widening loop below.  */
  bool invalidate = true;       /* May the old range have to be reset?  */
  INTERVAL i;

  if (init)
    {
      /* Fresh start: keep the caller-supplied bounds as the overall
         start/stop and look up the interval that holds CHARPOS.  */
      gl_state.old_prop = Qnil;
      gl_state.start = gl_state.b_property;
      gl_state.stop = gl_state.e_property;
      i = interval_of (charpos, object);
      gl_state.backward_i = gl_state.forward_i = i;
      invalidate = false;
      /* No interval was found: leave the bounds as the caller set them.  */
      if (!i)
        return;
      /* interval_of updates only ->position of the return value, so
         update the parents manually to speed up update_interval.  */
      while (!NULL_PARENT (i))
        {
          if (AM_RIGHT_CHILD (i))
            INTERVAL_PARENT (i)->position = i->position
              - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
              - TOTAL_LENGTH (INTERVAL_PARENT (i))
              + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
          else
            INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
              + TOTAL_LENGTH (i);
          i = INTERVAL_PARENT (i);
        }
      /* Back to the interval holding CHARPOS; the valid range is that
         interval, expressed relative to gl_state.offset.  */
      i = gl_state.forward_i;
      gl_state.b_property = i->position - gl_state.offset;
      gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
      goto update;
    }
  /* Not initializing: start from the cached interval on the side we are
     moving toward.  */
  i = count > 0 ? gl_state.forward_i : gl_state.backward_i;

  /* We are guaranteed to be called with CHARPOS either in i,
     or further off.  */
  if (!i)
    /* `error' is expected not to return: I is dereferenced below.  */
    error ("Error in syntax_table logic for to-the-end intervals");
  else if (charpos < i->position)               /* Move left.  */
    {
      if (count > 0)
        error ("Error in syntax_table logic for intervals <-");
      /* Update the interval.  */
      i = update_interval (i, charpos);
      /* The new interval does not end where the old range began: reset
         the forward side here and skip the invalidation step below.
         NOTE(review): INTERVAL_LAST_POS (i) is compared with b_property
         without subtracting gl_state.offset, unlike the assignments in
         this function -- confirm this is intended.  */
      if (INTERVAL_LAST_POS (i) != gl_state.b_property)
        {
          invalidate = false;
          gl_state.forward_i = i;
          gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
        }
    }
  else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right.  */
    {
      if (count < 0)
        error ("Error in syntax_table logic for intervals ->");
      /* Update the interval.  */
      i = update_interval (i, charpos);
      /* Mirror image of the "move left" case.  NOTE(review): i->position
         is likewise compared with e_property without the offset.  */
      if (i->position != gl_state.e_property)
        {
          invalidate = false;
          gl_state.backward_i = i;
          gl_state.b_property = i->position - gl_state.offset;
        }
    }

  update:
  tmp_table = textget (i->plist, Qsyntax_table);

  if (invalidate)
    invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */

  if (invalidate)               /* Did not get to adjacent interval.  */
    {                           /* with the same table => */
                                /* invalidate the old range.  */
      if (count > 0)
        {
          gl_state.backward_i = i;
          gl_state.b_property = i->position - gl_state.offset;
        }
      else
        {
          gl_state.forward_i = i;
          gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
        }
    }

  /* The property value changed: select the table to use from now on.  */
  if (!EQ (tmp_table, gl_state.old_prop))
    {
      gl_state.current_syntax_table = tmp_table;
      gl_state.old_prop = tmp_table;
      if (EQ (Fsyntax_table_p (tmp_table), Qt))
        {
          /* The property is itself a syntax table: use it directly.  */
          gl_state.use_global = 0;
        }
      else if (CONSP (tmp_table))
        {
          /* The property is a cons: record it as the global code
             (presumably a syntax descriptor applied to every char --
             confirm where use_global/global_code are consumed).  */
          gl_state.use_global = 1;
          gl_state.global_code = tmp_table;
        }
      else
        {
          /* Anything else: fall back to the buffer's syntax table.  */
          gl_state.use_global = 0;
          gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
        }
    }

  /* Widen the valid range over neighboring intervals in the search
     direction for as long as they carry the same property value.  The
     property comparison is skipped on the first iteration (cnt == 0),
     where I is the interval TMP_TABLE was read from.  */
  while (i)
    {
      if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
        {
          /* I has a different property value: the range stops at the
             edge of I that faces the range.  */
          if (count > 0)
            {
              gl_state.e_property = i->position - gl_state.offset;
              gl_state.forward_i = i;
            }
          else
            {
              gl_state.b_property
                = i->position + LENGTH (i) - gl_state.offset;
              gl_state.backward_i = i;
            }
          return;
        }
      else if (cnt == INTERVALS_AT_ONCE)
        {
          /* Step budget used up: let the range include I and stop.  */
          if (count > 0)
            {
              gl_state.e_property
                = i->position + LENGTH (i) - gl_state.offset
                /* e_property at EOB is not set to ZV but to ZV+1, so that
                   we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
                   having to check eob between the two.  */
                + (next_interval (i) ? 0 : 1);
              gl_state.forward_i = i;
            }
          else
            {
              gl_state.b_property = i->position - gl_state.offset;
              gl_state.backward_i = i;
            }
          return;
        }
      cnt++;
      i = count > 0 ? next_interval (i) : previous_interval (i);
    }
  eassert (i == NULL); /* This property goes to the end.  */
  /* Ran out of intervals: the range reaches the overall stop (forward)
     or start (backward) recorded when gl_state was initialized.  */
  if (count > 0)
    {
      gl_state.e_property = gl_state.stop;
      gl_state.forward_i = i;
    }
  else
    gl_state.b_property = gl_state.start;
}
 485
 486 static void
 487 parse_sexp_propertize (ptrdiff_t charpos)
 488 {
 489   EMACS_INT zv = ZV;
 490   if (syntax_propertize__done <= charpos
 491       && syntax_propertize__done < zv)
 492     {
 493       EMACS_INT modiffs = CHARS_MODIFF;
 494       safe_call1 (Qinternal__syntax_propertize,
 495                   make_number (min (zv, 1 + charpos)));
 496       if (modiffs != CHARS_MODIFF)
 497         error ("parse-sexp-propertize-function modified the buffer!");
 498       if (syntax_propertize__done <= charpos
 499           && syntax_propertize__done < zv)
 500         error ("parse-sexp-propertize-function did not move"
 501                " syntax-propertize--done");
 502       SETUP_SYNTAX_TABLE (charpos, 1);
 503     }
 504   else if (gl_state.e_property > syntax_propertize__done)
 505     {
 506       gl_state.e_property = syntax_propertize__done;
 507       gl_state.e_property_truncated = true;
 508     }
 509   else if (gl_state.e_property_truncated
 510            && gl_state.e_property < syntax_propertize__done)
 511     { /* When moving backward, e_property might be set without resetting
 512          e_property_truncated, so the e_property_truncated flag may
 513          occasionally be left raised spuriously.  This should be rare.  */
 514       gl_state.e_property_truncated = false;
 515       update_syntax_table_forward (charpos, false, Qnil);
 516     }
 517 }
 518
 519 void
 520 update_syntax_table_forward (ptrdiff_t charpos, bool init,
 521                              Lisp_Object object)
 522 {
 523   if (gl_state.e_property_truncated)
 524     {
 525       eassert (NILP (object));
 526       eassert (charpos >= gl_state.e_property);
 527       parse_sexp_propertize (charpos);
 528     }
 529   else
 530     {
 531       update_syntax_table (charpos, 1, init, object);
 532       if (NILP (object) && gl_state.e_property > syntax_propertize__done)
 533         parse_sexp_propertize (charpos);
 534     }
 535 }
 536 \f
 537 /* Returns true if char at CHARPOS is quoted.
 538    Global syntax-table data should be set up already to be good at CHARPOS
 539    or after.  On return global syntax data is good for lookup at CHARPOS.  */
 540
 541 static bool
 542 char_quoted (ptrdiff_t charpos, ptrdiff_t bytepos)
 543 {
 544   enum syntaxcode code;
 545   ptrdiff_t beg = BEGV;
 546   bool quoted = 0;
 547   ptrdiff_t orig = charpos;
 548
 549   while (charpos > beg)
 550     {
 551       int c;
 552       DEC_BOTH (charpos, bytepos);
 553
 554       UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
 555       c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
 556       code = SYNTAX (c);
 557       if (! (code == Scharquote || code == Sescape))
 558         break;
 559
 560       quoted = !quoted;
 561     }
 562
 563   UPDATE_SYNTAX_TABLE (orig);
 564   return quoted;
 565 }
 566
 567 /* Return the bytepos one character before BYTEPOS.
 568    We assume that BYTEPOS is not at the start of the buffer.  */
 569
 570 static ptrdiff_t
 571 dec_bytepos (ptrdiff_t bytepos)
 572 {
 573   if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
 574     return bytepos - 1;
 575
 576   DEC_POS (bytepos);
 577   return bytepos;
 578 }
 579 \f
 580 /* Return a defun-start position before POS and not too far before.
 581    It should be the last one before POS, or nearly the last.
 582
 583    When open_paren_in_column_0_is_defun_start is nonzero,
 584    only the beginning of the buffer is treated as a defun-start.
 585
 586    We record the information about where the scan started
 587    and what its result was, so that another call in the same area
 588    can return the same value very quickly.
 589
 590    There is no promise at which position the global syntax data is
 591    valid on return from the subroutine, so the caller should explicitly
 592    update the global data.  */
 593
 594 static ptrdiff_t
 595 find_defun_start (ptrdiff_t pos, ptrdiff_t pos_byte)
 596 {
 597   ptrdiff_t opoint = PT, opoint_byte = PT_BYTE;
 598
 599   /* Use previous finding, if it's valid and applies to this inquiry.  */
 600   if (current_buffer == find_start_buffer
 601       /* Reuse the defun-start even if POS is a little farther on.
 602          POS might be in the next defun, but that's ok.
 603          Our value may not be the best possible, but will still be usable.  */
 604       && pos <= find_start_pos + 1000
 605       && pos >= find_start_value
 606       && BEGV == find_start_begv
 607       && MODIFF == find_start_modiff)
 608     return find_start_value;
 609
 610   if (!open_paren_in_column_0_is_defun_start)
 611     {
 612       find_start_value = BEGV;
 613       find_start_value_byte = BEGV_BYTE;
 614       goto found;
 615     }
 616
 617   /* Back up to start of line.  */
 618   scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
 619
 620   /* We optimize syntax-table lookup for rare updates.  Thus we accept
 621      only those `^\s(' which are good in global _and_ text-property
 622      syntax-tables.  */
 623   SETUP_BUFFER_SYNTAX_TABLE ();
 624   while (PT > BEGV)
 625     {
 626       int c;
 627
 628       /* Open-paren at start of line means we may have found our
 629          defun-start.  */
 630       c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
 631       if (SYNTAX (c) == Sopen)
 632         {
 633           SETUP_SYNTAX_TABLE (PT + 1, -1);      /* Try again... */
 634           c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
 635           if (SYNTAX (c) == Sopen)
 636             break;
 637           /* Now fallback to the default value.  */
 638           SETUP_BUFFER_SYNTAX_TABLE ();
 639         }
 640       /* Move to beg of previous line.  */
 641       scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
 642     }
 643
 644   /* Record what we found, for the next try.  */
 645   find_start_value = PT;
 646   find_start_value_byte = PT_BYTE;
 647   TEMP_SET_PT_BOTH (opoint, opoint_byte);
 648
 649  found:
 650   find_start_buffer = current_buffer;
 651   find_start_modiff = MODIFF;
 652   find_start_begv = BEGV;
 653   find_start_pos = pos;
 654
 655   return find_start_value;
 656 }
 657 \f
 658 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE.  */
 659
 660 static bool
 661 prev_char_comend_first (ptrdiff_t pos, ptrdiff_t pos_byte)
 662 {
 663   int c;
 664   bool val;
 665
 666   DEC_BOTH (pos, pos_byte);
 667   UPDATE_SYNTAX_TABLE_BACKWARD (pos);
 668   c = FETCH_CHAR (pos_byte);
 669   val = SYNTAX_COMEND_FIRST (c);
 670   UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
 671   return val;
 672 }
 673
 674 /* Check whether charpos FROM is at the end of a comment.
 675    FROM_BYTE is the bytepos corresponding to FROM.
 676    Do not move back before STOP.
 677
 678    Return true if we find a comment ending at FROM/FROM_BYTE.
 679
 680    If successful, store the charpos of the comment's beginning
 681    into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
 682
 683    Global syntax data remains valid for backward search starting at
 684    the returned value (or at FROM, if the search was not successful).  */
 685
 686 static bool
 687 back_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
 688               bool comnested, int comstyle, ptrdiff_t *charpos_ptr,
 689               ptrdiff_t *bytepos_ptr)
 690 {
 691   /* Look back, counting the parity of string-quotes,
 692      and recording the comment-starters seen.
 693      When we reach a safe place, assume that's not in a string;
 694      then step the main scan to the earliest comment-starter seen
 695      an even number of string quotes away from the safe place.
 696
 697      OFROM[I] is position of the earliest comment-starter seen
 698      which is I+2X quotes from the comment-end.
 699      PARITY is current parity of quotes from the comment end.  */
 700   int string_style = -1;        /* Presumed outside of any string.  */
 701   bool string_lossage = 0;
 702   /* Not a real lossage: indicates that we have passed a matching comment
 703      starter plus a non-matching comment-ender, meaning that any matching
 704      comment-starter we might see later could be a false positive (hidden
 705      inside another comment).
 706      Test case:  { a (* b } c (* d *) */
 707   bool comment_lossage = 0;
 708   ptrdiff_t comment_end = from;
 709   ptrdiff_t comment_end_byte = from_byte;
 710   ptrdiff_t comstart_pos = 0;
 711   ptrdiff_t comstart_byte;
 712   /* Place where the containing defun starts,
 713      or 0 if we didn't come across it yet.  */
 714   ptrdiff_t defun_start = 0;
 715   ptrdiff_t defun_start_byte = 0;
 716   enum syntaxcode code;
 717   ptrdiff_t nesting = 1;                /* Current comment nesting.  */
 718   int c;
 719   int syntax = 0;
 720
 721   /* FIXME: A }} comment-ender style leads to incorrect behavior
 722      in the case of {{ c }}} because we ignore the last two chars which are
 723      assumed to be comment-enders although they aren't.  */
 724
 725   /* At beginning of range to scan, we're outside of strings;
 726      that determines quote parity to the comment-end.  */
 727   while (from != stop)
 728     {
 729       ptrdiff_t temp_byte;
 730       int prev_syntax;
 731       bool com2start, com2end, comstart;
 732
 733       /* Move back and examine a character.  */
 734       DEC_BOTH (from, from_byte);
 735       UPDATE_SYNTAX_TABLE_BACKWARD (from);
 736
 737       prev_syntax = syntax;
 738       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 739       syntax = SYNTAX_WITH_FLAGS (c);
 740       code = SYNTAX (c);
 741
 742       /* Check for 2-char comment markers.  */
 743       com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
 744                    && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
 745                    && (comstyle
 746                        == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
 747                    && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
 748                        || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
 749       com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
 750                  && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
 751       comstart = (com2start || code == Scomment);
 752
 753       /* Nasty cases with overlapping 2-char comment markers:
 754          - snmp-mode: -- c -- foo -- c --
 755                       --- c --
 756                       ------ c --
 757          - c-mode:    *||*
 758                       |* *|* *|
 759                       |*| |* |*|
 760                       ///   */
 761
 762       /* If a 2-char comment sequence partly overlaps with another,
 763          we don't try to be clever.  E.g. |*| in C, or }% in modes that
 764          have %..\n and %{..}%.  */
 765       if (from > stop && (com2end || comstart))
 766         {
 767           ptrdiff_t next = from, next_byte = from_byte;
 768           int next_c, next_syntax;
 769           DEC_BOTH (next, next_byte);
 770           UPDATE_SYNTAX_TABLE_BACKWARD (next);
 771           next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
 772           next_syntax = SYNTAX_WITH_FLAGS (next_c);
 773           if (((comstart || comnested)
 774                && SYNTAX_FLAGS_COMEND_SECOND (syntax)
 775                && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
 776               || ((com2end || comnested)
 777                   && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
 778                   && (comstyle
 779                       == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
 780                   && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
 781             goto lossage;
 782           /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
 783         }
 784
 785       if (com2start && comstart_pos == 0)
 786         /* We're looking at a comment starter.  But it might be a comment
 787            ender as well (see snmp-mode).  The first time we see one, we
 788            need to consider it as a comment starter,
 789            and the subsequent times as a comment ender.  */
 790         com2end = 0;
 791
 792       /* Turn a 2-char comment sequences into the appropriate syntax.  */
 793       if (com2end)
 794         code = Sendcomment;
 795       else if (com2start)
 796         code = Scomment;
 797       /* Ignore comment starters of a different style.  */
 798       else if (code == Scomment
 799                && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
 800                    || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
 801         continue;
 802
 803       /* Ignore escaped characters, except comment-enders which cannot
 804          be escaped.  */
 805       if ((Vcomment_end_can_be_escaped || code != Sendcomment)
 806           && char_quoted (from, from_byte))
 807         continue;
 808
 809       switch (code)
 810         {
 811         case Sstring_fence:
 812         case Scomment_fence:
 813           c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
 814         case Sstring:
 815           /* Track parity of quotes.  */
 816           if (string_style == -1)
 817             /* Entering a string.  */
 818             string_style = c;
 819           else if (string_style == c)
 820             /* Leaving the string.  */
 821             string_style = -1;
 822           else
 823             /* If we have two kinds of string delimiters.
 824                There's no way to grok this scanning backwards.  */
 825             string_lossage = 1;
 826           break;
 827
 828         case Scomment:
 829           /* We've already checked that it is the relevant comstyle.  */
 830           if (string_style != -1 || comment_lossage || string_lossage)
 831             /* There are odd string quotes involved, so let's be careful.
 832                Test case in Pascal: " { " a { " } */
 833             goto lossage;
 834
 835           if (!comnested)
 836             {
 837               /* Record best comment-starter so far.  */
 838               comstart_pos = from;
 839               comstart_byte = from_byte;
 840             }
 841           else if (--nesting <= 0)
 842             /* nested comments have to be balanced, so we don't need to
 843                keep looking for earlier ones.  We use here the same (slightly
 844                incorrect) reasoning as below:  since it is followed by uniform
 845                paired string quotes, this comment-start has to be outside of
 846                strings, else the comment-end itself would be inside a string. */
 847             goto done;
 848           break;
 849
 850         case Sendcomment:
 851           if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
 852               && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
 853                   || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
 854             /* This is the same style of comment ender as ours. */
 855             {
 856               if (comnested)
 857                 nesting++;
 858               else
 859                 /* Anything before that can't count because it would match
 860                    this comment-ender rather than ours.  */
 861                 from = stop;    /* Break out of the loop.  */
 862             }
 863           else if (comstart_pos != 0 || c != '\n')
 864             /* We're mixing comment styles here, so we'd better be careful.
 865                The (comstart_pos != 0 || c != '\n') check is not quite correct
 866                (we should just always set comment_lossage), but removing it
 867                would imply that any multiline comment in C would go through
 868                lossage, which seems overkill.
 869                The failure should only happen in the rare cases such as
 870                  { (* } *)   */
 871             comment_lossage = 1;
 872           break;
 873
 874         case Sopen:
 875           /* Assume a defun-start point is outside of strings.  */
 876           if (open_paren_in_column_0_is_defun_start
 877               && (from == stop
 878                   || (temp_byte = dec_bytepos (from_byte),
 879                       FETCH_CHAR (temp_byte) == '\n')))
 880             {
 881               defun_start = from;
 882               defun_start_byte = from_byte;
 883               from = stop;      /* Break out of the loop.  */
 884             }
 885           break;
 886
 887         default:
 888           break;
 889         }
 890     }
 891
 892   if (comstart_pos == 0)
 893     {
 894       from = comment_end;
 895       from_byte = comment_end_byte;
 896       UPDATE_SYNTAX_TABLE_FORWARD (comment_end);
 897     }
 898   /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
 899      or `done'), then we've found the beginning of the non-nested comment.  */
 900   else if (1)   /* !comnested */
 901     {
 902       from = comstart_pos;
 903       from_byte = comstart_byte;
 904       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
 905     }
 906   else lossage:
 907     {
 908       struct lisp_parse_state state;
 909       bool adjusted = true;
 910       /* We had two kinds of string delimiters mixed up
 911          together.  Decode this going forwards.
 912          Scan fwd from a known safe place (beginning-of-defun)
 913          to the one in question; this records where we
 914          last passed a comment starter.  */
 915       /* If we did not already find the defun start, find it now.  */
 916       if (defun_start == 0)
 917         {
 918           defun_start = find_defun_start (comment_end, comment_end_byte);
 919           defun_start_byte = find_start_value_byte;
 920           adjusted = (defun_start > BEGV);
 921         }
 922       do
 923         {
 924           internalize_parse_state (Qnil, &state);
 925           scan_sexps_forward (&state,
 926                               defun_start, defun_start_byte,
 927                               comment_end, TYPE_MINIMUM (EMACS_INT),
 928                               0, 0);
 929           defun_start = comment_end;
 930           if (!adjusted)
 931             {
 932               adjusted = true;
 933               find_start_value
 934                 = CONSP (state.levelstarts) ? XINT (XCAR (state.levelstarts))
 935                 : state.thislevelstart >= 0 ? state.thislevelstart
 936                 : find_start_value;
 937               find_start_value_byte = CHAR_TO_BYTE (find_start_value);
 938             }
 939
 940           if (state.incomment == (comnested ? 1 : -1)
 941               && state.comstyle == comstyle)
 942             from = state.comstr_start;
 943           else
 944             {
 945               from = comment_end;
 946               if (state.incomment)
 947                 /* If comment_end is inside some other comment, maybe ours
 948                    is nested, so we need to try again from within the
 949                    surrounding comment.  Example: { a (* " *)  */
 950                 {
 951                   /* FIXME: We should advance by one or two chars.  */
 952                   defun_start = state.comstr_start + 2;
 953                   defun_start_byte = CHAR_TO_BYTE (defun_start);
 954                 }
 955             }
 956         } while (defun_start < comment_end);
 957
 958       from_byte = CHAR_TO_BYTE (from);
 959       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
 960     }
 961
 962  done:
 963   *charpos_ptr = from;
 964   *bytepos_ptr = from_byte;
 965
 966   return from != comment_end;
 967 }
 968 \f
 969 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
 970        doc: /* Return t if OBJECT is a syntax table.
 971 Currently, any char-table counts as a syntax table.  */)
 972   (Lisp_Object object)
 973 {
 974   if (CHAR_TABLE_P (object)
 975       && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
 976     return Qt;
 977   return Qnil;
 978 }
 979
 980 static void
 981 check_syntax_table (Lisp_Object obj)
 982 {
 983   CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
 984               Qsyntax_table_p, obj);
 985 }
 986
 987 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
 988        doc: /* Return the current syntax table.
 989 This is the one specified by the current buffer.  */)
 990   (void)
 991 {
 992   return BVAR (current_buffer, syntax_table);
 993 }
 994
 995 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
 996    Sstandard_syntax_table, 0, 0, 0,
 997        doc: /* Return the standard syntax table.
 998 This is the one used for new buffers.  */)
 999   (void)
1000 {
1001   return Vstandard_syntax_table;
1002 }
1003
1004 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
1005        doc: /* Construct a new syntax table and return it.
1006 It is a copy of the TABLE, which defaults to the standard syntax table.  */)
1007   (Lisp_Object table)
1008 {
1009   Lisp_Object copy;
1010
1011   if (!NILP (table))
1012     check_syntax_table (table);
1013   else
1014     table = Vstandard_syntax_table;
1015
1016   copy = Fcopy_sequence (table);
1017
1018   /* Only the standard syntax table should have a default element.
1019      Other syntax tables should inherit from parents instead.  */
1020   set_char_table_defalt (copy, Qnil);
1021
1022   /* Copied syntax tables should all have parents.
1023      If we copied one with no parent, such as the standard syntax table,
1024      use the standard syntax table as the copy's parent.  */
1025   if (NILP (XCHAR_TABLE (copy)->parent))
1026     Fset_char_table_parent (copy, Vstandard_syntax_table);
1027   return copy;
1028 }
1029
1030 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
1031        doc: /* Select a new syntax table for the current buffer.
1032 One argument, a syntax table.  */)
1033   (Lisp_Object table)
1034 {
1035   int idx;
1036   check_syntax_table (table);
1037   bset_syntax_table (current_buffer, table);
1038   /* Indicate that this buffer now has a specified syntax table.  */
1039   idx = PER_BUFFER_VAR_IDX (syntax_table);
1040   SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
1041   return table;
1042 }
1043 \f
1044 /* Convert a letter which signifies a syntax code
1045  into the code it signifies.
1046  This is used by modify-syntax-entry, and other things.  */
1047
1048 unsigned char const syntax_spec_code[0400] =
1049   { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1050     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1051     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1052     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1053     Swhitespace, Scomment_fence, Sstring, 0377, Smath, 0377, 0377, Squote,
1054     Sopen, Sclose, 0377, 0377, 0377, Swhitespace, Spunct, Scharquote,
1055     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1056     0377, 0377, 0377, 0377, Scomment, 0377, Sendcomment, 0377,
1057     Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* @, A ... */
1058     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1059     0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1060     0377, 0377, 0377, 0377, Sescape, 0377, 0377, Ssymbol,
1061     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* `, a, ... */
1062     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1063     0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1064     0377, 0377, 0377, 0377, Sstring_fence, 0377, 0377, 0377
1065   };
1066
/* The descriptor letter for each syntax code, indexed by the code.
   The trailing comments give the class each letter stands for in
   syntax_spec_code.  */

char const syntax_code_spec[16] =
  {
    ' ',        /* Swhitespace */
    '.',        /* Spunct */
    'w',        /* Sword */
    '_',        /* Ssymbol */
    '(',        /* Sopen */
    ')',        /* Sclose */
    '\'',       /* Squote */
    '\"',       /* Sstring */
    '$',        /* Smath */
    '\\',       /* Sescape */
    '/',        /* Scharquote */
    '<',        /* Scomment */
    '>',        /* Sendcomment */
    '@',        /* Sinherit */
    '!',        /* Scomment_fence */
    '|'         /* Sstring_fence */
  };
1074
1075 /* Indexed by syntax code, give the object (cons of syntax code and
1076    nil) to be stored in syntax table.  Since these objects can be
1077    shared among syntax tables, we generate them in advance.  By
1078    sharing objects, the function `describe-syntax' can give a more
1079    compact listing.  */
1080 static Lisp_Object Vsyntax_code_object;
1081
1082 \f
1083 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
1084        doc: /* Return the syntax code of CHARACTER, described by a character.
1085 For example, if CHARACTER is a word constituent, the
1086 character `w' (119) is returned.
1087 The characters that correspond to various syntax codes
1088 are listed in the documentation of `modify-syntax-entry'.  */)
1089   (Lisp_Object character)
1090 {
1091   int char_int;
1092   CHECK_CHARACTER (character);
1093   char_int = XINT (character);
1094   SETUP_BUFFER_SYNTAX_TABLE ();
1095   return make_number (syntax_code_spec[SYNTAX (char_int)]);
1096 }
1097
1098 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
1099        doc: /* Return the matching parenthesis of CHARACTER, or nil if none.  */)
1100   (Lisp_Object character)
1101 {
1102   int char_int;
1103   enum syntaxcode code;
1104   CHECK_CHARACTER (character);
1105   char_int = XINT (character);
1106   SETUP_BUFFER_SYNTAX_TABLE ();
1107   code = SYNTAX (char_int);
1108   if (code == Sopen || code == Sclose)
1109     return SYNTAX_MATCH (char_int);
1110   return Qnil;
1111 }
1112
1113 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
1114        doc: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1115 STRING should be a string of the form allowed as argument of
1116 `modify-syntax-entry'.  The return value is a raw syntax descriptor: a
1117 cons cell (CODE . MATCHING-CHAR) which can be used, for example, as
1118 the value of a `syntax-table' text property.  */)
1119   (Lisp_Object string)
1120 {
1121   const unsigned char *p;
1122   int val;
1123   Lisp_Object match;
1124
1125   CHECK_STRING (string);
1126
1127   p = SDATA (string);
1128   val = syntax_spec_code[*p++];
1129   if (val == 0377)
1130     error ("Invalid syntax description letter: %c", p[-1]);
1131
1132   if (val == Sinherit)
1133     return Qnil;
1134
1135   if (*p)
1136     {
1137       int len;
1138       int character = STRING_CHAR_AND_LENGTH (p, len);
1139       XSETINT (match, character);
1140       if (XFASTINT (match) == ' ')
1141         match = Qnil;
1142       p += len;
1143     }
1144   else
1145     match = Qnil;
1146
1147   while (*p)
1148     switch (*p++)
1149       {
1150       case '1':
1151         val |= 1 << 16;
1152         break;
1153
1154       case '2':
1155         val |= 1 << 17;
1156         break;
1157
1158       case '3':
1159         val |= 1 << 18;
1160         break;
1161
1162       case '4':
1163         val |= 1 << 19;
1164         break;
1165
1166       case 'p':
1167         val |= 1 << 20;
1168         break;
1169
1170       case 'b':
1171         val |= 1 << 21;
1172         break;
1173
1174       case 'n':
1175         val |= 1 << 22;
1176         break;
1177
1178       case 'c':
1179         val |= 1 << 23;
1180         break;
1181       }
1182
1183   if (val < ASIZE (Vsyntax_code_object) && NILP (match))
1184     return AREF (Vsyntax_code_object, val);
1185   else
1186     /* Since we can't use a shared object, let's make a new one.  */
1187     return Fcons (make_number (val), match);
1188 }
1189
1190 /* I really don't know why this is interactive
1191    help-form should at least be made useful whilst reading the second arg.  */
1192 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
1193   "cSet syntax for character: \nsSet syntax for %s to: ",
1194        doc: /* Set syntax for character CHAR according to string NEWENTRY.
1195 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1196  the current buffer's syntax table.
1197 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1198 in the range MIN to MAX are changed.
1199 The first character of NEWENTRY should be one of the following:
1200   Space or -  whitespace syntax.    w   word constituent.
1201   _           symbol constituent.   .   punctuation.
1202   (           open-parenthesis.     )   close-parenthesis.
1203   "           string quote.         \\   escape.
1204   $           paired delimiter.     \\='   expression quote or prefix operator.
1205   <           comment starter.      >   comment ender.
1206   /           character-quote.      @   inherit from parent table.
1207   |           generic string fence. !   generic comment fence.
1208
1209 Only single-character comment start and end sequences are represented thus.
1210 Two-character sequences are represented as described below.
1211 The second character of NEWENTRY is the matching parenthesis,
1212  used only if the first character is `(' or `)'.
1213 Any additional characters are flags.
1214 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1215  1 means CHAR is the start of a two-char comment start sequence.
1216  2 means CHAR is the second character of such a sequence.
1217  3 means CHAR is the start of a two-char comment end sequence.
1218  4 means CHAR is the second character of such a sequence.
1219
1220 There can be several orthogonal comment sequences.  This is to support
1221 language modes such as C++.  By default, all comment sequences are of style
1222 a, but you can set the comment sequence style to b (on the second character
1223 of a comment-start, and the first character of a comment-end sequence) and/or
1224 c (on any of its chars) using this flag:
1225  b means CHAR is part of comment sequence b.
1226  c means CHAR is part of comment sequence c.
1227  n means CHAR is part of a nestable comment sequence.
1228
1229  p means CHAR is a prefix character for `backward-prefix-chars';
1230    such characters are treated as whitespace when they occur
1231    between expressions.
1232 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE)  */)
1233   (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1234 {
1235   if (CONSP (c))
1236     {
1237       CHECK_CHARACTER_CAR (c);
1238       CHECK_CHARACTER_CDR (c);
1239     }
1240   else
1241     CHECK_CHARACTER (c);
1242
1243   if (NILP (syntax_table))
1244     syntax_table = BVAR (current_buffer, syntax_table);
1245   else
1246     check_syntax_table (syntax_table);
1247
1248   newentry = Fstring_to_syntax (newentry);
1249   if (CONSP (c))
1250     SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1251   else
1252     SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1253
1254   /* We clear the regexp cache, since character classes can now have
1255      different values from those in the compiled regexps.*/
1256   clear_regexp_cache ();
1257
1258   return Qnil;
1259 }
1260 \f
1261 /* Dump syntax table to buffer in human-readable format */
1262
1263 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1264        Sinternal_describe_syntax_value, 1, 1, 0,
1265        doc: /* Insert a description of the internal syntax description SYNTAX at point.  */)
1266   (Lisp_Object syntax)
1267 {
1268   int code, syntax_code;
1269   bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested;
1270   char str[2];
1271   Lisp_Object first, match_lisp, value = syntax;
1272
1273   if (NILP (value))
1274     {
1275       insert_string ("default");
1276       return syntax;
1277     }
1278
1279   if (CHAR_TABLE_P (value))
1280     {
1281       insert_string ("deeper char-table ...");
1282       return syntax;
1283     }
1284
1285   if (!CONSP (value))
1286     {
1287       insert_string ("invalid");
1288       return syntax;
1289     }
1290
1291   first = XCAR (value);
1292   match_lisp = XCDR (value);
1293
1294   if (!INTEGERP (first) || !(NILP (match_lisp) || CHARACTERP (match_lisp)))
1295     {
1296       insert_string ("invalid");
1297       return syntax;
1298     }
1299
1300   syntax_code = XINT (first) & INT_MAX;
1301   code = syntax_code & 0377;
1302   start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1303   start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);
1304   end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1305   end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1306   prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1307   comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1308   comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1309   comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1310
1311   if (Smax <= code)
1312     {
1313       insert_string ("invalid");
1314       return syntax;
1315     }
1316
1317   str[0] = syntax_code_spec[code], str[1] = 0;
1318   insert (str, 1);
1319
1320   if (NILP (match_lisp))
1321     insert (" ", 1);
1322   else
1323     insert_char (XINT (match_lisp));
1324
1325   if (start1)
1326     insert ("1", 1);
1327   if (start2)
1328     insert ("2", 1);
1329
1330   if (end1)
1331     insert ("3", 1);
1332   if (end2)
1333     insert ("4", 1);
1334
1335   if (prefix)
1336     insert ("p", 1);
1337   if (comstyleb)
1338     insert ("b", 1);
1339   if (comstylec)
1340     insert ("c", 1);
1341   if (comnested)
1342     insert ("n", 1);
1343
1344   insert_string ("\twhich means: ");
1345
1346   switch (code)
1347     {
1348     case Swhitespace:
1349       insert_string ("whitespace"); break;
1350     case Spunct:
1351       insert_string ("punctuation"); break;
1352     case Sword:
1353       insert_string ("word"); break;
1354     case Ssymbol:
1355       insert_string ("symbol"); break;
1356     case Sopen:
1357       insert_string ("open"); break;
1358     case Sclose:
1359       insert_string ("close"); break;
1360     case Squote:
1361       insert_string ("prefix"); break;
1362     case Sstring:
1363       insert_string ("string"); break;
1364     case Smath:
1365       insert_string ("math"); break;
1366     case Sescape:
1367       insert_string ("escape"); break;
1368     case Scharquote:
1369       insert_string ("charquote"); break;
1370     case Scomment:
1371       insert_string ("comment"); break;
1372     case Sendcomment:
1373       insert_string ("endcomment"); break;
1374     case Sinherit:
1375       insert_string ("inherit"); break;
1376     case Scomment_fence:
1377       insert_string ("comment fence"); break;
1378     case Sstring_fence:
1379       insert_string ("string fence"); break;
1380     default:
1381       insert_string ("invalid");
1382       return syntax;
1383     }
1384
1385   if (!NILP (match_lisp))
1386     {
1387       insert_string (", matches ");
1388       insert_char (XINT (match_lisp));
1389     }
1390
1391   if (start1)
1392     insert_string (",\n\t  is the first character of a comment-start sequence");
1393   if (start2)
1394     insert_string (",\n\t  is the second character of a comment-start sequence");
1395
1396   if (end1)
1397     insert_string (",\n\t  is the first character of a comment-end sequence");
1398   if (end2)
1399     insert_string (",\n\t  is the second character of a comment-end sequence");
1400   if (comstyleb)
1401     insert_string (" (comment style b)");
1402   if (comstylec)
1403     insert_string (" (comment style c)");
1404   if (comnested)
1405     insert_string (" (nestable)");
1406
1407   if (prefix)
1408     {
1409       AUTO_STRING (prefixdoc,
1410                    ",\n\t  is a prefix character for `backward-prefix-chars'");
1411       insert1 (Fsubstitute_command_keys (prefixdoc));
1412     }
1413
1414   return syntax;
1415 }
1416 \f
1417 /* Return the position across COUNT words from FROM.
1418    If that many words cannot be found before the end of the buffer, return 0.
1419    COUNT negative means scan backward and stop at word beginning.  */
1420
1421 ptrdiff_t
1422 scan_words (register ptrdiff_t from, register EMACS_INT count)
1423 {
1424   register ptrdiff_t beg = BEGV;
1425   register ptrdiff_t end = ZV;
1426   register ptrdiff_t from_byte = CHAR_TO_BYTE (from);
1427   register enum syntaxcode code;
1428   int ch0, ch1;
1429   Lisp_Object func, pos;
1430
1431   immediate_quit = 1;
1432   QUIT;
1433
1434   SETUP_SYNTAX_TABLE (from, count);
1435
1436   while (count > 0)
1437     {
1438       while (1)
1439         {
1440           if (from == end)
1441             {
1442               immediate_quit = 0;
1443               return 0;
1444             }
1445           UPDATE_SYNTAX_TABLE_FORWARD (from);
1446           ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1447           code = SYNTAX (ch0);
1448           INC_BOTH (from, from_byte);
1449           if (words_include_escapes
1450               && (code == Sescape || code == Scharquote))
1451             break;
1452           if (code == Sword)
1453             break;
1454         }
1455       /* Now CH0 is a character which begins a word and FROM is the
1456          position of the next character.  */
1457       func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1458       if (! NILP (Ffboundp (func)))
1459         {
1460           pos = call2 (func, make_number (from - 1), make_number (end));
1461           if (INTEGERP (pos) && from < XINT (pos) && XINT (pos) <= ZV)
1462             {
1463               from = XINT (pos);
1464               from_byte = CHAR_TO_BYTE (from);
1465             }
1466         }
1467       else
1468         {
1469           while (1)
1470             {
1471               if (from == end) break;
1472               UPDATE_SYNTAX_TABLE_FORWARD (from);
1473               ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1474               code = SYNTAX (ch1);
1475               if ((code != Sword
1476                    && (! words_include_escapes
1477                        || (code != Sescape && code != Scharquote)))
1478                   || word_boundary_p (ch0, ch1))
1479                 break;
1480               INC_BOTH (from, from_byte);
1481               ch0 = ch1;
1482             }
1483         }
1484       count--;
1485     }
1486   while (count < 0)
1487     {
1488       while (1)
1489         {
1490           if (from == beg)
1491             {
1492               immediate_quit = 0;
1493               return 0;
1494             }
1495           DEC_BOTH (from, from_byte);
1496           UPDATE_SYNTAX_TABLE_BACKWARD (from);
1497           ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1498           code = SYNTAX (ch1);
1499           if (words_include_escapes
1500               && (code == Sescape || code == Scharquote))
1501             break;
1502           if (code == Sword)
1503             break;
1504         }
1505       /* Now CH1 is a character which ends a word and FROM is the
1506          position of it.  */
1507       func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1508       if (! NILP (Ffboundp (func)))
1509         {
1510           pos = call2 (func, make_number (from), make_number (beg));
1511           if (INTEGERP (pos) && BEGV <= XINT (pos) && XINT (pos) < from)
1512             {
1513               from = XINT (pos);
1514               from_byte = CHAR_TO_BYTE (from);
1515             }
1516         }
1517       else
1518         {
1519           while (1)
1520             {
1521               if (from == beg)
1522                 break;
1523               DEC_BOTH (from, from_byte);
1524               UPDATE_SYNTAX_TABLE_BACKWARD (from);
1525               ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1526               code = SYNTAX (ch0);
1527               if ((code != Sword
1528                    && (! words_include_escapes
1529                        || (code != Sescape && code != Scharquote)))
1530                   || word_boundary_p (ch0, ch1))
1531                 {
1532                   INC_BOTH (from, from_byte);
1533                   break;
1534                 }
1535               ch1 = ch0;
1536             }
1537         }
1538       count++;
1539     }
1540
1541   immediate_quit = 0;
1542
1543   return from;
1544 }
1545
1546 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1547        doc: /* Move point forward ARG words (backward if ARG is negative).
1548 If ARG is omitted or nil, move point forward one word.
1549 Normally returns t.
1550 If an edge of the buffer or a field boundary is reached, point is
1551 left there and the function returns nil.  Field boundaries are not
1552 noticed if `inhibit-field-text-motion' is non-nil.
1553
1554 The word boundaries are normally determined by the buffer's syntax
1555 table, but `find-word-boundary-function-table', such as set up
1556 by `subword-mode', can change that.  If a Lisp program needs to
1557 move by words determined strictly by the syntax table, it should
1558 use `forward-word-strictly' instead.  */)
1559   (Lisp_Object arg)
1560 {
1561   Lisp_Object tmp;
1562   ptrdiff_t orig_val, val;
1563
1564   if (NILP (arg))
1565     XSETFASTINT (arg, 1);
1566   else
1567     CHECK_NUMBER (arg);
1568
1569   val = orig_val = scan_words (PT, XINT (arg));
1570   if (! orig_val)
1571     val = XINT (arg) > 0 ? ZV : BEGV;
1572
1573   /* Avoid jumping out of an input field.  */
1574   tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1575                              Qnil, Qnil, Qnil);
1576   val = XFASTINT (tmp);
1577
1578   SET_PT (val);
1579   return val == orig_val ? Qt : Qnil;
1580 }
1581 \f
1582 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1583        doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1584 STRING is like the inside of a `[...]' in a regular expression
1585 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1586  (but not at the end of a range; quoting is never needed there).
1587 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1588 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1589 Char classes, e.g. `[:alpha:]', are supported.
1590
1591 Returns the distance traveled, either zero or positive.  */)
1592   (Lisp_Object string, Lisp_Object lim)
1593 {
1594   return skip_chars (1, string, lim, 1);
1595 }
1596
1597 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1598        doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1599 See `skip-chars-forward' for details.
1600 Returns the distance traveled, either zero or negative.  */)
1601   (Lisp_Object string, Lisp_Object lim)
1602 {
1603   return skip_chars (0, string, lim, 1);
1604 }
1605
1606 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1607        doc: /* Move point forward across chars in specified syntax classes.
1608 SYNTAX is a string of syntax code characters.
1609 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1610 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1611 This function returns the distance traveled, either zero or positive.  */)
1612   (Lisp_Object syntax, Lisp_Object lim)
1613 {
1614   return skip_syntaxes (1, syntax, lim);
1615 }
1616
1617 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1618        doc: /* Move point backward across chars in specified syntax classes.
1619 SYNTAX is a string of syntax code characters.
1620 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1621 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1622 This function returns either zero or a negative number, and the absolute value
1623 of this is the distance traveled.  */)
1624   (Lisp_Object syntax, Lisp_Object lim)
1625 {
1626   return skip_syntaxes (0, syntax, lim);
1627 }
1628
1629 static Lisp_Object
1630 skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1631             bool handle_iso_classes)
1632 {
1633   int c;
1634   char fastmap[0400];
1635   /* Store the ranges of non-ASCII characters.  */
1636   int *char_ranges UNINIT;
1637   int n_char_ranges = 0;
1638   bool negate = 0;
1639   ptrdiff_t i, i_byte;
1640   /* True if the current buffer is multibyte and the region contains
1641      non-ASCII chars.  */
1642   bool multibyte;
1643   /* True if STRING is multibyte and it contains non-ASCII chars.  */
1644   bool string_multibyte;
1645   ptrdiff_t size_byte;
1646   const unsigned char *str;
1647   int len;
1648   Lisp_Object iso_classes;
1649   USE_SAFE_ALLOCA;
1650
1651   CHECK_STRING (string);
1652   iso_classes = Qnil;
1653
1654   if (NILP (lim))
1655     XSETINT (lim, forwardp ? ZV : BEGV);
1656   else
1657     CHECK_NUMBER_COERCE_MARKER (lim);
1658
1659   /* In any case, don't allow scan outside bounds of buffer.  */
1660   if (XINT (lim) > ZV)
1661     XSETFASTINT (lim, ZV);
1662   if (XINT (lim) < BEGV)
1663     XSETFASTINT (lim, BEGV);
1664
1665   multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1666                && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1667   string_multibyte = SBYTES (string) > SCHARS (string);
1668
1669   memset (fastmap, 0, sizeof fastmap);
1670
1671   str = SDATA (string);
1672   size_byte = SBYTES (string);
1673
1674   i_byte = 0;
1675   if (i_byte < size_byte
1676       && SREF (string, 0) == '^')
1677     {
1678       negate = 1; i_byte++;
1679     }
1680
1681   /* Find the characters specified and set their elements of fastmap.
1682      Handle backslashes and ranges specially.
1683
1684      If STRING contains non-ASCII characters, setup char_ranges for
1685      them and use fastmap only for their leading codes.  */
1686
1687   if (! string_multibyte)
1688     {
1689       bool string_has_eight_bit = 0;
1690
1691       /* At first setup fastmap.  */
1692       while (i_byte < size_byte)
1693         {
1694           c = str[i_byte++];
1695
1696           if (handle_iso_classes && c == '['
1697               && i_byte < size_byte
1698               && str[i_byte] == ':')
1699             {
1700               const unsigned char *class_beg = str + i_byte + 1;
1701               const unsigned char *class_end = class_beg;
1702               const unsigned char *class_limit = str + size_byte - 2;
1703               /* Leave room for the null.  */
1704               unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1705               re_wctype_t cc;
1706
1707               if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1708                 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1709
1710               while (class_end < class_limit
1711                      && *class_end >= 'a' && *class_end <= 'z')
1712                 class_end++;
1713
1714               if (class_end == class_beg
1715                   || *class_end != ':' || class_end[1] != ']')
1716                 goto not_a_class_name;
1717
1718               memcpy (class_name, class_beg, class_end - class_beg);
1719               class_name[class_end - class_beg] = 0;
1720
1721               cc = re_wctype (class_name);
1722               if (cc == 0)
1723                 error ("Invalid ISO C character class");
1724
1725               iso_classes = Fcons (make_number (cc), iso_classes);
1726
1727               i_byte = class_end + 2 - str;
1728               continue;
1729             }
1730
1731         not_a_class_name:
1732           if (c == '\\')
1733             {
1734               if (i_byte == size_byte)
1735                 break;
1736
1737               c = str[i_byte++];
1738             }
1739           /* Treat `-' as range character only if another character
1740              follows.  */
1741           if (i_byte + 1 < size_byte
1742               && str[i_byte] == '-')
1743             {
1744               int c2;
1745
1746               /* Skip over the dash.  */
1747               i_byte++;
1748
1749               /* Get the end of the range.  */
1750               c2 = str[i_byte++];
1751               if (c2 == '\\'
1752                   && i_byte < size_byte)
1753                 c2 = str[i_byte++];
1754
1755               if (c <= c2)
1756                 {
1757                   int lim2 = c2 + 1;
1758                   while (c < lim2)
1759                     fastmap[c++] = 1;
1760                   if (! ASCII_CHAR_P (c2))
1761                     string_has_eight_bit = 1;
1762                 }
1763             }
1764           else
1765             {
1766               fastmap[c] = 1;
1767               if (! ASCII_CHAR_P (c))
1768                 string_has_eight_bit = 1;
1769             }
1770         }
1771
1772       /* If the current range is multibyte and STRING contains
1773          eight-bit chars, arrange fastmap and setup char_ranges for
1774          the corresponding multibyte chars.  */
1775       if (multibyte && string_has_eight_bit)
1776         {
1777           char *p1;
1778           char himap[0200 + 1];
1779           memcpy (himap, fastmap + 0200, 0200);
1780           himap[0200] = 0;
1781           memset (fastmap + 0200, 0, 0200);
1782           SAFE_NALLOCA (char_ranges, 2, 128);
1783           i = 0;
1784
1785           while ((p1 = memchr (himap + i, 1, 0200 - i)))
1786             {
1787               /* Deduce the next range C..C2 from the next clump of 1s
1788                  in HIMAP starting with &HIMAP[I].  HIMAP is the high
1789                  order half of the old FASTMAP.  */
1790               int c2, leading_code;
1791               i = p1 - himap;
1792               c = BYTE8_TO_CHAR (i + 0200);
1793               i += strlen (p1);
1794               c2 = BYTE8_TO_CHAR (i + 0200 - 1);
1795
1796               char_ranges[n_char_ranges++] = c;
1797               char_ranges[n_char_ranges++] = c2;
1798               leading_code = CHAR_LEADING_CODE (c);
1799               memset (fastmap + leading_code, 1,
1800                       CHAR_LEADING_CODE (c2) - leading_code + 1);
1801             }
1802         }
1803     }
1804   else                          /* STRING is multibyte */
1805     {
1806       SAFE_NALLOCA (char_ranges, 2, SCHARS (string));
1807
1808       while (i_byte < size_byte)
1809         {
1810           int leading_code = str[i_byte];
1811           c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1812           i_byte += len;
1813
1814           if (handle_iso_classes && c == '['
1815               && i_byte < size_byte
1816               && STRING_CHAR (str + i_byte) == ':')
1817             {
1818               const unsigned char *class_beg = str + i_byte + 1;
1819               const unsigned char *class_end = class_beg;
1820               const unsigned char *class_limit = str + size_byte - 2;
1821               /* Leave room for the null.        */
1822               unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1823               re_wctype_t cc;
1824
1825               if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1826                 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1827
1828               while (class_end < class_limit
1829                      && *class_end >= 'a' && *class_end <= 'z')
1830                 class_end++;
1831
1832               if (class_end == class_beg
1833                   || *class_end != ':' || class_end[1] != ']')
1834                 goto not_a_class_name_multibyte;
1835
1836               memcpy (class_name, class_beg, class_end - class_beg);
1837               class_name[class_end - class_beg] = 0;
1838
1839               cc = re_wctype (class_name);
1840               if (cc == 0)
1841                 error ("Invalid ISO C character class");
1842
1843               iso_classes = Fcons (make_number (cc), iso_classes);
1844
1845               i_byte = class_end + 2 - str;
1846               continue;
1847             }
1848
1849         not_a_class_name_multibyte:
1850           if (c == '\\')
1851             {
1852               if (i_byte == size_byte)
1853                 break;
1854
1855               leading_code = str[i_byte];
1856               c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1857               i_byte += len;
1858             }
1859           /* Treat `-' as range character only if another character
1860              follows.  */
1861           if (i_byte + 1 < size_byte
1862               && str[i_byte] == '-')
1863             {
1864               int c2, leading_code2;
1865
1866               /* Skip over the dash.  */
1867               i_byte++;
1868
1869               /* Get the end of the range.  */
1870               leading_code2 = str[i_byte];
1871               c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1872               i_byte += len;
1873
1874               if (c2 == '\\'
1875                   && i_byte < size_byte)
1876                 {
1877                   leading_code2 = str[i_byte];
1878                   c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1879                   i_byte += len;
1880                 }
1881
1882               if (c > c2)
1883                 continue;
1884               if (ASCII_CHAR_P (c))
1885                 {
1886                   while (c <= c2 && c < 0x80)
1887                     fastmap[c++] = 1;
1888                   leading_code = CHAR_LEADING_CODE (c);
1889                 }
1890               if (! ASCII_CHAR_P (c))
1891                 {
1892                   int lim2 = leading_code2 + 1;
1893                   while (leading_code < lim2)
1894                     fastmap[leading_code++] = 1;
1895                   if (c <= c2)
1896                     {
1897                       char_ranges[n_char_ranges++] = c;
1898                       char_ranges[n_char_ranges++] = c2;
1899                     }
1900                 }
1901             }
1902           else
1903             {
1904               if (ASCII_CHAR_P (c))
1905                 fastmap[c] = 1;
1906               else
1907                 {
1908                   fastmap[leading_code] = 1;
1909                   char_ranges[n_char_ranges++] = c;
1910                   char_ranges[n_char_ranges++] = c;
1911                 }
1912             }
1913         }
1914
1915       /* If the current range is unibyte and STRING contains non-ASCII
1916          chars, arrange fastmap for the corresponding unibyte
1917          chars.  */
1918
1919       if (! multibyte && n_char_ranges > 0)
1920         {
1921           memset (fastmap + 0200, 0, 0200);
1922           for (i = 0; i < n_char_ranges; i += 2)
1923             {
1924               int c1 = char_ranges[i];
1925               int lim2 = char_ranges[i + 1] + 1;
1926
1927               for (; c1 < lim2; c1++)
1928                 {
1929                   int b = CHAR_TO_BYTE_SAFE (c1);
1930                   if (b >= 0)
1931                     fastmap[b] = 1;
1932                 }
1933             }
1934         }
1935     }
1936
1937   /* If ^ was the first character, complement the fastmap.  */
1938   if (negate)
1939     {
1940       if (! multibyte)
1941         for (i = 0; i < sizeof fastmap; i++)
1942           fastmap[i] ^= 1;
1943       else
1944         {
1945           for (i = 0; i < 0200; i++)
1946             fastmap[i] ^= 1;
1947           /* All non-ASCII chars possibly match.  */
1948           for (; i < sizeof fastmap; i++)
1949             fastmap[i] = 1;
1950         }
1951     }
1952
1953   {
1954     ptrdiff_t start_point = PT;
1955     ptrdiff_t pos = PT;
1956     ptrdiff_t pos_byte = PT_BYTE;
1957     unsigned char *p = PT_ADDR, *endp, *stop;
1958
1959     if (forwardp)
1960       {
1961         endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1962         stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1963       }
1964     else
1965       {
1966         endp = CHAR_POS_ADDR (XINT (lim));
1967         stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1968       }
1969
1970     immediate_quit = 1;
1971     /* This code may look up syntax tables using functions that rely on the
1972        gl_state object.  To make sure this object is not out of date,
1973        let's initialize it manually.
1974        We ignore syntax-table text-properties for now, since that's
1975        what we've done in the past.  */
1976     SETUP_BUFFER_SYNTAX_TABLE ();
1977     if (forwardp)
1978       {
1979         if (multibyte)
1980           while (1)
1981             {
1982               int nbytes;
1983
1984               if (p >= stop)
1985                 {
1986                   if (p >= endp)
1987                     break;
1988                   p = GAP_END_ADDR;
1989                   stop = endp;
1990                 }
1991               c = STRING_CHAR_AND_LENGTH (p, nbytes);
1992               if (! NILP (iso_classes) && in_classes (c, iso_classes))
1993                 {
1994                   if (negate)
1995                     break;
1996                   else
1997                     goto fwd_ok;
1998                 }
1999
2000               if (! fastmap[*p])
2001                 break;
2002               if (! ASCII_CHAR_P (c))
2003                 {
2004                   /* As we are looking at a multibyte character, we
2005                      must look up the character in the table
2006                      CHAR_RANGES.  If there's no data in the table,
2007                      that character is not what we want to skip.  */
2008
2009                   /* The following code do the right thing even if
2010                      n_char_ranges is zero (i.e. no data in
2011                      CHAR_RANGES).  */
2012                   for (i = 0; i < n_char_ranges; i += 2)
2013                     if (c >= char_ranges[i] && c <= char_ranges[i + 1])
2014                       break;
2015                   if (!(negate ^ (i < n_char_ranges)))
2016                     break;
2017                 }
2018             fwd_ok:
2019               p += nbytes, pos++, pos_byte += nbytes;
2020             }
2021         else
2022           while (1)
2023             {
2024               if (p >= stop)
2025                 {
2026                   if (p >= endp)
2027                     break;
2028                   p = GAP_END_ADDR;
2029                   stop = endp;
2030                 }
2031
2032               if (!NILP (iso_classes) && in_classes (*p, iso_classes))
2033                 {
2034                   if (negate)
2035                     break;
2036                   else
2037                     goto fwd_unibyte_ok;
2038                 }
2039
2040               if (!fastmap[*p])
2041                 break;
2042             fwd_unibyte_ok:
2043               p++, pos++, pos_byte++;
2044             }
2045       }
2046     else
2047       {
2048         if (multibyte)
2049           while (1)
2050             {
2051               unsigned char *prev_p;
2052
2053               if (p <= stop)
2054                 {
2055                   if (p <= endp)
2056                     break;
2057                   p = GPT_ADDR;
2058                   stop = endp;
2059                 }
2060               prev_p = p;
2061               while (--p >= stop && ! CHAR_HEAD_P (*p));
2062               c = STRING_CHAR (p);
2063
2064               if (! NILP (iso_classes) && in_classes (c, iso_classes))
2065                 {
2066                   if (negate)
2067                     break;
2068                   else
2069                     goto back_ok;
2070                 }
2071
2072               if (! fastmap[*p])
2073                 break;
2074               if (! ASCII_CHAR_P (c))
2075                 {
2076                   /* See the comment in the previous similar code.  */
2077                   for (i = 0; i < n_char_ranges; i += 2)
2078                     if (c >= char_ranges[i] && c <= char_ranges[i + 1])
2079                       break;
2080                   if (!(negate ^ (i < n_char_ranges)))
2081                     break;
2082                 }
2083             back_ok:
2084               pos--, pos_byte -= prev_p - p;
2085             }
2086         else
2087           while (1)
2088             {
2089               if (p <= stop)
2090                 {
2091                   if (p <= endp)
2092                     break;
2093                   p = GPT_ADDR;
2094                   stop = endp;
2095                 }
2096
2097               if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
2098                 {
2099                   if (negate)
2100                     break;
2101                   else
2102                     goto back_unibyte_ok;
2103                 }
2104
2105               if (!fastmap[p[-1]])
2106                 break;
2107             back_unibyte_ok:
2108               p--, pos--, pos_byte--;
2109             }
2110       }
2111
2112     SET_PT_BOTH (pos, pos_byte);
2113     immediate_quit = 0;
2114
2115     SAFE_FREE ();
2116     return make_number (PT - start_point);
2117   }
2118 }
2119
2120
/* Move point over consecutive characters whose syntax class is selected
   by STRING, forward if FORWARDP is true and backward otherwise, without
   going beyond LIM.  Each byte of STRING is mapped through
   syntax_spec_code to pick a class; a leading `^' selects the complement
   of the listed classes.  A nil LIM means ZV when moving forward and
   BEGV when moving backward; any other LIM is clamped to BEGV..ZV.
   Return the signed distance point moved (PT minus the starting point)
   as a Lisp integer, or 0 if point is already at or beyond LIM.  */
2121 static Lisp_Object
2122 skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
2123 {
2124   int c;
2125   unsigned char fastmap[0400];  /* Nonzero at index CODE: skip chars whose syntax is CODE.  */
2126   bool negate = 0;              /* Set when STRING starts with `^'.  */
2127   ptrdiff_t i, i_byte;
2128   bool multibyte;
2129   ptrdiff_t size_byte;
2130   unsigned char *str;
2131
2132   CHECK_STRING (string);
2133   /* A nil LIM defaults to ZV (forward scan) or BEGV (backward scan).  */
2134   if (NILP (lim))
2135     XSETINT (lim, forwardp ? ZV : BEGV);
2136   else
2137     CHECK_NUMBER_COERCE_MARKER (lim);
2138
2139   /* In any case, don't allow scan outside bounds of buffer.  */
2140   if (XINT (lim) > ZV)
2141     XSETFASTINT (lim, ZV);
2142   if (XINT (lim) < BEGV)
2143     XSETFASTINT (lim, BEGV);
2144   /* Nothing to skip if point is already at or beyond LIM.  */
2145   if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
2146     return make_number (0);
2147   /* Decode multibyte only if PT..LIM has a different byte count than char count.  */
2148   multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
2149                && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
2150
2151   memset (fastmap, 0, sizeof fastmap);
2152
2153   if (SBYTES (string) > SCHARS (string))
2154     /* As this is a very rare case (syntax spec is ASCII only), don't
2155        consider efficiency.  */
2156     string = string_make_unibyte (string);
2157
2158   str = SDATA (string);
2159   size_byte = SBYTES (string);
2160
2161   i_byte = 0;
2162   if (i_byte < size_byte
2163       && SREF (string, 0) == '^')
2164     {
2165       negate = 1; i_byte++;
2166     }
2167
2168   /* Find the syntaxes specified and set their elements of fastmap.  */
2169
2170   while (i_byte < size_byte)
2171     {
2172       c = str[i_byte++];
2173       fastmap[syntax_spec_code[c]] = 1;  /* Index is whatever code syntax_spec_code gives byte C.  */
2174     }
2175
2176   /* If ^ was the first character, complement the fastmap.  */
2177   if (negate)
2178     for (i = 0; i < sizeof fastmap; i++)
2179       fastmap[i] ^= 1;
2180
2181   {
2182     ptrdiff_t start_point = PT;
2183     ptrdiff_t pos = PT;
2184     ptrdiff_t pos_byte = PT_BYTE;
2185     unsigned char *p, *endp, *stop;
2186
2187     immediate_quit = 1;
2188     SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);  /* NOTE(review): presumably primes gl_state for POS and the scan direction -- confirm.  */
2189
2190     if (forwardp)
2191       {
2192         while (true)
2193           {
2194             p = BYTE_POS_ADDR (pos_byte);
2195             endp = XINT (lim) == GPT ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
2196             stop = pos < GPT && GPT < XINT (lim) ? GPT_ADDR : endp;
2197             /* STOP is the gap start when the gap lies between POS and LIM, else ENDP.  */
2198             do
2199               {
2200                 int nbytes;
2201
2202                 if (p >= stop)
2203                   {
2204                     if (p >= endp)
2205                       goto done;
2206                     p = GAP_END_ADDR;  /* Reached the gap: resume just after it.  */
2207                     stop = endp;
2208                   }
2209                 if (multibyte)
2210                   c = STRING_CHAR_AND_LENGTH (p, nbytes);
2211                 else
2212                   c = *p, nbytes = 1;
2213                 if (! fastmap[SYNTAX (c)])
2214                   goto done;  /* First char whose syntax is not selected ends the skip.  */
2215                 p += nbytes, pos++, pos_byte += nbytes;
2216               }
2217             while (!parse_sexp_lookup_properties
2218                    || pos < gl_state.e_property);
2219             /* Only reached when parse_sexp_lookup_properties is set and POS hit gl_state.e_property.  */
2220             update_syntax_table_forward (pos + gl_state.offset,
2221                                          false, gl_state.object);
2222           }
2223       }
2224     else
2225       {
2226         p = BYTE_POS_ADDR (pos_byte);
2227         endp = CHAR_POS_ADDR (XINT (lim));
2228         stop = pos >= GPT && GPT > XINT (lim) ? GAP_END_ADDR : endp;  /* Gap end if the gap lies between LIM and POS.  */
2229
2230         if (multibyte)
2231           {
2232             while (1)
2233               {
2234                 unsigned char *prev_p;
2235
2236                 if (p <= stop)
2237                   {
2238                     if (p <= endp)
2239                       break;
2240                     p = GPT_ADDR;  /* Reached the gap: resume just before it.  */
2241                     stop = endp;
2242                   }
2243                 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2244                 prev_p = p;
2245                 while (--p >= stop && ! CHAR_HEAD_P (*p));  /* Back up to a byte for which CHAR_HEAD_P holds.  */
2246                 c = STRING_CHAR (p);
2247                 if (! fastmap[SYNTAX (c)])
2248                   break;
2249                 pos--, pos_byte -= prev_p - p;  /* PREV_P - P is the byte length of the char just skipped.  */
2250               }
2251           }
2252         else
2253           {
2254             while (1)
2255               {
2256                 if (p <= stop)
2257                   {
2258                     if (p <= endp)
2259                       break;
2260                     p = GPT_ADDR;
2261                     stop = endp;
2262                   }
2263                 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2264                 if (! fastmap[SYNTAX (p[-1])])
2265                   break;
2266                 p--, pos--, pos_byte--;
2267               }
2268           }
2269       }
2270
2271   done:
2272     SET_PT_BOTH (pos, pos_byte);  /* Leave point where the scan stopped.  */
2273     immediate_quit = 0;
2274
2275     return make_number (PT - start_point);  /* Negative when point moved backward.  */
2276   }
2277 }
2278
2279 /* Return true if character C belongs to at least one of the ISO
2280    classes in the list ISO_CLASSES, false otherwise (also when the list
2281    is empty).  Each element is an integer class type as given by re_wctype.  */
2282
2283 static bool
2284 in_classes (int c, Lisp_Object iso_classes)
2285 {
2286   bool fits_class = 0;  /* Becomes 1 once any class accepts C.  */
2287
2288   while (CONSP (iso_classes))  /* Walk the list one cons cell at a time.  */
2289     {
2290       Lisp_Object elt;
2291       elt = XCAR (iso_classes);
2292       iso_classes = XCDR (iso_classes);
2293
2294       if (re_iswctype (c, XFASTINT (elt)))
2295         fits_class = 1;  /* No early exit: the remaining elements are still visited.  */
2296     }
2297
2298   return fits_class;
2299 }
2300 \f
2301 /* Jump over a comment, assuming we are at the beginning of one.
2302    FROM is the current position.
2303    FROM_BYTE is the bytepos corresponding to FROM.
2304    Do not move past STOP (a charpos).
2305    The comment over which we have to jump is of style STYLE
2306      (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2307    NESTING should be positive to indicate the nesting at the beginning
2308      for nested comments and should be zero or negative otherwise
2309      (it is then normalized to -1).  ST_COMMENT_STYLE cannot be nested.
2310    PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2311      (or 0 if the search cannot start in the middle of a two-character construct).
2312
2313    If successful, return true and store the charpos of the comment's
2314    end into *CHARPOS_PTR and the corresponding bytepos into
2315    *BYTEPOS_PTR; *INCOMMENT_PTR is not written in that case.  Else,
2316    return false and store the charpos STOP into *CHARPOS_PTR, the
2317    corresponding bytepos into *BYTEPOS_PTR and the current nesting (as
2318    defined for state->incomment) in *INCOMMENT_PTR.  Should the last
2319    character scanned in an incomplete comment be a possible first
2320    character of a two character construct, we store its
2321    SYNTAX_WITH_FLAGS into *LAST_SYNTAX_PTR.  Otherwise, we store Smax there.
2322
2323    The comment end is the last character of the comment rather than the
2324    character just after the comment.
2325
2326    Global syntax data is assumed to initially be valid for FROM and
2327    remains valid for forward search starting at the returned position. */
2328
2329 static bool
2330 forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
2331               EMACS_INT nesting, int style, int prev_syntax,
2332               ptrdiff_t *charpos_ptr, ptrdiff_t *bytepos_ptr,
2333               EMACS_INT *incomment_ptr, int *last_syntax_ptr)
2334 {
2335   register int c, c1;
2336   register enum syntaxcode code;
2337   register int syntax, other_syntax;
2338   /* From here on a negative NESTING (always -1) means "not a nestable comment".  */
2339   if (nesting <= 0) nesting = -1;
2340
2341   /* Enter the loop in the middle so that we find
2342      a 2-char comment ender if we start in the middle of it.  */
2343   syntax = prev_syntax;
2344   code = syntax & 0xff;  /* The low byte of a syntax-with-flags value is the syntax code.  */
2345   if (syntax != 0 && from < stop) goto forw_incomment;
2346
2347   while (1)
2348     {
2349       if (from == stop)  /* Ran out of text: the comment is unterminated.  */
2350         {
2351           *incomment_ptr = nesting;
2352           *charpos_ptr = from;
2353           *bytepos_ptr = from_byte;
2354           *last_syntax_ptr =
2355             (code == Sescape || code == Scharquote
2356              || SYNTAX_FLAGS_COMEND_FIRST (syntax)
2357              || (nesting > 0
2358                  && SYNTAX_FLAGS_COMSTART_FIRST (syntax)))
2359             ? syntax : Smax ;
2360           return 0;
2361         }
2362       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2363       syntax = SYNTAX_WITH_FLAGS (c);
2364       code = syntax & 0xff;
2365       if (code == Sendcomment
2366           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2367           && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2368               (nesting > 0 && --nesting == 0) : nesting < 0)  /* NOTE(review): --nesting takes effect even if the char_quoted test below rejects this ender -- confirm intended.  */
2369           && !(Vcomment_end_can_be_escaped && char_quoted (from, from_byte)))
2370         /* We have encountered a comment end of the same style
2371            as the comment sequence which began this comment
2372            section.  */
2373         break;
2374       if (code == Scomment_fence
2375           && style == ST_COMMENT_STYLE)
2376         /* We have encountered a comment fence, which ends a comment
2377            of style ST_COMMENT_STYLE such as the one that began this
2378            comment section.  */
2379         break;
2380       if (nesting > 0
2381           && code == Scomment
2382           && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2383           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2384         /* We have encountered a nested comment of the same style
2385            as the comment sequence which began this comment section.  */
2386         nesting++;
2387       INC_BOTH (from, from_byte);
2388       UPDATE_SYNTAX_TABLE_FORWARD (from);
2389       /* Here SYNTAX describes the char just passed and FROM addresses the one after it.  */
2390     forw_incomment:
2391       if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2392           && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2393               other_syntax = SYNTAX_WITH_FLAGS (c1),
2394               SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2395           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2396           && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2397                SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2398               ? nesting > 0 : nesting < 0))
2399         {
2400           syntax = Smax;        /* So that "|#" (lisp) can not return
2401                                    the syntax of "#" in *last_syntax_ptr. */
2402           if (--nesting <= 0)
2403             /* We have encountered a comment end of the same style
2404                as the comment sequence which began this comment section.  */
2405             break;
2406           else
2407             {
2408               INC_BOTH (from, from_byte);  /* Step over the second char of the inner comment ender.  */
2409               UPDATE_SYNTAX_TABLE_FORWARD (from);
2410             }
2411         }
2412       if (nesting > 0
2413           && from < stop
2414           && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2415           && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2416               other_syntax = SYNTAX_WITH_FLAGS (c1),
2417               SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2418               && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2419           && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2420               SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2421         /* We have encountered a nested comment of the same style
2422            as the comment sequence which began this comment section.  */
2423         {
2424           syntax = Smax; /* So that "#|#" isn't also a comment ender. */
2425           INC_BOTH (from, from_byte);
2426           UPDATE_SYNTAX_TABLE_FORWARD (from);
2427           nesting++;
2428         }
2429     }
2430   *charpos_ptr = from;  /* Every break above leaves FROM on the last char of the comment ender.  */
2431   *bytepos_ptr = from_byte;
2432   *last_syntax_ptr = Smax; /* Any syntactic power the last byte had is
2433                               used up. */
2434   return 1;
2435 }
2436
2437 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2438        doc: /*
2439 Move forward across up to COUNT comments.  If COUNT is negative, move backward.
2440 Stop scanning if we find something other than a comment or whitespace.
2441 Set point to where scanning stops.
2442 If COUNT comments are found as expected, with nothing except whitespace
2443 between them, return t; otherwise return nil.  */)
2444   (Lisp_Object count)
2445 {
2446   ptrdiff_t from, from_byte, stop;
2447   int c, c1;
2448   enum syntaxcode code;
2449   int comstyle = 0;         /* style of comment encountered */
2450   bool comnested = 0;       /* whether the comment is nestable or not */
2451   bool found;
2452   EMACS_INT count1;
2453   ptrdiff_t out_charpos, out_bytepos;
2454   EMACS_INT dummy;
2455   int dummy2;
2456
2457   CHECK_NUMBER (count);
2458   count1 = XINT (count);
2459   stop = count1 > 0 ? ZV : BEGV;
2460
2461   immediate_quit = 1;
2462   QUIT;
2463
2464   from = PT;
2465   from_byte = PT_BYTE;
2466
2467   SETUP_SYNTAX_TABLE (from, count1);
2468   while (count1 > 0)
2469     {
2470       do
2471         {
2472           bool comstart_first;
2473           int syntax, other_syntax;
2474
2475           if (from == stop)
2476             {
2477               SET_PT_BOTH (from, from_byte);
2478               immediate_quit = 0;
2479               return Qnil;
2480             }
2481           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2482           syntax = SYNTAX_WITH_FLAGS (c);
2483           code = SYNTAX (c);
2484           comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2485           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2486           comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2487           INC_BOTH (from, from_byte);
2488           UPDATE_SYNTAX_TABLE_FORWARD (from);
2489           if (from < stop && comstart_first
2490               && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2491                   other_syntax = SYNTAX_WITH_FLAGS (c1),
2492                   SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2493             {
2494               /* We have encountered a comment start sequence and we
2495                  are ignoring all text inside comments.  We must record
2496                  the comment style this sequence begins so that later,
2497                  only a comment end of the same style actually ends
2498                  the comment section.  */
2499               code = Scomment;
2500               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2501               comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2502               INC_BOTH (from, from_byte);
2503               UPDATE_SYNTAX_TABLE_FORWARD (from);
2504             }
2505         }
2506       while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2507
2508       if (code == Scomment_fence)
2509         comstyle = ST_COMMENT_STYLE;
2510       else if (code != Scomment)
2511         {
2512           immediate_quit = 0;
2513           DEC_BOTH (from, from_byte);
2514           SET_PT_BOTH (from, from_byte);
2515           return Qnil;
2516         }
2517       /* We're at the start of a comment.  */
2518       found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2519                             &out_charpos, &out_bytepos, &dummy, &dummy2);
2520       from = out_charpos; from_byte = out_bytepos;
2521       if (!found)
2522         {
2523           immediate_quit = 0;
2524           SET_PT_BOTH (from, from_byte);
2525           return Qnil;
2526         }
2527       INC_BOTH (from, from_byte);
2528       UPDATE_SYNTAX_TABLE_FORWARD (from);
2529       /* We have skipped one comment.  */
2530       count1--;
2531     }
2532
2533   while (count1 < 0)
2534     {
2535       while (1)
2536         {
2537           bool quoted;
2538           int syntax;
2539
2540           if (from <= stop)
2541             {
2542               SET_PT_BOTH (BEGV, BEGV_BYTE);
2543               immediate_quit = 0;
2544               return Qnil;
2545             }
2546
2547           DEC_BOTH (from, from_byte);
2548           /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from).  */
2549           quoted = char_quoted (from, from_byte);
2550           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2551           syntax = SYNTAX_WITH_FLAGS (c);
2552           code = SYNTAX (c);
2553           comstyle = 0;
2554           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2555           if (code == Sendcomment)
2556             comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2557           if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2558               && prev_char_comend_first (from, from_byte)
2559               && !char_quoted (from - 1, dec_bytepos (from_byte)))
2560             {
2561               int other_syntax;
2562               /* We must record the comment style encountered so that
2563                  later, we can match only the proper comment begin
2564                  sequence of the same style.  */
2565               DEC_BOTH (from, from_byte);
2566               code = Sendcomment;
2567               /* Calling char_quoted, above, set up global syntax position
2568                  at the new value of FROM.  */
2569               c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2570               other_syntax = SYNTAX_WITH_FLAGS (c1);
2571               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2572               comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2573             }
2574
2575           if (code == Scomment_fence)
2576             {
2577               /* Skip until first preceding unquoted comment_fence.  */
2578               bool fence_found = 0;
2579               ptrdiff_t ini = from, ini_byte = from_byte;
2580
2581               while (1)
2582                 {
2583                   DEC_BOTH (from, from_byte);
2584                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2585                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2586                   if (SYNTAX (c) == Scomment_fence
2587                       && !char_quoted (from, from_byte))
2588                     {
2589                       fence_found = 1;
2590                       break;
2591                     }
2592                   else if (from == stop)
2593                     break;
2594                 }
2595               if (fence_found == 0)
2596                 {
2597                   from = ini;           /* Set point to ini + 1.  */
2598                   from_byte = ini_byte;
2599                   goto leave;
2600                 }
2601               else
2602                 /* We have skipped one comment.  */
2603                 break;
2604             }
2605           else if (code == Sendcomment)
2606             {
2607               found = back_comment (from, from_byte, stop, comnested, comstyle,
2608                                     &out_charpos, &out_bytepos);
2609               if (!found)
2610                 {
2611                   if (c == '\n')
2612                     /* This end-of-line is not an end-of-comment.
2613                        Treat it like a whitespace.
2614                        CC-mode (and maybe others) relies on this behavior.  */
2615                     ;
2616                   else
2617                     {
2618                       /* Failure: we should go back to the end of this
2619                          not-quite-endcomment.  */
2620                       if (SYNTAX (c) != code)
2621                         /* It was a two-char Sendcomment.  */
2622                         INC_BOTH (from, from_byte);
2623                       goto leave;
2624                     }
2625                 }
2626               else
2627                 {
2628                   /* We have skipped one comment.  */
2629                   from = out_charpos, from_byte = out_bytepos;
2630                   break;
2631                 }
2632             }
2633           else if (code != Swhitespace || quoted)
2634             {
2635             leave:
2636               immediate_quit = 0;
2637               INC_BOTH (from, from_byte);
2638               SET_PT_BOTH (from, from_byte);
2639               return Qnil;
2640             }
2641         }
2642
2643       count1++;
2644     }
2645
2646   SET_PT_BOTH (from, from_byte);
2647   immediate_quit = 0;
2648   return Qt;
2649 }
2650 \f
2651 /* Classify character C for the list/sexp scanners.  A non-ASCII C is
2652    reported as Ssymbol when MULTIBYTE_SYMBOL_P; otherwise use SYNTAX (C).  */
2653
2654 static enum syntaxcode
2655 syntax_multibyte (int c, bool multibyte_symbol_p)
2656 {
2657   return multibyte_symbol_p && !ASCII_CHAR_P (c) ? Ssymbol : SYNTAX (c);
2658 }
2659
     /* Scan from character position FROM over COUNT balanced groupings:
        forward when COUNT is positive, backward when it is negative.
        DEPTH is the parenthesis depth assumed at FROM; each time the depth
        returns to zero one grouping has been passed.  If SEXPFLAG, words,
        symbols and strings met at depth zero also count as groupings, and
        Smath delimiters are paired like parentheses; otherwise those are
        merely passed over.  Comments are skipped only when
        parse_sexp_ignore_comments is set.

        FROM is first clamped to [BEGV, ZV].  Return the position reached
        as a Lisp integer, or Qnil if the limit of the accessible region
        is reached at depth zero before COUNT is exhausted.  Signal
        Qscan_error ("Unbalanced parentheses") when the scan runs out of
        text inside a construct or at nonzero depth, and ("Containing
        expression ends prematurely") when the depth drops below the
        permitted minimum.  immediate_quit is set during the scan and
        cleared again before each normal return.  */
2660 static Lisp_Object
2661 scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
2662 {
2663   Lisp_Object val;
2664   ptrdiff_t stop = count > 0 ? ZV : BEGV;
2665   int c, c1;
2666   int stringterm;
2667   bool quoted;
2668   bool mathexit = 0;
2669   enum syntaxcode code;
2670   EMACS_INT min_depth = depth;  /* Err out if depth gets less than this.  */
2671   int comstyle = 0;             /* Style of comment encountered.  */
2672   bool comnested = 0;           /* Whether the comment is nestable or not.  */
2673   ptrdiff_t temp_pos;
2674   EMACS_INT last_good = from;
2675   bool found;
2676   ptrdiff_t from_byte;
2677   ptrdiff_t out_bytepos, out_charpos;
2678   EMACS_INT dummy;
2679   int dummy2;
2680   bool multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2681
       /* When starting DEPTH levels deep, closing all of those levels is
          legitimate, so the floor for the "ends prematurely" check is 0.  */
2682   if (depth > 0) min_depth = 0;
2683
       /* Clamp FROM to the accessible region.  */
2684   if (from > ZV) from = ZV;
2685   if (from < BEGV) from = BEGV;
2686
2687   from_byte = CHAR_TO_BYTE (from);
2688
2689   immediate_quit = 1;
2690   QUIT;
2691
2692   SETUP_SYNTAX_TABLE (from, count);
       /* Forward scan: each pass of the outer loop moves over one object.  */
2693   while (count > 0)
2694     {
2695       while (from < stop)
2696         {
2697           bool comstart_first, prefix;
2698           int syntax, other_syntax;
2699           UPDATE_SYNTAX_TABLE_FORWARD (from);
2700           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2701           syntax = SYNTAX_WITH_FLAGS (c);
2702           code = syntax_multibyte (c, multibyte_symbol_p);
2703           comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2704           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2705           comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2706           prefix = SYNTAX_FLAGS_PREFIX (syntax);
               /* Remember the last position seen at the minimum depth; it is
                  reported as the first datum of any scan error.  */
2707           if (depth == min_depth)
2708             last_good = from;
2709           INC_BOTH (from, from_byte);
2710           UPDATE_SYNTAX_TABLE_FORWARD (from);
2711           if (from < stop && comstart_first
2712               && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2713                   other_syntax = SYNTAX_WITH_FLAGS (c),
2714                   SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2715               && parse_sexp_ignore_comments)
2716             {
2717               /* We have encountered a comment start sequence and we
2718                  are ignoring all text inside comments.  We must record
2719                  the comment style this sequence begins so that later,
2720                  only a comment end of the same style actually ends
2721                  the comment section.  */
2722               code = Scomment;
2723               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2724               comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2725               INC_BOTH (from, from_byte);
2726               UPDATE_SYNTAX_TABLE_FORWARD (from);
2727             }
2728
               /* A character carrying the prefix flag is passed over without
                  being dispatched on its syntax code.  */
2729           if (prefix)
2730             continue;
2731
2732           switch (code)
2733             {
2734             case Sescape:
2735             case Scharquote:
2736               if (from == stop)
2737                 goto lose;
2738               INC_BOTH (from, from_byte);
2739               /* Treat following character as a word constituent.  */
2740             case Sword:
2741             case Ssymbol:
2742               if (depth || !sexpflag) break;
2743               /* This word counts as a sexp; return at end of it.  */
2744               while (from < stop)
2745                 {
2746                   UPDATE_SYNTAX_TABLE_FORWARD (from);
2747
2748                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2749                   switch (syntax_multibyte (c, multibyte_symbol_p))
2750                     {
2751                     case Scharquote:
2752                     case Sescape:
2753                       INC_BOTH (from, from_byte);
2754                       if (from == stop)
2755                         goto lose;
2756                       break;
2757                     case Sword:
2758                     case Ssymbol:
2759                     case Squote:
2760                       break;
2761                     default:
2762                       goto done;
2763                     }
2764                   INC_BOTH (from, from_byte);
2765                 }
2766               goto done;
2767
2768             case Scomment_fence:
2769               comstyle = ST_COMMENT_STYLE;
2770               /* FALLTHROUGH */
2771             case Scomment:
2772               if (!parse_sexp_ignore_comments) break;
2773               UPDATE_SYNTAX_TABLE_FORWARD (from);
2774               found = forw_comment (from, from_byte, stop,
2775                                     comnested, comstyle, 0,
2776                                     &out_charpos, &out_bytepos, &dummy,
2777                                     &dummy2);
2778               from = out_charpos, from_byte = out_bytepos;
2779               if (!found)
2780                 {
                       /* No comment end was found.  At depth zero that
                          counts as finishing an object; otherwise fail.  */
2781                   if (depth == 0)
2782                     goto done;
2783                   goto lose;
2784                 }
2785               INC_BOTH (from, from_byte);
2786               UPDATE_SYNTAX_TABLE_FORWARD (from);
2787               break;
2788
2789             case Smath:
2790               if (!sexpflag)
2791                 break;
                   /* If the next character is the same as C, consume it as
                      part of this delimiter.  */
2792               if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2793                 {
2794                   INC_BOTH (from, from_byte);
2795                 }
                   /* MATHEXIT alternates: a delimiter met while it is set
                      closes a level, one met while it is clear opens one.  */
2796               if (mathexit)
2797                 {
2798                   mathexit = 0;
2799                   goto close1;
2800                 }
2801               mathexit = 1;
2802
                   /* FALLTHROUGH: this math delimiter opens a level.  */
2803             case Sopen:
2804               if (!++depth) goto done;
2805               break;
2806
2807             case Sclose:
2808             close1:
2809               if (!--depth) goto done;
2810               if (depth < min_depth)
2811                 xsignal3 (Qscan_error,
2812                           build_string ("Containing expression ends prematurely"),
2813                           make_number (last_good), make_number (from));
2814               break;
2815
2816             case Sstring:
2817             case Sstring_fence:
                   /* The opening delimiter is the character just before FROM;
                      an Sstring string ends only at that same character.  */
2818               temp_pos = dec_bytepos (from_byte);
2819               stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2820               while (1)
2821                 {
2822                   enum syntaxcode c_code;
2823                   if (from >= stop)
2824                     goto lose;
2825                   UPDATE_SYNTAX_TABLE_FORWARD (from);
2826                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2827                   c_code = syntax_multibyte (c, multibyte_symbol_p);
2828                   if (code == Sstring
2829                       ? c == stringterm && c_code == Sstring
2830                       : c_code == Sstring_fence)
2831                     break;
2832
                       /* A quoting character takes the following character
                          with it.  */
2833                   if (c_code == Scharquote || c_code == Sescape)
2834                     INC_BOTH (from, from_byte);
2835                   INC_BOTH (from, from_byte);
2836                 }
2837               INC_BOTH (from, from_byte);
2838               if (!depth && sexpflag) goto done;
2839               break;
2840             default:
2841               /* Ignore whitespace, punctuation, quote, endcomment.  */
2842               break;
2843             }
2844         }
2845
2846       /* Reached end of buffer.  Error if within object, return nil if between */
2847       if (depth)
2848         goto lose;
2849
2850       immediate_quit = 0;
2851       return Qnil;
2852
2853       /* End of object reached */
2854     done:
2855       count--;
2856     }
2857
       /* Backward scan: each pass of the outer loop moves back over one
          object.  */
2858   while (count < 0)
2859     {
2860       while (from > stop)
2861         {
2862           int syntax;
2863           DEC_BOTH (from, from_byte);
2864           UPDATE_SYNTAX_TABLE_BACKWARD (from);
2865           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2866           syntax= SYNTAX_WITH_FLAGS (c);
2867           code = syntax_multibyte (c, multibyte_symbol_p);
2868           if (depth == min_depth)
2869             last_good = from;
2870           comstyle = 0;
2871           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2872           if (code == Sendcomment)
2873             comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2874           if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2875               && prev_char_comend_first (from, from_byte)
2876               && parse_sexp_ignore_comments)
2877             {
2878               /* We must record the comment style encountered so that
2879                  later, we can match only the proper comment begin
2880                  sequence of the same style.  */
2881               int c2, other_syntax;
2882               DEC_BOTH (from, from_byte);
2883               UPDATE_SYNTAX_TABLE_BACKWARD (from);
2884               code = Sendcomment;
2885               c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2886               other_syntax = SYNTAX_WITH_FLAGS (c2);
2887               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2888               comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2889             }
2890
2891           /* Quoting turns anything except a comment-ender
2892              into a word character.  Note that this cannot be true
2893              if we decremented FROM in the if-statement above.  */
2894           if (code != Sendcomment && char_quoted (from, from_byte))
2895             {
2896               DEC_BOTH (from, from_byte);
2897               code = Sword;
2898             }
2899           else if (SYNTAX_FLAGS_PREFIX (syntax))
2900             continue;
2901
2902           switch (code)
2903             {
2904             case Sword:
2905             case Ssymbol:
2906             case Sescape:
2907             case Scharquote:
2908               if (depth || !sexpflag) break;
2909               /* This word counts as a sexp; count object finished
2910                  after passing it.  */
2911               while (from > stop)
2912                 {
                       /* TEMP_POS is the byte position of the character
                          before FROM.  */
2913                   temp_pos = from_byte;
2914                   if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
2915                     DEC_POS (temp_pos);
2916                   else
2917                     temp_pos--;
2918                   UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2919                   c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2920                   /* Don't allow comment-end to be quoted.  */
2921                   if (syntax_multibyte (c1, multibyte_symbol_p) == Sendcomment)
2922                     goto done2;
2923                   quoted = char_quoted (from - 1, temp_pos);
2924                   if (quoted)
2925                     {
2926                       DEC_BOTH (from, from_byte);
2927                       temp_pos = dec_bytepos (temp_pos);
2928                       UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2929                     }
2930                   c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2931                   if (! quoted)
2932                     switch (syntax_multibyte (c1, multibyte_symbol_p))
2933                       {
2934                       case Sword: case Ssymbol: case Squote: break;
2935                       default: goto done2;
2936                       }
2937                   DEC_BOTH (from, from_byte);
2938                 }
2939               goto done2;
2940
2941             case Smath:
2942               if (!sexpflag)
2943                 break;
2944               if (from > BEGV)
2945                 {
                       /* If the preceding character is the same as C, consume
                          it as part of this delimiter.  */
2946                   temp_pos = dec_bytepos (from_byte);
2947                   UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2948                   if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2949                     DEC_BOTH (from, from_byte);
2950                 }
2951               if (mathexit)
2952                 {
2953                   mathexit = 0;
2954                   goto open2;
2955                 }
2956               mathexit = 1;
2957
                   /* FALLTHROUGH: scanning backward, this math delimiter is
                      handled like a closing paren and deepens the nesting.  */
2958             case Sclose:
2959               if (!++depth) goto done2;
2960               break;
2961
2962             case Sopen:
2963             open2:
2964               if (!--depth) goto done2;
2965               if (depth < min_depth)
2966                 xsignal3 (Qscan_error,
2967                           build_string ("Containing expression ends prematurely"),
2968                           make_number (last_good), make_number (from));
2969               break;
2970
2971             case Sendcomment:
2972               if (!parse_sexp_ignore_comments)
2973                 break;
2974               found = back_comment (from, from_byte, stop, comnested, comstyle,
2975                                     &out_charpos, &out_bytepos);
2976               /* FIXME:  if !found, it really wasn't a comment-end.
2977                  For single-char Sendcomment, we can't do much about it apart
2978                  from skipping the char.
2979                  For 2-char endcomments, we could try again, taking both
2980                  chars as separate entities, but it's a lot of trouble
2981                  for very little gain, so we don't bother either.  -sm */
2982               if (found)
2983                 from = out_charpos, from_byte = out_bytepos;
2984               break;
2985
2986             case Scomment_fence:
2987             case Sstring_fence:
                   /* Move back to the nearest unquoted character with the
                      same fence syntax code.  */
2988               while (1)
2989                 {
2990                   if (from == stop)
2991                     goto lose;
2992                   DEC_BOTH (from, from_byte);
2993                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2994                   if (!char_quoted (from, from_byte))
2995                     {
2996                       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2997                       if (syntax_multibyte (c, multibyte_symbol_p) == code)
2998                         break;
2999                     }
3000                 }
3001               if (code == Sstring_fence && !depth && sexpflag) goto done2;
3002               break;
3003
3004             case Sstring:
                   /* FROM is at the closing delimiter; move back to the
                      nearest unquoted Sstring occurrence of that character.  */
3005               stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3006               while (1)
3007                 {
3008                   if (from == stop)
3009                     goto lose;
3010                   DEC_BOTH (from, from_byte);
3011                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
3012                   if (!char_quoted (from, from_byte))
3013                     {
3014                       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3015                       if (c == stringterm
3016                           && (syntax_multibyte (c, multibyte_symbol_p)
3017                               == Sstring))
3018                         break;
3019                     }
3020                 }
3021               if (!depth && sexpflag) goto done2;
3022               break;
3023             default:
3024               /* Ignore whitespace, punctuation, quote, endcomment.  */
3025               break;
3026             }
3027         }
3028
3029       /* Reached start of buffer.  Error if within object, return nil if between */
3030       if (depth)
3031         goto lose;
3032
3033       immediate_quit = 0;
3034       return Qnil;
3035
3036     done2:
3037       count++;
3038     }
3039
3040
3041   immediate_quit = 0;
3042   XSETFASTINT (val, from);
3043   return val;
3044
       /* Common failure exit: the scan reached STOP in the middle of a
          construct or at nonzero depth.  No return follows the call, so
          xsignal3 presumably does not return -- confirm against its
          declaration.  */
3045  lose:
3046   xsignal3 (Qscan_error,
3047             build_string ("Unbalanced parentheses"),
3048             make_number (last_good), make_number (from));
3049 }
3050
3051 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
3052        doc: /* Scan from character number FROM by COUNT lists.
3053 Scan forward if COUNT is positive, backward if COUNT is negative.
3054 Return the character number of the position thus found.
3055
3056 A \"list", in this context, refers to a balanced parenthetical
3057 grouping, as determined by the syntax table.
3058
3059 If DEPTH is nonzero, treat that as the nesting depth of the starting
3060 point (i.e. the starting point is DEPTH parentheses deep).  This
3061 function scans over parentheses until the depth goes to zero COUNT
3062 times.  Hence, positive DEPTH moves out that number of levels of
3063 parentheses, while negative DEPTH moves to a deeper level.
3064
3065 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3066
3067 If we reach the beginning or end of the accessible part of the buffer
3068 before we have scanned over COUNT lists, return nil if the depth at
3069 that point is zero, and signal an error if the depth is nonzero.  */)
3070   (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
3071 {
3072   CHECK_NUMBER (from);
3073   CHECK_NUMBER (count);
3074   CHECK_NUMBER (depth);
3075
       /* The final argument is SEXPFLAG; 0 means only parenthetical
          groupings are counted, not words, symbols or strings.  */
3076   return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
3077 }
3078
3079 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
3080        doc: /* Scan from character number FROM by COUNT balanced expressions.
3081 If COUNT is negative, scan backwards.
3082 Returns the character number of the position thus found.
3083
3084 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3085
3086 If the beginning or end of (the accessible part of) the buffer is reached
3087 in the middle of a parenthetical grouping, an error is signaled.
3088 If the beginning or end is reached between groupings
3089 but before count is used up, nil is returned.  */)
3090   (Lisp_Object from, Lisp_Object count)
3091 {
3092   CHECK_NUMBER (from);
3093   CHECK_NUMBER (count);
3094
       /* DEPTH is 0 (start at top level) and SEXPFLAG is 1, so words,
          symbols and strings count as expressions as well as lists.  */
3095   return scan_lists (XINT (from), XINT (count), 0, 1);
3096 }
3097
3098 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
3099        0, 0, 0,
3100        doc: /* Move point backward over any number of chars with prefix syntax.
3101 This includes chars with expression prefix syntax class (\\=') and those with
3102 the prefix syntax flag (p).  */)
3103   (void)
3104 {
3105   ptrdiff_t limit = BEGV;
3106   /* Where point will be left: the earliest prefix char found so far.  */
3107   ptrdiff_t keep = PT, keep_byte = PT_BYTE;
3108   /* Position currently being examined.  */
3109   ptrdiff_t scan = PT, scan_byte = PT_BYTE;
3110
3111   if (scan > limit)
3112     {
3113       SETUP_SYNTAX_TABLE (scan, -1);
3114       DEC_BOTH (scan, scan_byte);
3115
3116       for (;;)
3117         {
3118           int ch;
3119
3120           /* char_quoted also brings the syntax table up to date for
3121              SCAN, so it must run before the SYNTAX lookup below.  */
3122           if (char_quoted (scan, scan_byte))
3123             break;
3124           ch = FETCH_CHAR_AS_MULTIBYTE (scan_byte);
3125           if (SYNTAX (ch) != Squote && !syntax_prefix_flag_p (ch))
3126             break;
3127
3128           /* SCAN holds an unquoted prefix char; point may move here.  */
3129           keep = scan;
3130           keep_byte = scan_byte;
3131           if (scan <= limit)
3132             break;
3133           DEC_BOTH (scan, scan_byte);
3134         }
3135     }
3136
3137   SET_PT_BOTH (keep, keep_byte);
3138   return Qnil;
3139 }
3140 \f
3141 /* Parse forward from FROM / FROM_BYTE to END,
3142    assuming that FROM has state STATE,
3143    and return a description of the state of the parse at END.
3144    If STOPBEFORE, stop at the start of an atom.
3145    If COMMENTSTOP is 1, stop at the start of a comment.
3146    If COMMENTSTOP is -1, stop at the start or end of a comment,
3147    after the beginning of a string, or after the end of a string.  */
3148
3149 static void
3150 scan_sexps_forward (struct lisp_parse_state *state,
3151                     ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t end,
3152                     EMACS_INT targetdepth, bool stopbefore,
3153                     int commentstop)
3154 {
3155   enum syntaxcode code;
3156   int c1;
3157   bool comnested;
3158   struct level { ptrdiff_t last, prev; };
3159   struct level levelstart[100];
3160   struct level *curlevel = levelstart;
3161   struct level *endlevel = levelstart + 100;
3162   EMACS_INT depth;      /* Paren depth of current scanning location.
3163                            level - levelstart equals this except
3164                            when the depth becomes negative.  */
3165   EMACS_INT mindepth;           /* Lowest DEPTH value seen.  */
3166   bool start_quoted = 0;        /* True means starting after a char quote.  */
3167   Lisp_Object tem;
3168   ptrdiff_t prev_from;          /* Keep one character before FROM.  */
3169   ptrdiff_t prev_from_byte;
3170   int prev_from_syntax, prev_prev_from_syntax;
3171   bool boundary_stop = commentstop == -1;
3172   bool nofence;
3173   bool found;
3174   ptrdiff_t out_bytepos, out_charpos;
3175   int temp;
3176
3177   prev_from = from;
3178   prev_from_byte = from_byte;
3179   if (from != BEGV)
3180     DEC_BOTH (prev_from, prev_from_byte);
3181
3182   /* Use this macro instead of `from++'.  */
3183 #define INC_FROM                                \
3184 do { prev_from = from;                          \
3185      prev_from_byte = from_byte;                \
3186      temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte);   \
3187      prev_prev_from_syntax = prev_from_syntax;  \
3188      prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3189      INC_BOTH (from, from_byte);                \
3190      if (from < end)                            \
3191        UPDATE_SYNTAX_TABLE_FORWARD (from);      \
3192   } while (0)
3193
3194   immediate_quit = 1;
3195   QUIT;
3196
3197   depth = state->depth;
3198   start_quoted = state->quoted;
3199   prev_prev_from_syntax = Smax;
3200   prev_from_syntax = state->prev_syntax;
3201
3202   tem = state->levelstarts;
3203   while (!NILP (tem))           /* >= second enclosing sexps.  */
3204     {
3205       Lisp_Object temhd = Fcar (tem);
3206       if (RANGED_INTEGERP (PTRDIFF_MIN, temhd, PTRDIFF_MAX))
3207         curlevel->last = XINT (temhd);
3208       if (++curlevel == endlevel)
3209         curlevel--; /* error ("Nesting too deep for parser"); */
3210       curlevel->prev = -1;
3211       curlevel->last = -1;
3212       tem = Fcdr (tem);
3213     }
3214   curlevel->prev = -1;
3215   curlevel->last = -1;
3216
3217   state->quoted = 0;
3218   mindepth = depth;
3219
3220   SETUP_SYNTAX_TABLE (from, 1);
3221
3222   /* Enter the loop at a place appropriate for initial state.  */
3223
3224   if (state->incomment)
3225     goto startincomment;
3226   if (state->instring >= 0)
3227     {
3228       nofence = state->instring != ST_STRING_STYLE;
3229       if (start_quoted)
3230         goto startquotedinstring;
3231       goto startinstring;
3232     }
3233   else if (start_quoted)
3234     goto startquoted;
3235
3236   while (from < end)
3237     {
3238       int syntax;
3239
3240       if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3241           && (c1 = FETCH_CHAR (from_byte),
3242               syntax = SYNTAX_WITH_FLAGS (c1),
3243               SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3244         /* Duplicate code to avoid a complex if-expression
3245            which causes trouble for the SGI compiler.  */
3246         {
3247           /* Record the comment style we have entered so that only
3248              the comment-end sequence of the same style actually
3249              terminates the comment section.  */
3250           state->comstyle
3251             = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3252           comnested = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax)
3253                        | SYNTAX_FLAGS_COMMENT_NESTED (syntax));
3254           state->incomment = comnested ? 1 : -1;
3255           state->comstr_start = prev_from;
3256           INC_FROM;
3257           prev_from_syntax = Smax; /* the syntax has already been
3258                                       "used up". */
3259           code = Scomment;
3260         }
3261       else
3262         {
3263           INC_FROM;
3264           code = prev_from_syntax & 0xff;
3265           if (code == Scomment_fence)
3266             {
3267               /* Record the comment style we have entered so that only
3268                  the comment-end sequence of the same style actually
3269                  terminates the comment section.  */
3270               state->comstyle = ST_COMMENT_STYLE;
3271               state->incomment = -1;
3272               state->comstr_start = prev_from;
3273               code = Scomment;
3274             }
3275           else if (code == Scomment)
3276             {
3277               state->comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3278               state->incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3279                                  1 : -1);
3280               state->comstr_start = prev_from;
3281             }
3282         }
3283
3284       if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3285         continue;
3286       switch (code)
3287         {
3288         case Sescape:
3289         case Scharquote:
3290           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3291           curlevel->last = prev_from;
3292         startquoted:
3293           if (from == end) goto endquoted;
3294           INC_FROM;
3295           goto symstarted;
3296           /* treat following character as a word constituent */
3297         case Sword:
3298         case Ssymbol:
3299           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3300           curlevel->last = prev_from;
3301         symstarted:
3302           while (from < end)
3303             {
3304               int symchar = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3305               switch (SYNTAX (symchar))
3306                 {
3307                 case Scharquote:
3308                 case Sescape:
3309                   INC_FROM;
3310                   if (from == end) goto endquoted;
3311                   break;
3312                 case Sword:
3313                 case Ssymbol:
3314                 case Squote:
3315                   break;
3316                 default:
3317                   goto symdone;
3318                 }
3319               INC_FROM;
3320             }
3321         symdone:
3322           curlevel->prev = curlevel->last;
3323           break;
3324
3325         case Scomment_fence: /* Can't happen because it's handled above.  */
3326         case Scomment:
3327           if (commentstop || boundary_stop) goto done;
3328         startincomment:
3329           /* The (from == BEGV) test was to enter the loop in the middle so
3330              that we find a 2-char comment ender even if we start in the
3331              middle of it.  We don't want to do that if we're just at the
3332              beginning of the comment (think of (*) ... (*)).  */
3333           found = forw_comment (from, from_byte, end,
3334                                 state->incomment, state->comstyle,
3335                                 from == BEGV ? 0 : prev_from_syntax,
3336                                 &out_charpos, &out_bytepos, &state->incomment,
3337                                 &prev_from_syntax);
3338           from = out_charpos; from_byte = out_bytepos;
3339           /* Beware!  prev_from and friends (except prev_from_syntax)
3340              are invalid now.  Luckily, the `done' doesn't use them
3341              and the INC_FROM sets them to a sane value without
3342              looking at them. */
3343           if (!found) goto done;
3344           INC_FROM;
3345           state->incomment = 0;
3346           state->comstyle = 0;  /* reset the comment style */
3347           prev_from_syntax = Smax; /* For the comment closer */
3348           if (boundary_stop) goto done;
3349           break;
3350
3351         case Sopen:
3352           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3353           depth++;
3354           /* curlevel++->last ran into compiler bug on Apollo */
3355           curlevel->last = prev_from;
3356           if (++curlevel == endlevel)
3357             curlevel--; /* error ("Nesting too deep for parser"); */
3358           curlevel->prev = -1;
3359           curlevel->last = -1;
3360           if (targetdepth == depth) goto done;
3361           break;
3362
3363         case Sclose:
3364           depth--;
3365           if (depth < mindepth)
3366             mindepth = depth;
3367           if (curlevel != levelstart)
3368             curlevel--;
3369           curlevel->prev = curlevel->last;
3370           if (targetdepth == depth) goto done;
3371           break;
3372
3373         case Sstring:
3374         case Sstring_fence:
3375           state->comstr_start = from - 1;
3376           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3377           curlevel->last = prev_from;
3378           state->instring = (code == Sstring
3379                             ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3380                             : ST_STRING_STYLE);
3381           if (boundary_stop) goto done;
3382         startinstring:
3383           {
3384             nofence = state->instring != ST_STRING_STYLE;
3385
3386             while (1)
3387               {
3388                 int c;
3389                 enum syntaxcode c_code;
3390
3391                 if (from >= end) goto done;
3392                 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3393                 c_code = SYNTAX (c);
3394
3395                 /* Check C_CODE here so that if the char has
3396                    a syntax-table property which says it is NOT
3397                    a string character, it does not end the string.  */
3398                 if (nofence && c == state->instring && c_code == Sstring)
3399                   break;
3400
3401                 switch (c_code)
3402                   {
3403                   case Sstring_fence:
3404                     if (!nofence) goto string_end;
3405                     break;
3406
3407                   case Scharquote:
3408                   case Sescape:
3409                     INC_FROM;
3410                   startquotedinstring:
3411                     if (from >= end) goto endquoted;
3412                     break;
3413
3414                   default:
3415                     break;
3416                   }
3417                 INC_FROM;
3418               }
3419           }
3420         string_end:
3421           state->instring = -1;
3422           curlevel->prev = curlevel->last;
3423           INC_FROM;
3424           if (boundary_stop) goto done;
3425           break;
3426
3427         case Smath:
3428           /* FIXME: We should do something with it.  */
3429           break;
3430         default:
3431           /* Ignore whitespace, punctuation, quote, endcomment.  */
3432           break;
3433         }
3434     }
3435   goto done;
3436
3437  stop:   /* Here if stopping before start of sexp. */
3438   from = prev_from;    /* We have just fetched the char that starts it; */
3439   from_byte = prev_from_byte;
3440   prev_from_syntax = prev_prev_from_syntax;
3441   goto done; /* but return the position before it. */
3442
3443  endquoted:
3444   state->quoted = 1;
3445  done:
3446   state->depth = depth;
3447   state->mindepth = mindepth;
3448   state->thislevelstart = curlevel->prev;
3449   state->prevlevelstart
3450     = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3451   state->location = from;
3452   state->location_byte = from_byte;
3453   state->levelstarts = Qnil;
3454   while (curlevel > levelstart)
3455     state->levelstarts = Fcons (make_number ((--curlevel)->last),
3456                                 state->levelstarts);
3457   state->prev_syntax = (SYNTAX_FLAGS_COMSTARTEND_FIRST (prev_from_syntax)
3458                         || state->quoted) ? prev_from_syntax : Smax;
3459   immediate_quit = 0;
3460 }
3461
3462 /* Convert a (lisp) parse state to the internal form used in
3463    scan_sexps_forward.  */
3464 static void
3465 internalize_parse_state (Lisp_Object external, struct lisp_parse_state *state)
3466 {
3467   Lisp_Object tem;
3468
3469   if (NILP (external))
3470     {
3471       state->depth = 0;
3472       state->instring = -1;
3473       state->incomment = 0;
3474       state->quoted = 0;
3475       state->comstyle = 0;      /* comment style a by default.  */
3476       state->comstr_start = -1; /* no comment/string seen.  */
3477       state->levelstarts = Qnil;
3478       state->prev_syntax = Smax;
3479     }
3480   else
3481     {
3482       tem = Fcar (external);
3483       if (!NILP (tem))
3484         state->depth = XINT (tem);
3485       else
3486         state->depth = 0;
3487
3488       external = Fcdr (external);
3489       external = Fcdr (external);
3490       external = Fcdr (external);
3491       tem = Fcar (external);
3492       /* Check whether we are inside string_fence-style string: */
3493       state->instring = (!NILP (tem)
3494                          ? (CHARACTERP (tem) ? XFASTINT (tem) : ST_STRING_STYLE)
3495                          : -1);
3496
3497       external = Fcdr (external);
3498       tem = Fcar (external);
3499       state->incomment = (!NILP (tem)
3500                           ? (INTEGERP (tem) ? XINT (tem) : -1)
3501                           : 0);
3502
3503       external = Fcdr (external);
3504       tem = Fcar (external);
3505       state->quoted = !NILP (tem);
3506
3507       /* if the eighth element of the list is nil, we are in comment
3508          style a.  If it is non-nil, we are in comment style b */
3509       external = Fcdr (external);
3510       external = Fcdr (external);
3511       tem = Fcar (external);
3512       state->comstyle = (NILP (tem)
3513                          ? 0
3514                          : (RANGED_INTEGERP (0, tem, ST_COMMENT_STYLE)
3515                             ? XINT (tem)
3516                             : ST_COMMENT_STYLE));
3517
3518       external = Fcdr (external);
3519       tem = Fcar (external);
3520       state->comstr_start =
3521         RANGED_INTEGERP (PTRDIFF_MIN, tem, PTRDIFF_MAX) ? XINT (tem) : -1;
3522       external = Fcdr (external);
3523       tem = Fcar (external);
3524       state->levelstarts = tem;
3525
3526       external = Fcdr (external);
3527       tem = Fcar (external);
3528       state->prev_syntax = NILP (tem) ? Smax : XINT (tem);
3529     }
3530 }
3531
3532 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3533        doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3534 Parsing stops at TO or when certain criteria are met;
3535  point is set to where parsing stops.
3536 If fifth arg OLDSTATE is omitted or nil,
3537  parsing assumes that FROM is the beginning of a function.
3538
3539 Value is a list of elements describing final state of parsing:
3540  0. depth in parens.
3541  1. character address of start of innermost containing list; nil if none.
3542  2. character address of start of last complete sexp terminated.
3543  3. non-nil if inside a string.
3544     (it is the character that will terminate the string,
3545      or t if the string should be terminated by a generic string delimiter.)
3546  4. nil if outside a comment, t if inside a non-nestable comment,
3547     else an integer (the current comment nesting).
3548  5. t if following a quote character.
3549  6. the minimum paren-depth encountered during this scan.
3550  7. style of comment, if any.
3551  8. character address of start of comment or string; nil if not in one.
3552  9. List of positions of currently open parens, outermost first.
3553 10. When the last position scanned holds the first character of a
3554     (potential) two character construct, the syntax of that position,
3555     otherwise nil.  That construct can be a two character comment
3556     delimiter or an Escaped or Char-quoted character.
3557 11..... Possible further internal information used by `parse-partial-sexp'.
3558
3559 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3560 in parentheses becomes equal to TARGETDEPTH.
3561 Fourth arg STOPBEFORE non-nil means stop when we come to
3562  any character that starts a sexp.
3563 Fifth arg OLDSTATE is a list like what this function returns.
3564  It is used to initialize the state of the parse.  Elements number 1, 2, 6
3565  are ignored.
3566 Sixth arg COMMENTSTOP non-nil means stop after the start of a comment.
3567  If it is the symbol `syntax-table', stop after the start of a comment or a
3568  string, or after end of a comment or a string.  */)
3569   (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth,
3570    Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3571 {
3572   struct lisp_parse_state state;
3573   EMACS_INT target;
3574
3575   if (!NILP (targetdepth))
3576     {
3577       CHECK_NUMBER (targetdepth);
3578       target = XINT (targetdepth);
3579     }
3580   else
3581     target = TYPE_MINIMUM (EMACS_INT);  /* We won't reach this depth.  */
3582
3583   validate_region (&from, &to);
3584   internalize_parse_state (oldstate, &state);
3585   scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3586                       XINT (to),
3587                       target, !NILP (stopbefore),
3588                       (NILP (commentstop)
3589                        ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3590
3591   SET_PT_BOTH (state.location, state.location_byte);
3592
3593   return
3594     Fcons (make_number (state.depth),
3595            Fcons (state.prevlevelstart < 0
3596                   ? Qnil : make_number (state.prevlevelstart),
3597              Fcons (state.thislevelstart < 0
3598                     ? Qnil : make_number (state.thislevelstart),
3599                Fcons (state.instring >= 0
3600                       ? (state.instring == ST_STRING_STYLE
3601                          ? Qt : make_number (state.instring)) : Qnil,
3602                  Fcons (state.incomment < 0 ? Qt :
3603                         (state.incomment == 0 ? Qnil :
3604                          make_number (state.incomment)),
3605                    Fcons (state.quoted ? Qt : Qnil,
3606                      Fcons (make_number (state.mindepth),
3607                        Fcons ((state.comstyle
3608                                ? (state.comstyle == ST_COMMENT_STYLE
3609                                   ? Qsyntax_table
3610                                   : make_number (state.comstyle))
3611                                : Qnil),
3612                          Fcons (((state.incomment
3613                                   || (state.instring >= 0))
3614                                  ? make_number (state.comstr_start)
3615                                  : Qnil),
3616                            Fcons (state.levelstarts,
3617                              Fcons (state.prev_syntax == Smax
3618                                     ? Qnil
3619                                     : make_number (state.prev_syntax),
3620                                 Qnil)))))))))));
3621 }
3622 \f
3623 void
3624 init_syntax_once (void)
3625 {
3626   register int i, c;
3627   Lisp_Object temp;
3628
3629   /* This has to be done here, before we call Fmake_char_table.  */
3630   DEFSYM (Qsyntax_table, "syntax-table");
3631
3632   /* Create objects which can be shared among syntax tables.  */
3633   Vsyntax_code_object = make_uninit_vector (Smax);
3634   for (i = 0; i < Smax; i++)
3635     ASET (Vsyntax_code_object, i, Fcons (make_number (i), Qnil));
3636
3637   /* Now we are ready to set up this property, so we can
3638      create syntax tables.  */
3639   Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3640
3641   temp = AREF (Vsyntax_code_object, Swhitespace);
3642
3643   Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3644
3645   /* Control characters should not be whitespace.  */
3646   temp = AREF (Vsyntax_code_object, Spunct);
3647   for (i = 0; i <= ' ' - 1; i++)
3648     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3649   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3650
3651   /* Except that a few really are whitespace.  */
3652   temp = AREF (Vsyntax_code_object, Swhitespace);
3653   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3654   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3655   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3656   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3657   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3658
3659   temp = AREF (Vsyntax_code_object, Sword);
3660   for (i = 'a'; i <= 'z'; i++)
3661     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3662   for (i = 'A'; i <= 'Z'; i++)
3663     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3664   for (i = '0'; i <= '9'; i++)
3665     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3666
3667   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3668   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3669
3670   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3671                         Fcons (make_number (Sopen), make_number (')')));
3672   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3673                         Fcons (make_number (Sclose), make_number ('(')));
3674   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3675                         Fcons (make_number (Sopen), make_number (']')));
3676   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3677                         Fcons (make_number (Sclose), make_number ('[')));
3678   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3679                         Fcons (make_number (Sopen), make_number ('}')));
3680   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3681                         Fcons (make_number (Sclose), make_number ('{')));
3682   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3683                         Fcons (make_number (Sstring), Qnil));
3684   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3685                         Fcons (make_number (Sescape), Qnil));
3686
3687   temp = AREF (Vsyntax_code_object, Ssymbol);
3688   for (i = 0; i < 10; i++)
3689     {
3690       c = "_-+*/&|<>="[i];
3691       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3692     }
3693
3694   temp = AREF (Vsyntax_code_object, Spunct);
3695   for (i = 0; i < 12; i++)
3696     {
3697       c = ".,;:?!#@~^'`"[i];
3698       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3699     }
3700
3701   /* All multibyte characters have syntax `word' by default.  */
3702   temp = AREF (Vsyntax_code_object, Sword);
3703   char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3704 }
3705
3706 void
3707 syms_of_syntax (void)
3708 {
3709   DEFSYM (Qsyntax_table_p, "syntax-table-p");
3710
3711   staticpro (&Vsyntax_code_object);
3712
3713   staticpro (&gl_state.object);
3714   staticpro (&gl_state.global_code);
3715   staticpro (&gl_state.current_syntax_table);
3716   staticpro (&gl_state.old_prop);
3717
3718   /* Defined in regex.c.  */
3719   staticpro (&re_match_object);
3720
3721   DEFSYM (Qscan_error, "scan-error");
3722   Fput (Qscan_error, Qerror_conditions,
3723         listn (CONSTYPE_PURE, 2, Qscan_error, Qerror));
3724   Fput (Qscan_error, Qerror_message,
3725         build_pure_c_string ("Scan error"));
3726
3727   DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
3728                doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace.  */);
3729
3730   DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
3731                doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3732 Otherwise, that text property is simply ignored.
3733 See the info node `(elisp)Syntax Properties' for a description of the
3734 `syntax-table' property.  */);
3735
3736   DEFVAR_INT ("syntax-propertize--done", syntax_propertize__done,
3737               doc: /* Position up to which syntax-table properties have been set.  */);
3738   syntax_propertize__done = -1;
3739   DEFSYM (Qinternal__syntax_propertize, "internal--syntax-propertize");
3740   Fmake_variable_buffer_local (intern ("syntax-propertize--done"));
3741
3742   words_include_escapes = 0;
3743   DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
3744                doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words.  */);
3745
3746   DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
3747                doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol.  */);
3748   multibyte_syntax_as_symbol = 0;
3749
3750   DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3751                open_paren_in_column_0_is_defun_start,
3752                doc: /* Non-nil means an open paren in column 0 denotes the start of a defun.  */);
3753   open_paren_in_column_0_is_defun_start = 1;
3754
3755
3756   DEFVAR_LISP ("find-word-boundary-function-table",
3757                Vfind_word_boundary_function_table,
3758                doc: /*
3759 Char table of functions to search for the word boundary.
3760 Each function is called with two arguments; POS and LIMIT.
3761 POS and LIMIT are character positions in the current buffer.
3762
3763 If POS is less than LIMIT, POS is at the first character of a word,
3764 and the return value of a function should be a position after the
3765 last character of that word.
3766
3767 If POS is not less than LIMIT, POS is at the last character of a word,
3768 and the return value of a function should be a position at the first
3769 character of that word.
3770
3771 In both cases, LIMIT bounds the search. */);
3772   Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3773
3774   DEFVAR_BOOL ("comment-end-can-be-escaped", Vcomment_end_can_be_escaped,
3775                doc: /* Non-nil means an escaped ender inside a comment doesn't end the comment.  */);
3776   Vcomment_end_can_be_escaped = 0;
3777   DEFSYM (Qcomment_end_can_be_escaped, "comment-end-can-be-escaped");
3778   Fmake_variable_buffer_local (Qcomment_end_can_be_escaped);
3779
3780   defsubr (&Ssyntax_table_p);
3781   defsubr (&Ssyntax_table);
3782   defsubr (&Sstandard_syntax_table);
3783   defsubr (&Scopy_syntax_table);
3784   defsubr (&Sset_syntax_table);
3785   defsubr (&Schar_syntax);
3786   defsubr (&Smatching_paren);
3787   defsubr (&Sstring_to_syntax);
3788   defsubr (&Smodify_syntax_entry);
3789   defsubr (&Sinternal_describe_syntax_value);
3790
3791   defsubr (&Sforward_word);
3792
3793   defsubr (&Sskip_chars_forward);
3794   defsubr (&Sskip_chars_backward);
3795   defsubr (&Sskip_syntax_forward);
3796   defsubr (&Sskip_syntax_backward);
3797
3798   defsubr (&Sforward_comment);
3799   defsubr (&Sscan_lists);
3800   defsubr (&Sscan_sexps);
3801   defsubr (&Sbackward_prefix_chars);
3802   defsubr (&Sparse_partial_sexp);
3803 }