code.delx.au - gnu-emacs/blob - src/syntax.c

   1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
   2    Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2001,
   3                  2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include <ctype.h>
  24 #include <setjmp.h>
  25 #include "lisp.h"
  26 #include "commands.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "keymap.h"
  30 #include "regex.h"
  31
  32 /* Make syntax table lookup use the data in gl_state.  */
  33 #define SYNTAX_ENTRY_VIA_PROPERTY
  34
  35 #include "syntax.h"
  36 #include "intervals.h"
  37 #include "category.h"
  38
  39 /* Then there are eight single-bit flags that have the following meanings:
  40   1. This character is the first of a two-character comment-start sequence.
  41   2. This character is the second of a two-character comment-start sequence.
  42   3. This character is the first of a two-character comment-end sequence.
  43   4. This character is the second of a two-character comment-end sequence.
  44   5. This character is a prefix, for backward-prefix-chars.
  45   6. The char is part of a delimiter for comments of style "b".
  46   7. This character is part of a nestable comment sequence.
  47   8. The char is part of a delimiter for comments of style "c".
  48   Note that any two-character sequence whose first character has flag 1
  49   and whose second character has flag 2 will be interpreted as a comment start.
  50
  51   Bits 6 and 8 are used to discriminate between different comment styles.
  52   Languages such as C++ allow two orthogonal syntax start/end pairs
  53   and bit 6 is used to determine whether a comment-end or Sendcomment
  54   ends style a or b.  Comment markers can start style a, b, c, or bc.
  55   Style a is always the default.
  56   For 2-char comment markers, the style b flag is only looked up on the second
  57   char of the comment marker and on the first char of the comment ender.
  58   For style c (as for the nested flag), the flag can be placed on any
  59   one of the chars.
  60   */
  61
  62 /* These macros extract specific flags from an integer
  63    that holds the syntax code and the flags.  The flag bits start at
        bit 16 of that integer.  Every macro yields 0 or 1, except
        SYNTAX_FLAGS_COMMENT_STYLEC (0 or 2) and
        SYNTAX_FLAGS_COMMENT_STYLE (0..3).  */
  64
  65 #define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)
  66
  67 #define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)
  68
  69 #define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)
  70
  71 #define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)
  72
  73 #define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)
  74
     /* Style b is bit 21 and yields 0 or 1.  Style c is bit 23; it is
        shifted by 22 and masked with 2, so it yields 0 or 2 and can be
        OR'ed with the style b value without colliding with it.  */
  75 #define SYNTAX_FLAGS_COMMENT_STYLEB(flags) (((flags) >> 21) & 1)
  76 #define SYNTAX_FLAGS_COMMENT_STYLEC(flags) (((flags) >> 22) & 2)
  77 /* FLAGS should be the flags of the main char of the comment marker, e.g.
  78    the second for comstart and the first for comend.
        The style b bit is taken from FLAGS only, whereas the style c bit
        is taken from either FLAGS or OTHER_FLAGS.  The result is
        0 (a), 1 (b), 2 (c) or 3 (bc).  */
  79 #define SYNTAX_FLAGS_COMMENT_STYLE(flags, other_flags) \
  80   (SYNTAX_FLAGS_COMMENT_STYLEB (flags) \
  81    | SYNTAX_FLAGS_COMMENT_STYLEC (flags) \
  82    | SYNTAX_FLAGS_COMMENT_STYLEC (other_flags))
  83
     /* The nested flag is bit 22.  It uses the same shift as
        SYNTAX_FLAGS_COMMENT_STYLEC but a different mask.  */
  84 #define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
  85
  86 /* These macros extract a particular flag for a given character.
        Each one obtains the character's syntax-with-flags value through
        SYNTAX_WITH_FLAGS and then applies the matching SYNTAX_FLAGS_*
        extractor.  */
  87
  88 #define SYNTAX_COMEND_FIRST(c) \
  89   (SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c)))
  90 #define SYNTAX_PREFIX(c) (SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c)))
  91
  92 /* We use these constants in place of a comment style or a
  93    string-terminator character to mark comments/strings started by
  94    comment_fence and string_fence codes.  back_comment, for example,
        substitutes them for the character when it meets Scomment_fence
        or Sstring_fence.  */
  95
  96 #define ST_COMMENT_STYLE (256 + 1)
  97 #define ST_STRING_STYLE (256 + 2)
  98
     /* Lisp objects referenced by the code in this file.  Qsyntax_table is
        the text property looked up with textget in update_syntax_table
        and the char-table purpose tested by syntax-table-p;
        Qsyntax_table_p is the predicate passed to CHECK_TYPE in
        check_syntax_table.  */
  99 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
 100
 101 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
 102    if not compiled with GCC.  No need to mark it, since it is used
 103    only very temporarily.  */
 104 Lisp_Object syntax_temp;
 105
 106 /* This is the internal form of the parse state used in parse-partial-sexp.
        Within this file, back_comment hands one to scan_sexps_forward and
        then reads its incomment, comstyle and comstr_start fields.  */
 107
 108 struct lisp_parse_state
 109   {
 110     int depth;     /* Depth at end of parsing.  */
 111     int instring;  /* -1 if not within string, else desired terminator.  */
 112     int incomment; /* -1 if in unnestable comment else comment nesting */
                        /* (back_comment tests this for nonzero to mean
                           "inside some comment").  */
 113     int comstyle;  /* comment style a=0, or b=1, or ST_COMMENT_STYLE.  */
 114     int quoted;    /* Nonzero if just after an escape char at end of parsing */
 115     int mindepth;  /* Minimum depth seen while scanning.  */
 116     /* Char number of most recent start-of-expression at current level */
 117     EMACS_INT thislevelstart;
 118     /* Char number of start of containing expression */
 119     EMACS_INT prevlevelstart;
 120     EMACS_INT location;      /* Char number at which parsing stopped.  */
 121     EMACS_INT comstr_start;  /* Position of last comment/string starter.  */
 122     Lisp_Object levelstarts; /* Char numbers of starts-of-expression
 123                                 of levels (starting from outermost).  */
 124   };
 125 \f
 126 /* These variables are a cache for finding the start of a defun.
 127    find_start_pos is the place for which the defun start was found.
 128    find_start_value is the defun start position found for it.
 129    find_start_value_byte is the corresponding byte position.
 130    find_start_buffer is the buffer it was found in.
 131    find_start_begv is the BEGV value when it was found.
 132    find_start_modiff is the value of MODIFF when it was found.

        find_start_value_byte also serves as a second return value of
        find_defun_start: back_comment reads it right after the call.  */
 133
 134 static EMACS_INT find_start_pos;
 135 static EMACS_INT find_start_value;
 136 static EMACS_INT find_start_value_byte;
 137 static struct buffer *find_start_buffer;
 138 static EMACS_INT find_start_begv;
 139 static int find_start_modiff;
 140
 141
     /* Prototypes for file-local functions that are referenced before
        their definitions; e.g. back_comment calls scan_sexps_forward.  */
 142 static Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
 143 static Lisp_Object skip_syntaxes (int, Lisp_Object, Lisp_Object);
 144 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, int);
 145 static void scan_sexps_forward (struct lisp_parse_state *,
 146                                 EMACS_INT, EMACS_INT, EMACS_INT, int,
 147                                 int, Lisp_Object, int);
 148 static int in_classes (int, Lisp_Object);
 149 \f
 150 /* Whether the syntax of the character C has the prefix flag set.
        Returns 1 if the flag is set and 0 otherwise.  This is a function
        wrapper around the file-local SYNTAX_PREFIX macro; it is not
        declared static.  */
 151 int syntax_prefix_flag_p (int c)
 152 {
 153   return SYNTAX_PREFIX (c);
 154 }
 155
     /* gl_state caches the interval range ([b_property, e_property)) over
        which the current syntax-table choice is valid; it is maintained by
        update_syntax_table below.  */
 156 struct gl_state_s gl_state;             /* Global state of syntax parser.  */
 157
 158 #define INTERVALS_AT_ONCE 10            /* 1 + max-number of intervals
 159                                            to scan to property-change.  */
 160
 161 /* Update gl_state to an appropriate interval which contains CHARPOS.  The
 162    sign of COUNT gives the relative position of CHARPOS wrt the previously
 163    valid interval.  If INIT, only [be]_property fields of gl_state are
 164    valid at start, the rest is filled basing on OBJECT.
 165
 166    `gl_state.*_i' are the intervals, and CHARPOS is further in the search
 167    direction than the intervals - or in an interval.  We update the
 168    current syntax-table basing on the property of this interval, and
 169    update the interval to start further than CHARPOS - or be
 170    NULL_INTERVAL.  We also update lim_property to be the next value of
 171    charpos to call this subroutine again - or be before/after the
 172    start/end of OBJECT.

        Signals an error (via `error') when the cached interval is null or
        when CHARPOS lies on the side of it opposite to COUNT's sign.  */
 173
 174 void
 175 update_syntax_table (EMACS_INT charpos, int count, int init,
 176                      Lisp_Object object)
 177 {
 178   Lisp_Object tmp_table;
       /* CNT counts the intervals visited by the final loop.  INVALIDATE
          stays nonzero while the previously cached range may have to be
          discarded.  */
 179   int cnt = 0, invalidate = 1;
 180   INTERVAL i;
 181
 182   if (init)
 183     {
 184       gl_state.old_prop = Qnil;
 185       gl_state.start = gl_state.b_property;
 186       gl_state.stop = gl_state.e_property;
 187       i = interval_of (charpos, object);
 188       gl_state.backward_i = gl_state.forward_i = i;
 189       invalidate = 0;
           /* No intervals at all: nothing more to set up.  */
 190       if (NULL_INTERVAL_P (i))
 191         return;
 192       /* interval_of updates only ->position of the return value, so
 193          update the parents manually to speed up update_interval.  */
 194       while (!NULL_PARENT (i))
 195         {
 196           if (AM_RIGHT_CHILD (i))
 197             INTERVAL_PARENT (i)->position = i->position
 198               - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
 199               - TOTAL_LENGTH (INTERVAL_PARENT (i))
 200               + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
 201           else
 202             INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
 203               + TOTAL_LENGTH (i);
 204           i = INTERVAL_PARENT (i);
 205         }
           /* Return to the interval containing CHARPOS (the loop above
              left I at the root).  */
 206       i = gl_state.forward_i;
 207       gl_state.b_property = i->position - gl_state.offset;
 208       gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
 209       goto update;
 210     }
       /* Start from the cached interval on the side we are moving toward.  */
 211   i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
 212
 213   /* We are guaranteed to be called with CHARPOS either in i,
 214      or further off.  */
 215   if (NULL_INTERVAL_P (i))
 216     error ("Error in syntax_table logic for to-the-end intervals");
 217   else if (charpos < i->position)               /* Move left.  */
 218     {
 219       if (count > 0)
 220         error ("Error in syntax_table logic for intervals <-");
 221       /* Update the interval.  */
 222       i = update_interval (i, charpos);
 223       if (INTERVAL_LAST_POS (i) != gl_state.b_property)
 224         {
 225           invalidate = 0;
 226           gl_state.forward_i = i;
 227           gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
 228         }
 229     }
 230   else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right.  */
 231     {
 232       if (count < 0)
 233         error ("Error in syntax_table logic for intervals ->");
 234       /* Update the interval.  */
 235       i = update_interval (i, charpos);
 236       if (i->position != gl_state.e_property)
 237         {
 238           invalidate = 0;
 239           gl_state.backward_i = i;
 240           gl_state.b_property = i->position - gl_state.offset;
 241         }
 242     }
 243
 244   update:
       /* Fetch the `syntax-table' property of the interval we ended up in.  */
 245   tmp_table = textget (i->plist, Qsyntax_table);
 246
 247   if (invalidate)
 248     invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
 249
 250   if (invalidate)               /* Did not get to adjacent interval.  */
 251     {                           /* with the same table => */
 252                                 /* invalidate the old range.  */
 253       if (count > 0)
 254         {
 255           gl_state.backward_i = i;
 256           gl_state.b_property = i->position - gl_state.offset;
 257         }
 258       else
 259         {
 260           gl_state.forward_i = i;
 261           gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
 262         }
 263     }
 264
       /* The property value changed, so reselect what lookups will use:
          a syntax table is used as is; a cons is stored as global_code
          with use_global set; anything else falls back to the current
          buffer's syntax table.  */
 265   if (!EQ (tmp_table, gl_state.old_prop))
 266     {
 267       gl_state.current_syntax_table = tmp_table;
 268       gl_state.old_prop = tmp_table;
 269       if (EQ (Fsyntax_table_p (tmp_table), Qt))
 270         {
 271           gl_state.use_global = 0;
 272         }
 273       else if (CONSP (tmp_table))
 274         {
 275           gl_state.use_global = 1;
 276           gl_state.global_code = tmp_table;
 277         }
 278       else
 279         {
 280           gl_state.use_global = 0;
 281           gl_state.current_syntax_table = current_buffer->syntax_table;
 282         }
 283     }
 284
       /* Extend the valid range in the scan direction: walk over at most
          INTERVALS_AT_ONCE further intervals, stopping early at the first
          one whose property differs from TMP_TABLE.  The CNT test skips
          the comparison for the starting interval itself.  */
 285   while (!NULL_INTERVAL_P (i))
 286     {
 287       if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
 288         {
 289           if (count > 0)
 290             {
 291               gl_state.e_property = i->position - gl_state.offset;
 292               gl_state.forward_i = i;
 293             }
 294           else
 295             {
 296               gl_state.b_property
 297                 = i->position + LENGTH (i) - gl_state.offset;
 298               gl_state.backward_i = i;
 299             }
 300           return;
 301         }
 302       else if (cnt == INTERVALS_AT_ONCE)
 303         {
 304           if (count > 0)
 305             {
 306               gl_state.e_property
 307                 = i->position + LENGTH (i) - gl_state.offset
 308                 /* e_property at EOB is not set to ZV but to ZV+1, so that
 309                    we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
 310                    having to check eob between the two.  */
 311                 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
 312               gl_state.forward_i = i;
 313             }
 314           else
 315             {
 316               gl_state.b_property = i->position - gl_state.offset;
 317               gl_state.backward_i = i;
 318             }
 319           return;
 320         }
 321       cnt++;
 322       i = count > 0 ? next_interval (i) : previous_interval (i);
 323     }
 324   eassert (NULL_INTERVAL_P (i)); /* This property goes to the end.  */
       /* Ran off the intervals: the range reaches the limit saved in
          gl_state.stop / gl_state.start by the INIT branch above.  */
 325   if (count > 0)
 326     gl_state.e_property = gl_state.stop;
 327   else
 328     gl_state.b_property = gl_state.start;
 329 }
 330 \f
 331 /* Returns TRUE if char at CHARPOS is quoted.
 332    Global syntax-table data should be set up already to be good at CHARPOS
 333    or after.  On return global syntax data is good for lookup at CHARPOS.

        BYTEPOS is the byte position corresponding to CHARPOS.  The char is
        considered quoted when it is preceded by an odd number of
        consecutive characters whose syntax is Scharquote or Sescape; the
        backward scan never goes before BEGV.  */
 334
 335 static int
 336 char_quoted (EMACS_INT charpos, EMACS_INT bytepos)
 337 {
 338   register enum syntaxcode code;
 339   register EMACS_INT beg = BEGV;
 340   register int quoted = 0;
 341   EMACS_INT orig = charpos;
 342
 343   while (charpos > beg)
 344     {
 345       int c;
 346       DEC_BOTH (charpos, bytepos);
 347
 348       UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
 349       c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
 350       code = SYNTAX (c);
           /* The run of quoting characters ends here.  */
 351       if (! (code == Scharquote || code == Sescape))
 352         break;
 353
           /* Each quoting character flips the parity.  */
 354       quoted = !quoted;
 355     }
 356
       /* Re-validate the global syntax data for the original position.  */
 357   UPDATE_SYNTAX_TABLE (orig);
 358   return quoted;
 359 }
 360
 361 /* Return the bytepos one character before BYTEPOS.
 362    We assume that BYTEPOS is not at the start of the buffer.
        In a buffer whose enable_multibyte_characters is nil this is simply
        BYTEPOS - 1; otherwise DEC_POS steps back over one character.  */
 363
 364 static INLINE EMACS_INT
 365 dec_bytepos (EMACS_INT bytepos)
 366 {
 367   if (NILP (current_buffer->enable_multibyte_characters))
 368     return bytepos - 1;
 369
 370   DEC_POS (bytepos);
 371   return bytepos;
 372 }
 373 \f
 374 /* Return a defun-start position before POS and not too far before.
 375    It should be the last one before POS, or nearly the last.
 376
 377    When open_paren_in_column_0_is_defun_start is zero,
 378    only the beginning of the buffer is treated as a defun-start.
 379
 380    We record the information about where the scan started
 381    and what its result was, so that another call in the same area
 382    can return the same value very quickly.
 383
 384    There is no promise at which position the global syntax data is
 385    valid on return from the subroutine, so the caller should explicitly
 386    update the global data.

        POS_BYTE is the byte position corresponding to POS.  The byte
        position of the result is left in find_start_value_byte.  Point is
        restored to its original value before returning from a scan.  */
 387
 388 static EMACS_INT
 389 find_defun_start (EMACS_INT pos, EMACS_INT pos_byte)
 390 {
 391   EMACS_INT opoint = PT, opoint_byte = PT_BYTE;
 392
       /* NOTE(review): this path sets find_start_value_byte but leaves the
          other cache variables alone, so a later cache hit below would see
          this byte position next to an older find_start_value -- confirm
          whether that combination can occur in practice.  */
 393   if (!open_paren_in_column_0_is_defun_start)
 394     {
 395       find_start_value_byte = BEGV_BYTE;
 396       return BEGV;
 397     }
 398
 399   /* Use previous finding, if it's valid and applies to this inquiry.  */
 400   if (current_buffer == find_start_buffer
 401       /* Reuse the defun-start even if POS is a little farther on.
 402          POS might be in the next defun, but that's ok.
 403          Our value may not be the best possible, but will still be usable.  */
 404       && pos <= find_start_pos + 1000
 405       && pos >= find_start_value
 406       && BEGV == find_start_begv
 407       && MODIFF == find_start_modiff)
 408     return find_start_value;
 409
 410   /* Back up to start of line.  */
 411   scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
 412
 413   /* We optimize syntax-table lookup for rare updates.  Thus we accept
 414      only those `^\s(' which are good in global _and_ text-property
 415      syntax-tables.  */
 416   SETUP_BUFFER_SYNTAX_TABLE ();
       /* From here on the scan position is point itself (PT / PT_BYTE).  */
 417   while (PT > BEGV)
 418     {
 419       int c;
 420
 421       /* Open-paren at start of line means we may have found our
 422          defun-start.  */
 423       c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
 424       if (SYNTAX (c) == Sopen)
 425         {
 426           SETUP_SYNTAX_TABLE (PT + 1, -1);      /* Try again... */
 427           c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
 428           if (SYNTAX (c) == Sopen)
 429             break;
 430           /* Now fallback to the default value.  */
 431           SETUP_BUFFER_SYNTAX_TABLE ();
 432         }
 433       /* Move to beg of previous line.  */
 434       scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
 435     }
 436
 437   /* Record what we found, for the next try.  */
 438   find_start_value = PT;
 439   find_start_value_byte = PT_BYTE;
 440   find_start_buffer = current_buffer;
 441   find_start_modiff = MODIFF;
 442   find_start_begv = BEGV;
 443   find_start_pos = pos;
 444
       /* Put point back where it was on entry.  */
 445   TEMP_SET_PT_BOTH (opoint, opoint_byte);
 446
 447   return find_start_value;
 448 }
 449 \f
 450 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE.
        The global syntax data is moved back to look at that character and
        then moved forward again to the character's end, i.e. to the
        original POS.
        NOTE(review): this fetches with FETCH_CHAR, whereas char_quoted and
        back_comment use FETCH_CHAR_AS_MULTIBYTE -- confirm that the
        difference is intended.  */
 451
 452 static int
 453 prev_char_comend_first (EMACS_INT pos, EMACS_INT pos_byte)
 454 {
 455   int c, val;
 456
 457   DEC_BOTH (pos, pos_byte);
 458   UPDATE_SYNTAX_TABLE_BACKWARD (pos);
 459   c = FETCH_CHAR (pos_byte);
 460   val = SYNTAX_COMEND_FIRST (c);
 461   UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
 462   return val;
 463 }
 464
 465 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE.  */
 466
 467 /* static int
 468  * prev_char_comstart_first (pos, pos_byte)
 469  *      int pos, pos_byte;
 470  * {
 471  *   int c, val;
 472  *
 473  *   DEC_BOTH (pos, pos_byte);
 474  *   UPDATE_SYNTAX_TABLE_BACKWARD (pos);
 475  *   c = FETCH_CHAR (pos_byte);
 476  *   val = SYNTAX_COMSTART_FIRST (c);
 477  *   UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
 478  *   return val;
 479  * } */
 480
 481 /* Checks whether charpos FROM is at the end of a comment.
 482    FROM_BYTE is the bytepos corresponding to FROM.
 483    Do not move back before STOP.
        COMNESTED is nonzero if the comment being looked for is nestable,
        and COMSTYLE is its style; both are compared against the flags of
        each candidate comment delimiter.
 484
 485    Return a positive value if we find a comment ending at FROM/FROM_BYTE;
 486    return -1 otherwise.
        (The positive value is the charpos of the comment's beginning.)
 487
 488    If successful, store the charpos of the comment's beginning
 489    into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
        On failure they receive the original FROM and FROM_BYTE.
 490
 491    Global syntax data remains valid for backward search starting at
 492    the returned value (or at FROM, if the search was not successful).  */
 493
 494 static int
 495 back_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop, int comnested, int comstyle, EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr)
 496 {
 497   /* Look back, counting the parity of string-quotes,
 498      and recording the comment-starters seen.
 499      When we reach a safe place, assume that's not in a string;
 500      then step the main scan to the earliest comment-starter seen
 501      an even number of string quotes away from the safe place.
 502
 503      OFROM[I] is position of the earliest comment-starter seen
 504      which is I+2X quotes from the comment-end.
 505      PARITY is current parity of quotes from the comment end.

          NOTE(review): no variables named OFROM or PARITY exist in this
          function; the code below tracks the string state in string_style
          and string_lossage instead.  */
 506   int string_style = -1;        /* Presumed outside of any string. */
 507   int string_lossage = 0;
 508   /* Not a real lossage: indicates that we have passed a matching comment
 509      starter plus a non-matching comment-ender, meaning that any matching
 510      comment-starter we might see later could be a false positive (hidden
 511      inside another comment).
 512      Test case:  { a (* b } c (* d *) */
 513   int comment_lossage = 0;
 514   EMACS_INT comment_end = from;
 515   EMACS_INT comment_end_byte = from_byte;
       /* comstart_pos == 0 means "no comment starter recorded yet";
          comstart_byte is assigned whenever comstart_pos is.  */
 516   EMACS_INT comstart_pos = 0;
 517   EMACS_INT comstart_byte;
 518   /* Place where the containing defun starts,
 519      or 0 if we didn't come across it yet.  */
 520   EMACS_INT defun_start = 0;
 521   EMACS_INT defun_start_byte = 0;
 522   register enum syntaxcode code;
 523   int nesting = 1;              /* current comment nesting */
 524   int c;
       /* Syntax-with-flags of the character examined in the previous
          iteration, i.e. the one following the current character.  */
 525   int syntax = 0;
 526
 527   /* FIXME: A }} comment-ender style leads to incorrect behavior
 528      in the case of {{ c }}} because we ignore the last two chars which are
 529      assumed to be comment-enders although they aren't.  */
 530
 531   /* At beginning of range to scan, we're outside of strings;
 532      that determines quote parity to the comment-end.  */
 533   while (from != stop)
 534     {
 535       EMACS_INT temp_byte;
 536       int prev_syntax, com2start, com2end;
 537       int comstart;
 538
 539       /* Move back and examine a character.  */
 540       DEC_BOTH (from, from_byte);
 541       UPDATE_SYNTAX_TABLE_BACKWARD (from);
 542
 543       prev_syntax = syntax;
 544       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 545       syntax = SYNTAX_WITH_FLAGS (c);
 546       code = SYNTAX (c);
 547
 548       /* Check for 2-char comment markers.  */
 549       com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
 550                    && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
 551                    && (comstyle
 552                        == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
 553                    && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
 554                        || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
 555       com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
 556                  && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
 557       comstart = (com2start || code == Scomment);
 558
 559       /* Nasty cases with overlapping 2-char comment markers:
 560          - snmp-mode: -- c -- foo -- c --
 561                       --- c --
 562                       ------ c --
 563          - c-mode:    *||*
 564                       |* *|* *|
 565                       |*| |* |*|
 566                       ///   */
 567
 568       /* If a 2-char comment sequence partly overlaps with another,
 569          we don't try to be clever.  E.g. |*| in C, or }% in modes that
 570          have %..\n and %{..}%.  */
 571       if (from > stop && (com2end || comstart))
 572         {
               /* Peek at the character before the current one without
                  moving FROM.  */
 573           EMACS_INT next = from, next_byte = from_byte;
 574           int next_c, next_syntax;
 575           DEC_BOTH (next, next_byte);
 576           UPDATE_SYNTAX_TABLE_BACKWARD (next);
 577           next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
 578           next_syntax = SYNTAX_WITH_FLAGS (next_c);
 579           if (((comstart || comnested)
 580                && SYNTAX_FLAGS_COMEND_SECOND (syntax)
 581                && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
 582               || ((com2end || comnested)
 583                   && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
 584                   && (comstyle
 585                       == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
 586                   && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
 587             goto lossage;
 588           /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
 589         }
 590
 591       if (com2start && comstart_pos == 0)
 592         /* We're looking at a comment starter.  But it might be a comment
 593            ender as well (see snmp-mode).  The first time we see one, we
 594            need to consider it as a comment starter,
 595            and the subsequent times as a comment ender.  */
 596         com2end = 0;
 597
 598       /* Turn a 2-char comment sequences into the appropriate syntax.  */
 599       if (com2end)
 600         code = Sendcomment;
 601       else if (com2start)
 602         code = Scomment;
 603       /* Ignore comment starters of a different style.  */
 604       else if (code == Scomment
 605                && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
 606                    || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
 607         continue;
 608
 609       /* Ignore escaped characters, except comment-enders.  */
 610       if (code != Sendcomment && char_quoted (from, from_byte))
 611         continue;
 612
 613       switch (code)
 614         {
 615         case Sstring_fence:
 616         case Scomment_fence:
 617           c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
               /* Fall through, with C replaced by the fence pseudo-style so
                  that fences are tracked like string delimiters.  */
 618         case Sstring:
 619           /* Track parity of quotes.  */
 620           if (string_style == -1)
 621             /* Entering a string.  */
 622             string_style = c;
 623           else if (string_style == c)
 624             /* Leaving the string.  */
 625             string_style = -1;
 626           else
 627             /* If we have two kinds of string delimiters.
 628                There's no way to grok this scanning backwards.  */
 629             string_lossage = 1;
 630           break;
 631
 632         case Scomment:
 633           /* We've already checked that it is the relevant comstyle.  */
 634           if (string_style != -1 || comment_lossage || string_lossage)
 635             /* There are odd string quotes involved, so let's be careful.
 636                Test case in Pascal: " { " a { " } */
 637             goto lossage;
 638
 639           if (!comnested)
 640             {
 641               /* Record best comment-starter so far.  */
 642               comstart_pos = from;
 643               comstart_byte = from_byte;
 644             }
 645           else if (--nesting <= 0)
 646             /* nested comments have to be balanced, so we don't need to
 647                keep looking for earlier ones.  We use here the same (slightly
 648                incorrect) reasoning as below:  since it is followed by uniform
 649                paired string quotes, this comment-start has to be outside of
 650                strings, else the comment-end itself would be inside a string. */
 651             goto done;
 652           break;
 653
 654         case Sendcomment:
 655           if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
 656               && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
 657                   || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
 658             /* This is the same style of comment ender as ours. */
 659             {
 660               if (comnested)
 661                 nesting++;
 662               else
 663                 /* Anything before that can't count because it would match
 664                    this comment-ender rather than ours.  */
 665                 from = stop;    /* Break out of the loop.  */
 666             }
 667           else if (comstart_pos != 0 || c != '\n')
 668             /* We're mixing comment styles here, so we'd better be careful.
 669                The (comstart_pos != 0 || c != '\n') check is not quite correct
 670                (we should just always set comment_lossage), but removing it
 671                would imply that any multiline comment in C would go through
 672                lossage, which seems overkill.
 673                The failure should only happen in the rare cases such as
 674                  { (* } *)   */
 675             comment_lossage = 1;
 676           break;
 677
 678         case Sopen:
 679           /* Assume a defun-start point is outside of strings.  */
 680           if (open_paren_in_column_0_is_defun_start
 681               && (from == stop
 682                   || (temp_byte = dec_bytepos (from_byte),
 683                       FETCH_CHAR (temp_byte) == '\n')))
 684             {
 685               defun_start = from;
 686               defun_start_byte = from_byte;
 687               from = stop;      /* Break out of the loop.  */
 688             }
 689           break;
 690
 691         default:
 692           break;
 693         }
 694     }
 695
       /* The backward scan ended without jumping to `lossage' or `done'.  */
 696   if (comstart_pos == 0)
 697     {
           /* No comment starter was recorded: report failure at the
              original position.  */
 698       from = comment_end;
 699       from_byte = comment_end_byte;
 700       UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
 701     }
 702   /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
 703      or `done'), then we've found the beginning of the non-nested comment.  */
 704   else if (1)   /* !comnested */
 705     {
 706       from = comstart_pos;
 707       from_byte = comstart_byte;
 708       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
 709     }
 710   else
 711     {
           /* Because of the `else if (1)' above, this branch is entered
              only through the `lossage' label.  */
 712       struct lisp_parse_state state;
 713     lossage:
 714       /* We had two kinds of string delimiters mixed up
 715          together.  Decode this going forwards.
 716          Scan fwd from a known safe place (beginning-of-defun)
 717          to the one in question; this records where we
 718          last passed a comment starter.  */
 719       /* If we did not already find the defun start, find it now.  */
 720       if (defun_start == 0)
 721         {
 722           defun_start = find_defun_start (comment_end, comment_end_byte);
 723           defun_start_byte = find_start_value_byte;
 724         }
 725       do
 726         {
 727           scan_sexps_forward (&state,
 728                               defun_start, defun_start_byte,
 729                               comment_end, -10000, 0, Qnil, 0);
               /* Assume this pass is the last one; the retry case below
                  resets defun_start to a position before comment_end.  */
 730           defun_start = comment_end;
 731           if (state.incomment == (comnested ? 1 : -1)
 732               && state.comstyle == comstyle)
 733             from = state.comstr_start;
 734           else
 735             {
 736               from = comment_end;
 737               if (state.incomment)
 738                 /* If comment_end is inside some other comment, maybe ours
 739                    is nested, so we need to try again from within the
 740                    surrounding comment.  Example: { a (* " *)  */
 741                 {
 742                   /* FIXME: We should advance by one or two chars. */
 743                   defun_start = state.comstr_start + 2;
 744                   defun_start_byte = CHAR_TO_BYTE (defun_start);
 745                 }
 746             }
 747         } while (defun_start < comment_end);
 748
 749       from_byte = CHAR_TO_BYTE (from);
 750       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
 751     }
 752
 753  done:
       /* The output parameters are stored on success and failure alike.  */
 754   *charpos_ptr = from;
 755   *bytepos_ptr = from_byte;
 756
 757   return (from == comment_end) ? -1 : from;
 758 }
 759 \f
 760 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
 761        doc: /* Return t if OBJECT is a syntax table.
 762 Currently, any char-table counts as a syntax table.  */)
 763   (Lisp_Object object)
 764 {
       /* OBJECT qualifies only if it is a char-table whose purpose slot is
          Qsyntax_table.  NOTE(review): the doc string's "any char-table"
          is broader than this check.  */
 765   if (CHAR_TABLE_P (object)
 766       && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
 767     return Qt;
 768   return Qnil;
 769 }
 770
     /* Check that OBJ is a syntax table, using the same test as
        syntax-table-p (a char-table whose purpose is Qsyntax_table).
        The test is handed to CHECK_TYPE along with Qsyntax_table_p as
        the predicate and OBJ as the offending value.  */
 771 static void
 772 check_syntax_table (Lisp_Object obj)
 773 {
 774   CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
 775               Qsyntax_table_p, obj);
 776 }
 777
     /* Lisp primitive taking no arguments: returns the syntax_table slot
        of current_buffer.  */
 778 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
 779        doc: /* Return the current syntax table.
 780 This is the one specified by the current buffer.  */)
 781   (void)
 782 {
 783   return current_buffer->syntax_table;
 784 }
 785
 786 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
 787    Sstandard_syntax_table, 0, 0, 0,
 788        doc: /* Return the standard syntax table.
 789 This is the one used for new buffers.  */)
 790   (void)
 791 {
 792   return Vstandard_syntax_table;
 793 }
 794
 795 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
 796        doc: /* Construct a new syntax table and return it.
 797 It is a copy of the TABLE, which defaults to the standard syntax table.  */)
 798   (Lisp_Object table)
 799 {
 800   Lisp_Object copy;
 801
 802   if (!NILP (table))
 803     check_syntax_table (table);
 804   else
 805     table = Vstandard_syntax_table;
 806
 807   copy = Fcopy_sequence (table);
 808
 809   /* Only the standard syntax table should have a default element.
 810      Other syntax tables should inherit from parents instead.  */
 811   XCHAR_TABLE (copy)->defalt = Qnil;
 812
 813   /* Copied syntax tables should all have parents.
 814      If we copied one with no parent, such as the standard syntax table,
 815      use the standard syntax table as the copy's parent.  */
 816   if (NILP (XCHAR_TABLE (copy)->parent))
 817     Fset_char_table_parent (copy, Vstandard_syntax_table);
 818   return copy;
 819 }
 820
 821 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
 822        doc: /* Select a new syntax table for the current buffer.
 823 One argument, a syntax table.  */)
 824   (Lisp_Object table)
 825 {
 826   int idx;
 827   check_syntax_table (table);
 828   current_buffer->syntax_table = table;
 829   /* Indicate that this buffer now has a specified syntax table.  */
 830   idx = PER_BUFFER_VAR_IDX (syntax_table);
 831   SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
 832   return table;
 833 }
 834 \f
/* Convert a letter which signifies a syntax code
 into the code it signifies.
 This is used by modify-syntax-entry, and other things.

 The table is indexed by the byte value of the designator letter.
 An entry of 0377 marks a byte that does not designate any syntax
 code.  Note that only the first 0200 entries are given explicit
 initializers below, so entries 0200 through 0377 are
 zero-initialized by the compiler rather than set to 0377.  */

unsigned char syntax_spec_code[0400] =
  { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* 000 ... 007 */
    0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
    0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
    0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
    /* Space, !, ", #, then $, %, &, '  */
    (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
        (char) Smath, 0377, 0377, (char) Squote,
    /* (, ), *, +, then comma, -, ., /  */
    (char) Sopen, (char) Sclose, 0377, 0377,
        0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
    0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* 0 ... 7 */
    /* 8, 9, :, ;, then <, =, >, ?  */
    0377, 0377, 0377, 0377,
        (char) Scomment, 0377, (char) Sendcomment, 0377,
    (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* @, A ... */
    0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
    /* P ... W; the last entry is the letter W.  */
    0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
    /* X, Y, Z, [, backslash, ], ^, _  */
    0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
    0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* `, a, ... */
    0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
    /* p ... w; the last entry is the letter w.  */
    0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
    /* x, y, z, {, |, }, ~, DEL  */
    0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
  };
 860
 861 /* Indexed by syntax code, give the letter that describes it.  */
 862
 863 char syntax_code_spec[16] =
 864   {
 865     ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
 866     '!', '|'
 867   };
 868
/* Indexed by syntax code, give the object (cons of syntax code and
   nil) to be stored in syntax table.  Since these objects can be
   shared among syntax tables, we generate them in advance.  By
   sharing objects, the function `describe-syntax' can give a more
   compact listing.

   Fstring_to_syntax returns an element of this vector when the
   computed value is below the vector's size and there is no matching
   character; otherwise it conses a fresh cell.  */
static Lisp_Object Vsyntax_code_object;
 875
 876 \f
 877 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
 878        doc: /* Return the syntax code of CHARACTER, described by a character.
 879 For example, if CHARACTER is a word constituent, the
 880 character `w' (119) is returned.
 881 The characters that correspond to various syntax codes
 882 are listed in the documentation of `modify-syntax-entry'.  */)
 883   (Lisp_Object character)
 884 {
 885   int char_int;
 886   CHECK_CHARACTER (character);
 887   char_int = XINT (character);
 888   SETUP_BUFFER_SYNTAX_TABLE ();
 889   return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
 890 }
 891
 892 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
 893        doc: /* Return the matching parenthesis of CHARACTER, or nil if none.  */)
 894   (Lisp_Object character)
 895 {
 896   int char_int, code;
 897   CHECK_NUMBER (character);
 898   char_int = XINT (character);
 899   SETUP_BUFFER_SYNTAX_TABLE ();
 900   code = SYNTAX (char_int);
 901   if (code == Sopen || code == Sclose)
 902     return SYNTAX_MATCH (char_int);
 903   return Qnil;
 904 }
 905
 906 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
 907        doc: /* Convert a syntax specification STRING into syntax cell form.
 908 STRING should be a string as it is allowed as argument of
 909 `modify-syntax-entry'.  Value is the equivalent cons cell
 910 \(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
 911 text property.  */)
 912   (Lisp_Object string)
 913 {
 914   register const unsigned char *p;
 915   register enum syntaxcode code;
 916   int val;
 917   Lisp_Object match;
 918
 919   CHECK_STRING (string);
 920
 921   p = SDATA (string);
 922   code = (enum syntaxcode) syntax_spec_code[*p++];
 923   if (((int) code & 0377) == 0377)
 924     error ("Invalid syntax description letter: %c", p[-1]);
 925
 926   if (code == Sinherit)
 927     return Qnil;
 928
 929   if (*p)
 930     {
 931       int len;
 932       int character = STRING_CHAR_AND_LENGTH (p, len);
 933       XSETINT (match, character);
 934       if (XFASTINT (match) == ' ')
 935         match = Qnil;
 936       p += len;
 937     }
 938   else
 939     match = Qnil;
 940
 941   val = (int) code;
 942   while (*p)
 943     switch (*p++)
 944       {
 945       case '1':
 946         val |= 1 << 16;
 947         break;
 948
 949       case '2':
 950         val |= 1 << 17;
 951         break;
 952
 953       case '3':
 954         val |= 1 << 18;
 955         break;
 956
 957       case '4':
 958         val |= 1 << 19;
 959         break;
 960
 961       case 'p':
 962         val |= 1 << 20;
 963         break;
 964
 965       case 'b':
 966         val |= 1 << 21;
 967         break;
 968
 969       case 'n':
 970         val |= 1 << 22;
 971         break;
 972
 973       case 'c':
 974         val |= 1 << 23;
 975         break;
 976       }
 977
 978   if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
 979     return XVECTOR (Vsyntax_code_object)->contents[val];
 980   else
 981     /* Since we can't use a shared object, let's make a new one.  */
 982     return Fcons (make_number (val), match);
 983 }
 984
 985 /* I really don't know why this is interactive
 986    help-form should at least be made useful whilst reading the second arg.  */
 987 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
 988   "cSet syntax for character: \nsSet syntax for %s to: ",
 989        doc: /* Set syntax for character CHAR according to string NEWENTRY.
 990 The syntax is changed only for table SYNTAX-TABLE, which defaults to
 991  the current buffer's syntax table.
 992 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
 993 in the range MIN to MAX are changed.
 994 The first character of NEWENTRY should be one of the following:
 995   Space or -  whitespace syntax.    w   word constituent.
 996   _           symbol constituent.   .   punctuation.
 997   (           open-parenthesis.     )   close-parenthesis.
 998   "           string quote.         \\   escape.
 999   $           paired delimiter.     '   expression quote or prefix operator.
1000   <           comment starter.      >   comment ender.
1001   /           character-quote.      @   inherit from `standard-syntax-table'.
1002   |           generic string fence. !   generic comment fence.
1003
1004 Only single-character comment start and end sequences are represented thus.
1005 Two-character sequences are represented as described below.
1006 The second character of NEWENTRY is the matching parenthesis,
1007  used only if the first character is `(' or `)'.
1008 Any additional characters are flags.
1009 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1010  1 means CHAR is the start of a two-char comment start sequence.
1011  2 means CHAR is the second character of such a sequence.
1012  3 means CHAR is the start of a two-char comment end sequence.
1013  4 means CHAR is the second character of such a sequence.
1014
1015 There can be several orthogonal comment sequences.  This is to support
1016 language modes such as C++.  By default, all comment sequences are of style
1017 a, but you can set the comment sequence style to b (on the second character
1018 of a comment-start, and the first character of a comment-end sequence) and/or
1019 c (on any of its chars) using this flag:
1020  b means CHAR is part of comment sequence b.
1021  c means CHAR is part of comment sequence c.
1022  n means CHAR is part of a nestable comment sequence.
1023
1024  p means CHAR is a prefix character for `backward-prefix-chars';
1025    such characters are treated as whitespace when they occur
1026    between expressions.
1027 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE)  */)
1028   (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1029 {
1030   if (CONSP (c))
1031     {
1032       CHECK_CHARACTER_CAR (c);
1033       CHECK_CHARACTER_CDR (c);
1034     }
1035   else
1036     CHECK_CHARACTER (c);
1037
1038   if (NILP (syntax_table))
1039     syntax_table = current_buffer->syntax_table;
1040   else
1041     check_syntax_table (syntax_table);
1042
1043   newentry = Fstring_to_syntax (newentry);
1044   if (CONSP (c))
1045     SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1046   else
1047     SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1048
1049   /* We clear the regexp cache, since character classes can now have
1050      different values from those in the compiled regexps.*/
1051   clear_regexp_cache ();
1052
1053   return Qnil;
1054 }
1055 \f
1056 /* Dump syntax table to buffer in human-readable format */
1057
1058 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1059        Sinternal_describe_syntax_value, 1, 1, 0,
1060        doc: /* Insert a description of the internal syntax description SYNTAX at point.  */)
1061   (Lisp_Object syntax)
1062 {
1063   register enum syntaxcode code;
1064   int syntax_code;
1065   char desc, start1, start2, end1, end2, prefix,
1066     comstyleb, comstylec, comnested;
1067   char str[2];
1068   Lisp_Object first, match_lisp, value = syntax;
1069
1070   if (NILP (value))
1071     {
1072       insert_string ("default");
1073       return syntax;
1074     }
1075
1076   if (CHAR_TABLE_P (value))
1077     {
1078       insert_string ("deeper char-table ...");
1079       return syntax;
1080     }
1081
1082   if (!CONSP (value))
1083     {
1084       insert_string ("invalid");
1085       return syntax;
1086     }
1087
1088   first = XCAR (value);
1089   match_lisp = XCDR (value);
1090
1091   if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
1092     {
1093       insert_string ("invalid");
1094       return syntax;
1095     }
1096
1097   syntax_code = XINT (first);
1098   code = (enum syntaxcode) (syntax_code & 0377);
1099   start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1100   start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);;
1101   end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1102   end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1103   prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1104   comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1105   comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1106   comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1107
1108   if ((int) code < 0 || (int) code >= (int) Smax)
1109     {
1110       insert_string ("invalid");
1111       return syntax;
1112     }
1113   desc = syntax_code_spec[(int) code];
1114
1115   str[0] = desc, str[1] = 0;
1116   insert (str, 1);
1117
1118   if (NILP (match_lisp))
1119     insert (" ", 1);
1120   else
1121     insert_char (XINT (match_lisp));
1122
1123   if (start1)
1124     insert ("1", 1);
1125   if (start2)
1126     insert ("2", 1);
1127
1128   if (end1)
1129     insert ("3", 1);
1130   if (end2)
1131     insert ("4", 1);
1132
1133   if (prefix)
1134     insert ("p", 1);
1135   if (comstyleb)
1136     insert ("b", 1);
1137   if (comstylec)
1138     insert ("c", 1);
1139   if (comnested)
1140     insert ("n", 1);
1141
1142   insert_string ("\twhich means: ");
1143
1144   switch (SWITCH_ENUM_CAST (code))
1145     {
1146     case Swhitespace:
1147       insert_string ("whitespace"); break;
1148     case Spunct:
1149       insert_string ("punctuation"); break;
1150     case Sword:
1151       insert_string ("word"); break;
1152     case Ssymbol:
1153       insert_string ("symbol"); break;
1154     case Sopen:
1155       insert_string ("open"); break;
1156     case Sclose:
1157       insert_string ("close"); break;
1158     case Squote:
1159       insert_string ("prefix"); break;
1160     case Sstring:
1161       insert_string ("string"); break;
1162     case Smath:
1163       insert_string ("math"); break;
1164     case Sescape:
1165       insert_string ("escape"); break;
1166     case Scharquote:
1167       insert_string ("charquote"); break;
1168     case Scomment:
1169       insert_string ("comment"); break;
1170     case Sendcomment:
1171       insert_string ("endcomment"); break;
1172     case Sinherit:
1173       insert_string ("inherit"); break;
1174     case Scomment_fence:
1175       insert_string ("comment fence"); break;
1176     case Sstring_fence:
1177       insert_string ("string fence"); break;
1178     default:
1179       insert_string ("invalid");
1180       return syntax;
1181     }
1182
1183   if (!NILP (match_lisp))
1184     {
1185       insert_string (", matches ");
1186       insert_char (XINT (match_lisp));
1187     }
1188
1189   if (start1)
1190     insert_string (",\n\t  is the first character of a comment-start sequence");
1191   if (start2)
1192     insert_string (",\n\t  is the second character of a comment-start sequence");
1193
1194   if (end1)
1195     insert_string (",\n\t  is the first character of a comment-end sequence");
1196   if (end2)
1197     insert_string (",\n\t  is the second character of a comment-end sequence");
1198   if (comstyleb)
1199     insert_string (" (comment style b)");
1200   if (comstylec)
1201     insert_string (" (comment style c)");
1202   if (comnested)
1203     insert_string (" (nestable)");
1204
1205   if (prefix)
1206     insert_string (",\n\t  is a prefix character for `backward-prefix-chars'");
1207
1208   return syntax;
1209 }
1210 \f
/* Return the position across COUNT words from FROM.
   If that many words cannot be found before the end of the buffer, return 0.
   COUNT negative means scan backward and stop at word beginning.

   A word is entered at a character whose syntax is Sword, or Sescape
   or Scharquote when words_include_escapes is set.  Its extent is
   then decided in one of two ways: by the function found for that
   character in Vfind_word_boundary_function_table when that entry is
   fbound, or otherwise by scanning on while the same syntax test
   holds and word_boundary_p reports no boundary between neighboring
   characters.

   immediate_quit is set for the duration of the scan and cleared
   again on every return path.  */

EMACS_INT
scan_words (register EMACS_INT from, register EMACS_INT count)
{
  register EMACS_INT beg = BEGV;
  register EMACS_INT end = ZV;
  register EMACS_INT from_byte = CHAR_TO_BYTE (from);
  register enum syntaxcode code;
  int ch0, ch1;
  /* NOTE(review): SCRIPT is assigned below but never read in this
     function.  */
  Lisp_Object func, script, pos;

  immediate_quit = 1;
  QUIT;

  SETUP_SYNTAX_TABLE (from, count);

  /* Forward scan: one iteration per word.  */
  while (count > 0)
    {
      /* Skip ahead to the first character that can start a word.
         Reaching the end of the accessible region here means the
         requested number of words does not exist.  */
      while (1)
        {
          if (from == end)
            {
              immediate_quit = 0;
              return 0;
            }
          UPDATE_SYNTAX_TABLE_FORWARD (from);
          ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
          code = SYNTAX (ch0);
          INC_BOTH (from, from_byte);
          if (words_include_escapes
              && (code == Sescape || code == Scharquote))
            break;
          if (code == Sword)
            break;
        }
      /* Now CH0 is a character which begins a word and FROM is the
         position of the next character.  */
      func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
      if (! NILP (Ffboundp (func)))
        {
          /* Ask the boundary function, passing the position of CH0
             and the limit.  Its answer is used only if it is an
             integer beyond FROM.  */
          pos = call2 (func, make_number (from - 1), make_number (end));
          if (INTEGERP (pos) && XINT (pos) > from)
            {
              from = XINT (pos);
              from_byte = CHAR_TO_BYTE (from);
            }
        }
      else
        {
          script = CHAR_TABLE_REF (Vchar_script_table, ch0);
          /* Advance until a character fails the word test or
             word_boundary_p reports a boundary.  Reaching END here
             simply ends the word; it still counts.  */
          while (1)
            {
              if (from == end) break;
              UPDATE_SYNTAX_TABLE_FORWARD (from);
              ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
              code = SYNTAX (ch1);
              if ((code != Sword
                   && (! words_include_escapes
                       || (code != Sescape && code != Scharquote)))
                  || word_boundary_p (ch0, ch1))
                break;
              INC_BOTH (from, from_byte);
              ch0 = ch1;
            }
        }
      count--;
    }
  /* Backward scan: the mirror image of the loop above.  */
  while (count < 0)
    {
      /* Step back to the nearest character that can end a word.
         Reaching the beginning of the accessible region here means
         the requested number of words does not exist.  */
      while (1)
        {
          if (from == beg)
            {
              immediate_quit = 0;
              return 0;
            }
          DEC_BOTH (from, from_byte);
          UPDATE_SYNTAX_TABLE_BACKWARD (from);
          ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
          code = SYNTAX (ch1);
          if (words_include_escapes
              && (code == Sescape || code == Scharquote))
            break;
          if (code == Sword)
            break;
        }
      /* Now CH1 is a character which ends a word and FROM is the
         position of it.  */
      func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
      if (! NILP (Ffboundp (func)))
        {
          /* The boundary function's answer is used only if it is an
             integer before FROM.  */
          pos = call2 (func, make_number (from), make_number (beg));
          if (INTEGERP (pos) && XINT (pos) < from)
            {
              from = XINT (pos);
              from_byte = CHAR_TO_BYTE (from);
            }
        }
      else
        {
          script = CHAR_TABLE_REF (Vchar_script_table, ch1);
          while (1)
            {
              if (from == beg)
                break;
              DEC_BOTH (from, from_byte);
              UPDATE_SYNTAX_TABLE_BACKWARD (from);
              ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
              code = SYNTAX (ch0);
              if ((code != Sword
                   && (! words_include_escapes
                       || (code != Sescape && code != Scharquote)))
                  || word_boundary_p (ch0, ch1))
                {
                  /* CH0 is not part of the word: step forward again
                     so that FROM is left at the word's first
                     character.  */
                  INC_BOTH (from, from_byte);
                  break;
                }
              ch1 = ch0;
            }
        }
      count++;
    }

  immediate_quit = 0;

  return from;
}
1341
1342 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1343        doc: /* Move point forward ARG words (backward if ARG is negative).
1344 Normally returns t.
1345 If an edge of the buffer or a field boundary is reached, point is left there
1346 and the function returns nil.  Field boundaries are not noticed if
1347 `inhibit-field-text-motion' is non-nil.  */)
1348   (Lisp_Object arg)
1349 {
1350   Lisp_Object tmp;
1351   int orig_val, val;
1352
1353   if (NILP (arg))
1354     XSETFASTINT (arg, 1);
1355   else
1356     CHECK_NUMBER (arg);
1357
1358   val = orig_val = scan_words (PT, XINT (arg));
1359   if (! orig_val)
1360     val = XINT (arg) > 0 ? ZV : BEGV;
1361
1362   /* Avoid jumping out of an input field.  */
1363   tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1364                              Qt, Qnil, Qnil);
1365   val = XFASTINT (tmp);
1366
1367   SET_PT (val);
1368   return val == orig_val ? Qt : Qnil;
1369 }
1370 \f
1371 Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
1372
1373 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1374        doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1375 STRING is like the inside of a `[...]' in a regular expression
1376 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1377  (but not at the end of a range; quoting is never needed there).
1378 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1379 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1380 Char classes, e.g. `[:alpha:]', are supported.
1381
1382 Returns the distance traveled, either zero or positive.  */)
1383   (Lisp_Object string, Lisp_Object lim)
1384 {
1385   return skip_chars (1, string, lim, 1);
1386 }
1387
1388 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1389        doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1390 See `skip-chars-forward' for details.
1391 Returns the distance traveled, either zero or negative.  */)
1392   (Lisp_Object string, Lisp_Object lim)
1393 {
1394   return skip_chars (0, string, lim, 1);
1395 }
1396
1397 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1398        doc: /* Move point forward across chars in specified syntax classes.
1399 SYNTAX is a string of syntax code characters.
1400 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1401 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1402 This function returns the distance traveled, either zero or positive.  */)
1403   (Lisp_Object syntax, Lisp_Object lim)
1404 {
1405   return skip_syntaxes (1, syntax, lim);
1406 }
1407
1408 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1409        doc: /* Move point backward across chars in specified syntax classes.
1410 SYNTAX is a string of syntax code characters.
1411 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1412 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1413 This function returns the distance traveled, either zero or negative.  */)
1414   (Lisp_Object syntax, Lisp_Object lim)
1415 {
1416   return skip_syntaxes (0, syntax, lim);
1417 }
1418
1419 static Lisp_Object
1420 skip_chars (int forwardp, Lisp_Object string, Lisp_Object lim, int handle_iso_classes)
1421 {
1422   register unsigned int c;
1423   unsigned char fastmap[0400];
1424   /* Store the ranges of non-ASCII characters.  */
1425   int *char_ranges;
1426   int n_char_ranges = 0;
1427   int negate = 0;
1428   register EMACS_INT i, i_byte;
1429   /* Set to 1 if the current buffer is multibyte and the region
1430      contains non-ASCII chars.  */
1431   int multibyte;
1432   /* Set to 1 if STRING is multibyte and it contains non-ASCII
1433      chars.  */
1434   int string_multibyte;
1435   EMACS_INT size_byte;
1436   const unsigned char *str;
1437   int len;
1438   Lisp_Object iso_classes;
1439
1440   CHECK_STRING (string);
1441   iso_classes = Qnil;
1442
1443   if (NILP (lim))
1444     XSETINT (lim, forwardp ? ZV : BEGV);
1445   else
1446     CHECK_NUMBER_COERCE_MARKER (lim);
1447
1448   /* In any case, don't allow scan outside bounds of buffer.  */
1449   if (XINT (lim) > ZV)
1450     XSETFASTINT (lim, ZV);
1451   if (XINT (lim) < BEGV)
1452     XSETFASTINT (lim, BEGV);
1453
1454   multibyte = (!NILP (current_buffer->enable_multibyte_characters)
1455                && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1456   string_multibyte = SBYTES (string) > SCHARS (string);
1457
1458   memset (fastmap, 0, sizeof fastmap);
1459
1460   str = SDATA (string);
1461   size_byte = SBYTES (string);
1462
1463   i_byte = 0;
1464   if (i_byte < size_byte
1465       && SREF (string, 0) == '^')
1466     {
1467       negate = 1; i_byte++;
1468     }
1469
1470   /* Find the characters specified and set their elements of fastmap.
1471      Handle backslashes and ranges specially.
1472
1473      If STRING contains non-ASCII characters, setup char_ranges for
1474      them and use fastmap only for their leading codes.  */
1475
1476   if (! string_multibyte)
1477     {
1478       int string_has_eight_bit = 0;
1479
1480       /* At first setup fastmap.  */
1481       while (i_byte < size_byte)
1482         {
1483           c = str[i_byte++];
1484
1485           if (handle_iso_classes && c == '['
1486               && i_byte < size_byte
1487               && str[i_byte] == ':')
1488             {
1489               const unsigned char *class_beg = str + i_byte + 1;
1490               const unsigned char *class_end = class_beg;
1491               const unsigned char *class_limit = str + size_byte - 2;
1492               /* Leave room for the null.  */
1493               unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1494               re_wctype_t cc;
1495
1496               if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1497                 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1498
1499               while (class_end < class_limit
1500                      && *class_end >= 'a' && *class_end <= 'z')
1501                 class_end++;
1502
1503               if (class_end == class_beg
1504                   || *class_end != ':' || class_end[1] != ']')
1505                 goto not_a_class_name;
1506
1507               memcpy (class_name, class_beg, class_end - class_beg);
1508               class_name[class_end - class_beg] = 0;
1509
1510               cc = re_wctype (class_name);
1511               if (cc == 0)
1512                 error ("Invalid ISO C character class");
1513
1514               iso_classes = Fcons (make_number (cc), iso_classes);
1515
1516               i_byte = class_end + 2 - str;
1517               continue;
1518             }
1519
1520         not_a_class_name:
1521           if (c == '\\')
1522             {
1523               if (i_byte == size_byte)
1524                 break;
1525
1526               c = str[i_byte++];
1527             }
1528           /* Treat `-' as range character only if another character
1529              follows.  */
1530           if (i_byte + 1 < size_byte
1531               && str[i_byte] == '-')
1532             {
1533               unsigned int c2;
1534
1535               /* Skip over the dash.  */
1536               i_byte++;
1537
1538               /* Get the end of the range.  */
1539               c2 = str[i_byte++];
1540               if (c2 == '\\'
1541                   && i_byte < size_byte)
1542                 c2 = str[i_byte++];
1543
1544               if (c <= c2)
1545                 {
1546                   while (c <= c2)
1547                     fastmap[c++] = 1;
1548                   if (! ASCII_CHAR_P (c2))
1549                     string_has_eight_bit = 1;
1550                 }
1551             }
1552           else
1553             {
1554               fastmap[c] = 1;
1555               if (! ASCII_CHAR_P (c))
1556                 string_has_eight_bit = 1;
1557             }
1558         }
1559
1560       /* If the current range is multibyte and STRING contains
1561          eight-bit chars, arrange fastmap and setup char_ranges for
1562          the corresponding multibyte chars.  */
1563       if (multibyte && string_has_eight_bit)
1564         {
1565           unsigned char fastmap2[0400];
1566           int range_start_byte, range_start_char;
1567
1568           memcpy (fastmap + 0200, fastmap2 + 0200, 0200);
1569           memset (fastmap + 0200, 0, 0200);
1570           /* We are sure that this loop stops.  */
1571           for (i = 0200; ! fastmap2[i]; i++);
1572           c = BYTE8_TO_CHAR (i);
1573           fastmap[CHAR_LEADING_CODE (c)] = 1;
1574           range_start_byte = i;
1575           range_start_char = c;
1576           char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
1577           for (i = 129; i < 0400; i++)
1578             {
1579               c = BYTE8_TO_CHAR (i);
1580               fastmap[CHAR_LEADING_CODE (c)] = 1;
1581               if (i - range_start_byte != c - range_start_char)
1582                 {
1583                   char_ranges[n_char_ranges++] = range_start_char;
1584                   char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1585                                                   + range_start_char);
1586                   range_start_byte = i;
1587                   range_start_char = c;
1588                 }
1589             }
1590           char_ranges[n_char_ranges++] = range_start_char;
1591           char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1592                                           + range_start_char);
1593         }
1594     }
1595   else                          /* STRING is multibyte */
1596     {
1597       char_ranges = (int *) alloca (sizeof (int) * SCHARS (string) * 2);
1598
1599       while (i_byte < size_byte)
1600         {
1601           unsigned char leading_code;
1602
1603           leading_code = str[i_byte];
1604           c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1605           i_byte += len;
1606
1607           if (handle_iso_classes && c == '['
1608               && i_byte < size_byte
1609               && STRING_CHAR (str + i_byte) == ':')
1610             {
1611               const unsigned char *class_beg = str + i_byte + 1;
1612               const unsigned char *class_end = class_beg;
1613               const unsigned char *class_limit = str + size_byte - 2;
1614               /* Leave room for the null.        */
1615               unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1616               re_wctype_t cc;
1617
1618               if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1619                 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1620
1621               while (class_end < class_limit
1622                      && *class_end >= 'a' && *class_end <= 'z')
1623                 class_end++;
1624
1625               if (class_end == class_beg
1626                   || *class_end != ':' || class_end[1] != ']')
1627                 goto not_a_class_name_multibyte;
1628
1629               memcpy (class_name, class_beg, class_end - class_beg);
1630               class_name[class_end - class_beg] = 0;
1631
1632               cc = re_wctype (class_name);
1633               if (cc == 0)
1634                 error ("Invalid ISO C character class");
1635
1636               iso_classes = Fcons (make_number (cc), iso_classes);
1637
1638               i_byte = class_end + 2 - str;
1639               continue;
1640             }
1641
1642         not_a_class_name_multibyte:
1643           if (c == '\\')
1644             {
1645               if (i_byte == size_byte)
1646                 break;
1647
1648               leading_code = str[i_byte];
1649               c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1650               i_byte += len;
1651             }
1652           /* Treat `-' as range character only if another character
1653              follows.  */
1654           if (i_byte + 1 < size_byte
1655               && str[i_byte] == '-')
1656             {
1657               unsigned int c2;
1658               unsigned char leading_code2;
1659
1660               /* Skip over the dash.  */
1661               i_byte++;
1662
1663               /* Get the end of the range.  */
1664               leading_code2 = str[i_byte];
1665               c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1666               i_byte += len;
1667
1668               if (c2 == '\\'
1669                   && i_byte < size_byte)
1670                 {
1671                   leading_code2 = str[i_byte];
1672                   c2 =STRING_CHAR_AND_LENGTH (str + i_byte, len);
1673                   i_byte += len;
1674                 }
1675
1676               if (c > c2)
1677                 continue;
1678               if (ASCII_CHAR_P (c))
1679                 {
1680                   while (c <= c2 && c < 0x80)
1681                     fastmap[c++] = 1;
1682                   leading_code = CHAR_LEADING_CODE (c);
1683                 }
1684               if (! ASCII_CHAR_P (c))
1685                 {
1686                   while (leading_code <= leading_code2)
1687                     fastmap[leading_code++] = 1;
1688                   if (c <= c2)
1689                     {
1690                       char_ranges[n_char_ranges++] = c;
1691                       char_ranges[n_char_ranges++] = c2;
1692                     }
1693                 }
1694             }
1695           else
1696             {
1697               if (ASCII_CHAR_P (c))
1698                 fastmap[c] = 1;
1699               else
1700                 {
1701                   fastmap[leading_code] = 1;
1702                   char_ranges[n_char_ranges++] = c;
1703                   char_ranges[n_char_ranges++] = c;
1704                 }
1705             }
1706         }
1707
1708       /* If the current range is unibyte and STRING contains non-ASCII
1709          chars, arrange fastmap for the corresponding unibyte
1710          chars.  */
1711
1712       if (! multibyte && n_char_ranges > 0)
1713         {
1714           memset (fastmap + 0200, 0, 0200);
1715           for (i = 0; i < n_char_ranges; i += 2)
1716             {
1717               int c1 = char_ranges[i];
1718               int c2 = char_ranges[i + 1];
1719
1720               for (; c1 <= c2; c1++)
1721                 {
1722                   int b = CHAR_TO_BYTE_SAFE (c1);
1723                   if (b >= 0)
1724                     fastmap[b] = 1;
1725                 }
1726             }
1727         }
1728     }
1729
1730   /* If ^ was the first character, complement the fastmap.  */
1731   if (negate)
1732     {
1733       if (! multibyte)
1734         for (i = 0; i < sizeof fastmap; i++)
1735           fastmap[i] ^= 1;
1736       else
1737         {
1738           for (i = 0; i < 0200; i++)
1739             fastmap[i] ^= 1;
1740           /* All non-ASCII chars possibly match.  */
1741           for (; i < sizeof fastmap; i++)
1742             fastmap[i] = 1;
1743         }
1744     }
1745
1746   {
1747     EMACS_INT start_point = PT;
1748     EMACS_INT pos = PT;
1749     EMACS_INT pos_byte = PT_BYTE;
1750     unsigned char *p = PT_ADDR, *endp, *stop;
1751
1752     if (forwardp)
1753       {
1754         endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1755         stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1756       }
1757     else
1758       {
1759         endp = CHAR_POS_ADDR (XINT (lim));
1760         stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1761       }
1762
1763     immediate_quit = 1;
1764     /* This code may look up syntax tables using macros that rely on the
1765        gl_state object.  To make sure this object is not out of date,
1766        let's initialize it manually.
1767        We ignore syntax-table text-properties for now, since that's
1768        what we've done in the past.  */
1769     SETUP_BUFFER_SYNTAX_TABLE ();
1770     if (forwardp)
1771       {
1772         if (multibyte)
1773           while (1)
1774             {
1775               int nbytes;
1776
1777               if (p >= stop)
1778                 {
1779                   if (p >= endp)
1780                     break;
1781                   p = GAP_END_ADDR;
1782                   stop = endp;
1783                 }
1784               c = STRING_CHAR_AND_LENGTH (p, nbytes);
1785               if (! NILP (iso_classes) && in_classes (c, iso_classes))
1786                 {
1787                   if (negate)
1788                     break;
1789                   else
1790                     goto fwd_ok;
1791                 }
1792
1793               if (! fastmap[*p])
1794                 break;
1795               if (! ASCII_CHAR_P (c))
1796                 {
1797                   /* As we are looking at a multibyte character, we
1798                      must look up the character in the table
1799                      CHAR_RANGES.  If there's no data in the table,
1800                      that character is not what we want to skip.  */
1801
1802                   /* The following code do the right thing even if
1803                      n_char_ranges is zero (i.e. no data in
1804                      CHAR_RANGES).  */
1805                   for (i = 0; i < n_char_ranges; i += 2)
1806                     if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1807                       break;
1808                   if (!(negate ^ (i < n_char_ranges)))
1809                     break;
1810                 }
1811             fwd_ok:
1812               p += nbytes, pos++, pos_byte += nbytes;
1813             }
1814         else
1815           while (1)
1816             {
1817               if (p >= stop)
1818                 {
1819                   if (p >= endp)
1820                     break;
1821                   p = GAP_END_ADDR;
1822                   stop = endp;
1823                 }
1824
1825               if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1826                 {
1827                   if (negate)
1828                     break;
1829                   else
1830                     goto fwd_unibyte_ok;
1831                 }
1832
1833               if (!fastmap[*p])
1834                 break;
1835             fwd_unibyte_ok:
1836               p++, pos++, pos_byte++;
1837             }
1838       }
1839     else
1840       {
1841         if (multibyte)
1842           while (1)
1843             {
1844               unsigned char *prev_p;
1845
1846               if (p <= stop)
1847                 {
1848                   if (p <= endp)
1849                     break;
1850                   p = GPT_ADDR;
1851                   stop = endp;
1852                 }
1853               prev_p = p;
1854               while (--p >= stop && ! CHAR_HEAD_P (*p));
1855               c = STRING_CHAR (p);
1856
1857               if (! NILP (iso_classes) && in_classes (c, iso_classes))
1858                 {
1859                   if (negate)
1860                     break;
1861                   else
1862                     goto back_ok;
1863                 }
1864
1865               if (! fastmap[*p])
1866                 break;
1867               if (! ASCII_CHAR_P (c))
1868                 {
1869                   /* See the comment in the previous similar code.  */
1870                   for (i = 0; i < n_char_ranges; i += 2)
1871                     if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1872                       break;
1873                   if (!(negate ^ (i < n_char_ranges)))
1874                     break;
1875                 }
1876             back_ok:
1877               pos--, pos_byte -= prev_p - p;
1878             }
1879         else
1880           while (1)
1881             {
1882               if (p <= stop)
1883                 {
1884                   if (p <= endp)
1885                     break;
1886                   p = GPT_ADDR;
1887                   stop = endp;
1888                 }
1889
1890               if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1891                 {
1892                   if (negate)
1893                     break;
1894                   else
1895                     goto back_unibyte_ok;
1896                 }
1897
1898               if (!fastmap[p[-1]])
1899                 break;
1900             back_unibyte_ok:
1901               p--, pos--, pos_byte--;
1902             }
1903       }
1904
1905     SET_PT_BOTH (pos, pos_byte);
1906     immediate_quit = 0;
1907
1908     return make_number (PT - start_point);
1909   }
1910 }
1911
1912
1913 static Lisp_Object
1914 skip_syntaxes (int forwardp, Lisp_Object string, Lisp_Object lim)
1915 {
1916   register unsigned int c;
1917   unsigned char fastmap[0400];
1918   int negate = 0;
1919   register EMACS_INT i, i_byte;
1920   int multibyte;
1921   EMACS_INT size_byte;
1922   unsigned char *str;
1923
1924   CHECK_STRING (string);
1925
1926   if (NILP (lim))
1927     XSETINT (lim, forwardp ? ZV : BEGV);
1928   else
1929     CHECK_NUMBER_COERCE_MARKER (lim);
1930
1931   /* In any case, don't allow scan outside bounds of buffer.  */
1932   if (XINT (lim) > ZV)
1933     XSETFASTINT (lim, ZV);
1934   if (XINT (lim) < BEGV)
1935     XSETFASTINT (lim, BEGV);
1936
1937   if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
1938     return make_number (0);
1939
1940   multibyte = (!NILP (current_buffer->enable_multibyte_characters)
1941                && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1942
1943   memset (fastmap, 0, sizeof fastmap);
1944
1945   if (SBYTES (string) > SCHARS (string))
1946     /* As this is very rare case (syntax spec is ASCII only), don't
1947        consider efficiency.  */
1948     string = string_make_unibyte (string);
1949
1950   str = SDATA (string);
1951   size_byte = SBYTES (string);
1952
1953   i_byte = 0;
1954   if (i_byte < size_byte
1955       && SREF (string, 0) == '^')
1956     {
1957       negate = 1; i_byte++;
1958     }
1959
1960   /* Find the syntaxes specified and set their elements of fastmap.  */
1961
1962   while (i_byte < size_byte)
1963     {
1964       c = str[i_byte++];
1965       fastmap[syntax_spec_code[c]] = 1;
1966     }
1967
1968   /* If ^ was the first character, complement the fastmap.  */
1969   if (negate)
1970     for (i = 0; i < sizeof fastmap; i++)
1971       fastmap[i] ^= 1;
1972
1973   {
1974     EMACS_INT start_point = PT;
1975     EMACS_INT pos = PT;
1976     EMACS_INT pos_byte = PT_BYTE;
1977     unsigned char *p = PT_ADDR, *endp, *stop;
1978
1979     if (forwardp)
1980       {
1981         endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1982         stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1983       }
1984     else
1985       {
1986         endp = CHAR_POS_ADDR (XINT (lim));
1987         stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1988       }
1989
1990     immediate_quit = 1;
1991     SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1992     if (forwardp)
1993       {
1994         if (multibyte)
1995           {
1996             while (1)
1997               {
1998                 int nbytes;
1999
2000                 if (p >= stop)
2001                   {
2002                     if (p >= endp)
2003                       break;
2004                     p = GAP_END_ADDR;
2005                     stop = endp;
2006                   }
2007                 c = STRING_CHAR_AND_LENGTH (p, nbytes);
2008                 if (! fastmap[(int) SYNTAX (c)])
2009                   break;
2010                 p += nbytes, pos++, pos_byte += nbytes;
2011                 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2012               }
2013           }
2014         else
2015           {
2016             while (1)
2017               {
2018                 if (p >= stop)
2019                   {
2020                     if (p >= endp)
2021                       break;
2022                     p = GAP_END_ADDR;
2023                     stop = endp;
2024                   }
2025                 if (! fastmap[(int) SYNTAX (*p)])
2026                   break;
2027                 p++, pos++, pos_byte++;
2028                 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2029               }
2030           }
2031       }
2032     else
2033       {
2034         if (multibyte)
2035           {
2036             while (1)
2037               {
2038                 unsigned char *prev_p;
2039
2040                 if (p <= stop)
2041                   {
2042                     if (p <= endp)
2043                       break;
2044                     p = GPT_ADDR;
2045                     stop = endp;
2046                   }
2047                 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2048                 prev_p = p;
2049                 while (--p >= stop && ! CHAR_HEAD_P (*p));
2050                 c = STRING_CHAR (p);
2051                 if (! fastmap[(int) SYNTAX (c)])
2052                   break;
2053                 pos--, pos_byte -= prev_p - p;
2054               }
2055           }
2056         else
2057           {
2058             while (1)
2059               {
2060                 if (p <= stop)
2061                   {
2062                     if (p <= endp)
2063                       break;
2064                     p = GPT_ADDR;
2065                     stop = endp;
2066                   }
2067                 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2068                 if (! fastmap[(int) SYNTAX (p[-1])])
2069                   break;
2070                 p--, pos--, pos_byte--;
2071               }
2072           }
2073       }
2074
2075     SET_PT_BOTH (pos, pos_byte);
2076     immediate_quit = 0;
2077
2078     return make_number (PT - start_point);
2079   }
2080 }
2081
2082 /* Return 1 if character C belongs to one of the ISO classes
2083    in the list ISO_CLASSES.  Each class is represented by an
2084    integer which is its type according to re_wctype.  */
2085
2086 static int
2087 in_classes (int c, Lisp_Object iso_classes)
2088 {
2089   int fits_class = 0;
2090
2091   while (CONSP (iso_classes))
2092     {
2093       Lisp_Object elt;
2094       elt = XCAR (iso_classes);
2095       iso_classes = XCDR (iso_classes);
2096
2097       if (re_iswctype (c, XFASTINT (elt)))
2098         fits_class = 1;
2099     }
2100
2101   return fits_class;
2102 }
2103 \f
2104 /* Jump over a comment, assuming we are at the beginning of one.
2105    FROM is the current position.
2106    FROM_BYTE is the bytepos corresponding to FROM.
2107    Do not move past STOP (a charpos).
2108    The comment over which we have to jump is of style STYLE
2109      (either SYNTAX_FLAGS_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
2110    NESTING should be positive to indicate the nesting at the beginning
2111      for nested comments and should be zero or negative else.
2112      ST_COMMENT_STYLE cannot be nested.
2113    PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2114      (or 0 If the search cannot start in the middle of a two-character).
2115
2116    If successful, return 1 and store the charpos of the comment's end
2117    into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2118    Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
2119    corresponding bytepos into *BYTEPOS_PTR and the current nesting
2120    (as defined for state.incomment) in *INCOMMENT_PTR.
2121
2122    The comment end is the last character of the comment rather than the
2123      character just after the comment.
2124
2125    Global syntax data is assumed to initially be valid for FROM and
2126    remains valid for forward search starting at the returned position. */
2127
2128 static int
2129 forw_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop,
2130               int nesting, int style, int prev_syntax,
2131               EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr,
2132               int *incomment_ptr)
2133 {
2134   register int c, c1;
2135   register enum syntaxcode code;
2136   register int syntax, other_syntax;
2137
2138   if (nesting <= 0) nesting = -1;
2139
2140   /* Enter the loop in the middle so that we find
2141      a 2-char comment ender if we start in the middle of it.  */
2142   syntax = prev_syntax;
2143   if (syntax != 0) goto forw_incomment;
2144
2145   while (1)
2146     {
2147       if (from == stop)
2148         {
2149           *incomment_ptr = nesting;
2150           *charpos_ptr = from;
2151           *bytepos_ptr = from_byte;
2152           return 0;
2153         }
2154       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2155       syntax = SYNTAX_WITH_FLAGS (c);
2156       code = syntax & 0xff;
2157       if (code == Sendcomment
2158           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2159           && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2160               (nesting > 0 && --nesting == 0) : nesting < 0))
2161         /* we have encountered a comment end of the same style
2162            as the comment sequence which began this comment
2163            section */
2164         break;
2165       if (code == Scomment_fence
2166           && style == ST_COMMENT_STYLE)
2167         /* we have encountered a comment end of the same style
2168            as the comment sequence which began this comment
2169            section.  */
2170         break;
2171       if (nesting > 0
2172           && code == Scomment
2173           && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2174           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2175         /* we have encountered a nested comment of the same style
2176            as the comment sequence which began this comment section */
2177         nesting++;
2178       INC_BOTH (from, from_byte);
2179       UPDATE_SYNTAX_TABLE_FORWARD (from);
2180
2181     forw_incomment:
2182       if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2183           && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2184               other_syntax = SYNTAX_WITH_FLAGS (c1),
2185               SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2186           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2187           && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2188                SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2189               ? nesting > 0 : nesting < 0))
2190         {
2191           if (--nesting <= 0)
2192             /* we have encountered a comment end of the same style
2193                as the comment sequence which began this comment
2194                section */
2195             break;
2196           else
2197             {
2198               INC_BOTH (from, from_byte);
2199               UPDATE_SYNTAX_TABLE_FORWARD (from);
2200             }
2201         }
2202       if (nesting > 0
2203           && from < stop
2204           && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2205           && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2206               other_syntax = SYNTAX_WITH_FLAGS (c1),
2207               SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2208               && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2209           && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2210               SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2211         /* we have encountered a nested comment of the same style
2212            as the comment sequence which began this comment
2213            section */
2214         {
2215           INC_BOTH (from, from_byte);
2216           UPDATE_SYNTAX_TABLE_FORWARD (from);
2217           nesting++;
2218         }
2219     }
2220   *charpos_ptr = from;
2221   *bytepos_ptr = from_byte;
2222   return 1;
2223 }
2224
2225 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2226        doc: /*
2227 Move forward across up to COUNT comments.  If COUNT is negative, move backward.
2228 Stop scanning if we find something other than a comment or whitespace.
2229 Set point to where scanning stops.
2230 If COUNT comments are found as expected, with nothing except whitespace
2231 between them, return t; otherwise return nil.  */)
2232   (Lisp_Object count)
2233 {
2234   register EMACS_INT from;
2235   EMACS_INT from_byte;
2236   register EMACS_INT stop;
2237   register int c, c1;
2238   register enum syntaxcode code;
2239   int comstyle = 0;         /* style of comment encountered */
2240   int comnested = 0;        /* whether the comment is nestable or not */
2241   int found;
2242   EMACS_INT count1;
2243   EMACS_INT out_charpos, out_bytepos;
2244   int dummy;
2245
2246   CHECK_NUMBER (count);
2247   count1 = XINT (count);
2248   stop = count1 > 0 ? ZV : BEGV;
2249
2250   immediate_quit = 1;
2251   QUIT;
2252
2253   from = PT;
2254   from_byte = PT_BYTE;
2255
2256   SETUP_SYNTAX_TABLE (from, count1);
2257   while (count1 > 0)
2258     {
2259       do
2260         {
2261           int comstart_first, syntax, other_syntax;
2262
2263           if (from == stop)
2264             {
2265               SET_PT_BOTH (from, from_byte);
2266               immediate_quit = 0;
2267               return Qnil;
2268             }
2269           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2270           syntax = SYNTAX_WITH_FLAGS (c);
2271           code = SYNTAX (c);
2272           comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2273           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2274           comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2275           INC_BOTH (from, from_byte);
2276           UPDATE_SYNTAX_TABLE_FORWARD (from);
2277           if (from < stop && comstart_first
2278               && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2279                   other_syntax = SYNTAX_WITH_FLAGS (c1),
2280                   SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2281             {
2282               /* We have encountered a comment start sequence and we
2283                  are ignoring all text inside comments.  We must record
2284                  the comment style this sequence begins so that later,
2285                  only a comment end of the same style actually ends
2286                  the comment section.  */
2287               code = Scomment;
2288               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2289               comnested
2290                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2291               INC_BOTH (from, from_byte);
2292               UPDATE_SYNTAX_TABLE_FORWARD (from);
2293             }
2294         }
2295       while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2296
2297       if (code == Scomment_fence)
2298         comstyle = ST_COMMENT_STYLE;
2299       else if (code != Scomment)
2300         {
2301           immediate_quit = 0;
2302           DEC_BOTH (from, from_byte);
2303           SET_PT_BOTH (from, from_byte);
2304           return Qnil;
2305         }
2306       /* We're at the start of a comment.  */
2307       found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2308                             &out_charpos, &out_bytepos, &dummy);
2309       from = out_charpos; from_byte = out_bytepos;
2310       if (!found)
2311         {
2312           immediate_quit = 0;
2313           SET_PT_BOTH (from, from_byte);
2314           return Qnil;
2315         }
2316       INC_BOTH (from, from_byte);
2317       UPDATE_SYNTAX_TABLE_FORWARD (from);
2318       /* We have skipped one comment.  */
2319       count1--;
2320     }
2321
2322   while (count1 < 0)
2323     {
2324       while (1)
2325         {
2326           int quoted, syntax;
2327
2328           if (from <= stop)
2329             {
2330               SET_PT_BOTH (BEGV, BEGV_BYTE);
2331               immediate_quit = 0;
2332               return Qnil;
2333             }
2334
2335           DEC_BOTH (from, from_byte);
2336           /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from).  */
2337           quoted = char_quoted (from, from_byte);
2338           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2339           syntax = SYNTAX_WITH_FLAGS (c);
2340           code = SYNTAX (c);
2341           comstyle = 0;
2342           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2343           if (code == Sendcomment)
2344             comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2345           if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2346               && prev_char_comend_first (from, from_byte)
2347               && !char_quoted (from - 1, dec_bytepos (from_byte)))
2348             {
2349               int other_syntax;
2350               /* We must record the comment style encountered so that
2351                  later, we can match only the proper comment begin
2352                  sequence of the same style.  */
2353               DEC_BOTH (from, from_byte);
2354               code = Sendcomment;
2355               /* Calling char_quoted, above, set up global syntax position
2356                  at the new value of FROM.  */
2357               c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2358               other_syntax = SYNTAX_WITH_FLAGS (c1);
2359               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2360               comnested
2361                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2362             }
2363
2364           if (code == Scomment_fence)
2365             {
2366               /* Skip until first preceding unquoted comment_fence.  */
2367               int found = 0;
2368               EMACS_INT ini = from, ini_byte = from_byte;
2369
2370               while (1)
2371                 {
2372                   DEC_BOTH (from, from_byte);
2373                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2374                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2375                   if (SYNTAX (c) == Scomment_fence
2376                       && !char_quoted (from, from_byte))
2377                     {
2378                       found = 1;
2379                       break;
2380                     }
2381                   else if (from == stop)
2382                     break;
2383                 }
2384               if (found == 0)
2385                 {
2386                   from = ini;           /* Set point to ini + 1.  */
2387                   from_byte = ini_byte;
2388                   goto leave;
2389                 }
2390               else
2391                 /* We have skipped one comment.  */
2392                 break;
2393             }
2394           else if (code == Sendcomment)
2395             {
2396               found = back_comment (from, from_byte, stop, comnested, comstyle,
2397                                     &out_charpos, &out_bytepos);
2398               if (found == -1)
2399                 {
2400                   if (c == '\n')
2401                     /* This end-of-line is not an end-of-comment.
2402                        Treat it like a whitespace.
2403                        CC-mode (and maybe others) relies on this behavior.  */
2404                     ;
2405                   else
2406                     {
2407                       /* Failure: we should go back to the end of this
2408                          not-quite-endcomment.  */
2409                       if (SYNTAX (c) != code)
2410                         /* It was a two-char Sendcomment.  */
2411                         INC_BOTH (from, from_byte);
2412                       goto leave;
2413                     }
2414                 }
2415               else
2416                 {
2417                   /* We have skipped one comment.  */
2418                   from = out_charpos, from_byte = out_bytepos;
2419                   break;
2420                 }
2421             }
2422           else if (code != Swhitespace || quoted)
2423             {
2424             leave:
2425               immediate_quit = 0;
2426               INC_BOTH (from, from_byte);
2427               SET_PT_BOTH (from, from_byte);
2428               return Qnil;
2429             }
2430         }
2431
2432       count1++;
2433     }
2434
2435   SET_PT_BOTH (from, from_byte);
2436   immediate_quit = 0;
2437   return Qt;
2438 }
2439 \f
/* Syntax code to use for character C: Ssymbol when C is not an ASCII
   character and `multibyte_symbol_p' is nonzero, otherwise the code
   that SYNTAX gives for C.  `multibyte_symbol_p' must be in scope
   wherever this macro is used.  */

#define SYNTAX_WITH_MULTIBYTE_CHECK(c)          \
  ((!ASCII_CHAR_P (c) && multibyte_symbol_p)    \
   ? Ssymbol : SYNTAX (c))
2446
2447 static Lisp_Object
2448 scan_lists (register EMACS_INT from, EMACS_INT count, EMACS_INT depth, int sexpflag)
2449 {
2450   Lisp_Object val;
2451   register EMACS_INT stop = count > 0 ? ZV : BEGV;
2452   register int c, c1;
2453   int stringterm;
2454   int quoted;
2455   int mathexit = 0;
2456   register enum syntaxcode code, temp_code;
2457   int min_depth = depth;    /* Err out if depth gets less than this.  */
2458   int comstyle = 0;         /* style of comment encountered */
2459   int comnested = 0;        /* whether the comment is nestable or not */
2460   EMACS_INT temp_pos;
2461   EMACS_INT last_good = from;
2462   int found;
2463   EMACS_INT from_byte;
2464   EMACS_INT out_bytepos, out_charpos;
2465   int temp, dummy;
2466   int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2467
2468   if (depth > 0) min_depth = 0;
2469
2470   if (from > ZV) from = ZV;
2471   if (from < BEGV) from = BEGV;
2472
2473   from_byte = CHAR_TO_BYTE (from);
2474
2475   immediate_quit = 1;
2476   QUIT;
2477
2478   SETUP_SYNTAX_TABLE (from, count);
2479   while (count > 0)
2480     {
2481       while (from < stop)
2482         {
2483           int comstart_first, prefix, syntax, other_syntax;
2484           UPDATE_SYNTAX_TABLE_FORWARD (from);
2485           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2486           syntax = SYNTAX_WITH_FLAGS (c);
2487           code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2488           comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2489           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2490           comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2491           prefix = SYNTAX_FLAGS_PREFIX (syntax);
2492           if (depth == min_depth)
2493             last_good = from;
2494           INC_BOTH (from, from_byte);
2495           UPDATE_SYNTAX_TABLE_FORWARD (from);
2496           if (from < stop && comstart_first
2497               && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2498                   other_syntax = SYNTAX_WITH_FLAGS (c),
2499                   SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2500               && parse_sexp_ignore_comments)
2501             {
2502               /* we have encountered a comment start sequence and we
2503                  are ignoring all text inside comments.  We must record
2504                  the comment style this sequence begins so that later,
2505                  only a comment end of the same style actually ends
2506                  the comment section */
2507               code = Scomment;
2508               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2509               comnested
2510                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2511               INC_BOTH (from, from_byte);
2512               UPDATE_SYNTAX_TABLE_FORWARD (from);
2513             }
2514
2515           if (prefix)
2516             continue;
2517
2518           switch (SWITCH_ENUM_CAST (code))
2519             {
2520             case Sescape:
2521             case Scharquote:
2522               if (from == stop)
2523                 goto lose;
2524               INC_BOTH (from, from_byte);
2525               /* treat following character as a word constituent */
2526             case Sword:
2527             case Ssymbol:
2528               if (depth || !sexpflag) break;
2529               /* This word counts as a sexp; return at end of it.  */
2530               while (from < stop)
2531                 {
2532                   UPDATE_SYNTAX_TABLE_FORWARD (from);
2533
2534                   /* Some compilers can't handle this inside the switch.  */
2535                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2536                   temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2537                   switch (temp)
2538                     {
2539                     case Scharquote:
2540                     case Sescape:
2541                       INC_BOTH (from, from_byte);
2542                       if (from == stop)
2543                         goto lose;
2544                       break;
2545                     case Sword:
2546                     case Ssymbol:
2547                     case Squote:
2548                       break;
2549                     default:
2550                       goto done;
2551                     }
2552                   INC_BOTH (from, from_byte);
2553                 }
2554               goto done;
2555
2556             case Scomment_fence:
2557               comstyle = ST_COMMENT_STYLE;
2558               /* FALLTHROUGH */
2559             case Scomment:
2560               if (!parse_sexp_ignore_comments) break;
2561               UPDATE_SYNTAX_TABLE_FORWARD (from);
2562               found = forw_comment (from, from_byte, stop,
2563                                     comnested, comstyle, 0,
2564                                     &out_charpos, &out_bytepos, &dummy);
2565               from = out_charpos, from_byte = out_bytepos;
2566               if (!found)
2567                 {
2568                   if (depth == 0)
2569                     goto done;
2570                   goto lose;
2571                 }
2572               INC_BOTH (from, from_byte);
2573               UPDATE_SYNTAX_TABLE_FORWARD (from);
2574               break;
2575
2576             case Smath:
2577               if (!sexpflag)
2578                 break;
2579               if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2580                 {
2581                   INC_BOTH (from, from_byte);
2582                 }
2583               if (mathexit)
2584                 {
2585                   mathexit = 0;
2586                   goto close1;
2587                 }
2588               mathexit = 1;
2589
2590             case Sopen:
2591               if (!++depth) goto done;
2592               break;
2593
2594             case Sclose:
2595             close1:
2596               if (!--depth) goto done;
2597               if (depth < min_depth)
2598                 xsignal3 (Qscan_error,
2599                           build_string ("Containing expression ends prematurely"),
2600                           make_number (last_good), make_number (from));
2601               break;
2602
2603             case Sstring:
2604             case Sstring_fence:
2605               temp_pos = dec_bytepos (from_byte);
2606               stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2607               while (1)
2608                 {
2609                   if (from >= stop)
2610                     goto lose;
2611                   UPDATE_SYNTAX_TABLE_FORWARD (from);
2612                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2613                   if (code == Sstring
2614                       ? (c == stringterm
2615                          && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2616                       : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
2617                     break;
2618
2619                   /* Some compilers can't handle this inside the switch.  */
2620                   temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2621                   switch (temp)
2622                     {
2623                     case Scharquote:
2624                     case Sescape:
2625                       INC_BOTH (from, from_byte);
2626                     }
2627                   INC_BOTH (from, from_byte);
2628                 }
2629               INC_BOTH (from, from_byte);
2630               if (!depth && sexpflag) goto done;
2631               break;
2632             default:
2633               /* Ignore whitespace, punctuation, quote, endcomment.  */
2634               break;
2635             }
2636         }
2637
2638       /* Reached end of buffer.  Error if within object, return nil if between */
2639       if (depth)
2640         goto lose;
2641
2642       immediate_quit = 0;
2643       return Qnil;
2644
2645       /* End of object reached */
2646     done:
2647       count--;
2648     }
2649
2650   while (count < 0)
2651     {
2652       while (from > stop)
2653         {
2654           int syntax;
2655           DEC_BOTH (from, from_byte);
2656           UPDATE_SYNTAX_TABLE_BACKWARD (from);
2657           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2658           syntax= SYNTAX_WITH_FLAGS (c);
2659           code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2660           if (depth == min_depth)
2661             last_good = from;
2662           comstyle = 0;
2663           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2664           if (code == Sendcomment)
2665             comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2666           if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2667               && prev_char_comend_first (from, from_byte)
2668               && parse_sexp_ignore_comments)
2669             {
2670               /* We must record the comment style encountered so that
2671                  later, we can match only the proper comment begin
2672                  sequence of the same style.  */
2673               int c1, other_syntax;
2674               DEC_BOTH (from, from_byte);
2675               UPDATE_SYNTAX_TABLE_BACKWARD (from);
2676               code = Sendcomment;
2677               c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2678               other_syntax = SYNTAX_WITH_FLAGS (c1);
2679               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2680               comnested
2681                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2682             }
2683
2684           /* Quoting turns anything except a comment-ender
2685              into a word character.  Note that this cannot be true
2686              if we decremented FROM in the if-statement above.  */
2687           if (code != Sendcomment && char_quoted (from, from_byte))
2688             {
2689               DEC_BOTH (from, from_byte);
2690               code = Sword;
2691             }
2692           else if (SYNTAX_FLAGS_PREFIX (syntax))
2693             continue;
2694
2695           switch (SWITCH_ENUM_CAST (code))
2696             {
2697             case Sword:
2698             case Ssymbol:
2699             case Sescape:
2700             case Scharquote:
2701               if (depth || !sexpflag) break;
2702               /* This word counts as a sexp; count object finished
2703                  after passing it.  */
2704               while (from > stop)
2705                 {
2706                   temp_pos = from_byte;
2707                   if (! NILP (current_buffer->enable_multibyte_characters))
2708                     DEC_POS (temp_pos);
2709                   else
2710                     temp_pos--;
2711                   UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2712                   c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2713                   temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2714                   /* Don't allow comment-end to be quoted.  */
2715                   if (temp_code == Sendcomment)
2716                     goto done2;
2717                   quoted = char_quoted (from - 1, temp_pos);
2718                   if (quoted)
2719                     {
2720                       DEC_BOTH (from, from_byte);
2721                       temp_pos = dec_bytepos (temp_pos);
2722                       UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2723                     }
2724                   c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2725                   temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2726                   if (! (quoted || temp_code == Sword
2727                          || temp_code == Ssymbol
2728                          || temp_code == Squote))
2729                     goto done2;
2730                   DEC_BOTH (from, from_byte);
2731                 }
2732               goto done2;
2733
2734             case Smath:
2735               if (!sexpflag)
2736                 break;
2737               temp_pos = dec_bytepos (from_byte);
2738               UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2739               if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2740                 DEC_BOTH (from, from_byte);
2741               if (mathexit)
2742                 {
2743                   mathexit = 0;
2744                   goto open2;
2745                 }
2746               mathexit = 1;
2747
2748             case Sclose:
2749               if (!++depth) goto done2;
2750               break;
2751
2752             case Sopen:
2753             open2:
2754               if (!--depth) goto done2;
2755               if (depth < min_depth)
2756                 xsignal3 (Qscan_error,
2757                           build_string ("Containing expression ends prematurely"),
2758                           make_number (last_good), make_number (from));
2759               break;
2760
2761             case Sendcomment:
2762               if (!parse_sexp_ignore_comments)
2763                 break;
2764               found = back_comment (from, from_byte, stop, comnested, comstyle,
2765                                     &out_charpos, &out_bytepos);
2766               /* FIXME:  if found == -1, then it really wasn't a comment-end.
2767                  For single-char Sendcomment, we can't do much about it apart
2768                  from skipping the char.
2769                  For 2-char endcomments, we could try again, taking both
2770                  chars as separate entities, but it's a lot of trouble
2771                  for very little gain, so we don't bother either.  -sm */
2772               if (found != -1)
2773                 from = out_charpos, from_byte = out_bytepos;
2774               break;
2775
2776             case Scomment_fence:
2777             case Sstring_fence:
2778               while (1)
2779                 {
2780                   if (from == stop)
2781                     goto lose;
2782                   DEC_BOTH (from, from_byte);
2783                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2784                   if (!char_quoted (from, from_byte)
2785                       && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2786                           SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2787                     break;
2788                 }
2789               if (code == Sstring_fence && !depth && sexpflag) goto done2;
2790               break;
2791
2792             case Sstring:
2793               stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2794               while (1)
2795                 {
2796                   if (from == stop)
2797                     goto lose;
2798                   DEC_BOTH (from, from_byte);
2799                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2800                   if (!char_quoted (from, from_byte)
2801                       && (stringterm
2802                           == (c = FETCH_CHAR_AS_MULTIBYTE (from_byte)))
2803                       && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2804                     break;
2805                 }
2806               if (!depth && sexpflag) goto done2;
2807               break;
2808             default:
2809               /* Ignore whitespace, punctuation, quote, endcomment.  */
2810               break;
2811             }
2812         }
2813
2814       /* Reached start of buffer.  Error if within object, return nil if between */
2815       if (depth)
2816         goto lose;
2817
2818       immediate_quit = 0;
2819       return Qnil;
2820
2821     done2:
2822       count++;
2823     }
2824
2825
2826   immediate_quit = 0;
2827   XSETFASTINT (val, from);
2828   return val;
2829
2830  lose:
2831   xsignal3 (Qscan_error,
2832             build_string ("Unbalanced parentheses"),
2833             make_number (last_good), make_number (from));
2834 }
2835
2836 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2837        doc: /* Scan from character number FROM by COUNT lists.
2838 Returns the character number of the position thus found.
2839
2840 If DEPTH is nonzero, paren depth begins counting from that value,
2841 only places where the depth in parentheses becomes zero
2842 are candidates for stopping; COUNT such places are counted.
2843 Thus, a positive value for DEPTH means go out levels.
2844
2845 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2846
2847 If the beginning or end of (the accessible part of) the buffer is reached
2848 and the depth is wrong, an error is signaled.
2849 If the depth is right but the count is not used up, nil is returned.  */)
2850   (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
2851 {
2852   CHECK_NUMBER (from);
2853   CHECK_NUMBER (count);
2854   CHECK_NUMBER (depth);
2855
2856   return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2857 }
2858
2859 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2860        doc: /* Scan from character number FROM by COUNT balanced expressions.
2861 If COUNT is negative, scan backwards.
2862 Returns the character number of the position thus found.
2863
2864 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2865
2866 If the beginning or end of (the accessible part of) the buffer is reached
2867 in the middle of a parenthetical grouping, an error is signaled.
2868 If the beginning or end is reached between groupings
2869 but before count is used up, nil is returned.  */)
2870   (Lisp_Object from, Lisp_Object count)
2871 {
2872   CHECK_NUMBER (from);
2873   CHECK_NUMBER (count);
2874
2875   return scan_lists (XINT (from), XINT (count), 0, 1);
2876 }
2877
2878 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2879        0, 0, 0,
2880        doc: /* Move point backward over any number of chars with prefix syntax.
2881 This includes chars with "quote" or "prefix" syntax (' or p).  */)
2882   (void)
2883 {
2884   EMACS_INT beg = BEGV;
2885   EMACS_INT opoint = PT;
2886   EMACS_INT opoint_byte = PT_BYTE;
2887   EMACS_INT pos = PT;
2888   EMACS_INT pos_byte = PT_BYTE;
2889   int c;
2890
2891   if (pos <= beg)
2892     {
2893       SET_PT_BOTH (opoint, opoint_byte);
2894
2895       return Qnil;
2896     }
2897
2898   SETUP_SYNTAX_TABLE (pos, -1);
2899
2900   DEC_BOTH (pos, pos_byte);
2901
2902   while (!char_quoted (pos, pos_byte)
2903          /* Previous statement updates syntax table.  */
2904          && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
2905              || SYNTAX_PREFIX (c)))
2906     {
2907       opoint = pos;
2908       opoint_byte = pos_byte;
2909
2910       if (pos + 1 > beg)
2911         DEC_BOTH (pos, pos_byte);
2912     }
2913
2914   SET_PT_BOTH (opoint, opoint_byte);
2915
2916   return Qnil;
2917 }
2918 \f
2919 /* Parse forward from FROM / FROM_BYTE to END,
2920    assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2921    and return a description of the state of the parse at END.
2922    If STOPBEFORE is nonzero, stop at the start of an atom.
2923    If COMMENTSTOP is 1, stop at the start of a comment.
2924    If COMMENTSTOP is -1, stop at the start or end of a comment,
2925    after the beginning of a string, or after the end of a string.  */
     /* The resulting state is copied into *STATEPTR just before returning.
        TARGETDEPTH makes the scan stop as soon as the paren depth equals it
        right after an open or close paren has been processed.
        OLDSTATE, when non-nil, is walked as a list: elements 0, 3, 4, 5, 7,
        8 and 9 are consulted; elements 1, 2 and 6 are skipped over.  */
2926
2927 static void
2928 scan_sexps_forward (struct lisp_parse_state *stateptr,
2929                     EMACS_INT from, EMACS_INT from_byte, EMACS_INT end,
2930                     int targetdepth, int stopbefore,
2931                     Lisp_Object oldstate, int commentstop)
2932 {
2933   struct lisp_parse_state state;
2934
2935   register enum syntaxcode code;
2936   int c1;
2937   int comnested;
       /* One entry per paren nesting level.  LAST is set to the position of
          the most recently started sexp at that level; PREV is copied from
          LAST whenever a sexp at that level is completed.  The stack holds
          at most 100 levels.  */
2938   struct level { int last, prev; };
2939   struct level levelstart[100];
2940   register struct level *curlevel = levelstart;
2941   struct level *endlevel = levelstart + 100;
2942   register int depth;   /* Paren depth of current scanning location.
2943                            level - levelstart equals this except
2944                            when the depth becomes negative.  */
2945   int mindepth;         /* Lowest DEPTH value seen.  */
2946   int start_quoted = 0;         /* Nonzero means starting after a char quote */
2947   Lisp_Object tem;
2948   EMACS_INT prev_from;          /* Keep one character before FROM.  */
2949   EMACS_INT prev_from_byte;
2950   int prev_from_syntax;
       /* Nonzero only for COMMENTSTOP == -1: also stop at string and
          comment boundaries.  */
2951   int boundary_stop = commentstop == -1;
2952   int nofence;
2953   int found;
2954   EMACS_INT out_bytepos, out_charpos;
2955   int temp;
2956
       /* PREV_FROM starts one character before FROM, except when FROM is
          already at BEGV, in which case both denote the same position.  */
2957   prev_from = from;
2958   prev_from_byte = from_byte;
2959   if (from != BEGV)
2960     DEC_BOTH (prev_from, prev_from_byte);
2961
       /* INC_FROM remembers the current position and the syntax flags of
          the character there as the "previous" ones, then advances
          FROM / FROM_BYTE by one character.  The syntax table is refreshed
          only while FROM is still below END.  It clobbers TEMP.  */
2962   /* Use this macro instead of `from++'.  */
2963 #define INC_FROM                                \
2964 do { prev_from = from;                          \
2965      prev_from_byte = from_byte;                \
2966      temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte);   \
2967      prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
2968      INC_BOTH (from, from_byte);                \
2969      if (from < end)                            \
2970        UPDATE_SYNTAX_TABLE_FORWARD (from);      \
2971   } while (0)
2972
2973   immediate_quit = 1;
2974   QUIT;
2975
2976   if (NILP (oldstate))
2977     {
           /* No previous state: depth 0, outside any string or comment.  */
2978       depth = 0;
2979       state.instring = -1;
2980       state.incomment = 0;
2981       state.comstyle = 0;       /* comment style a by default.  */
2982       state.comstr_start = -1;  /* no comment/string seen.  */
2983     }
2984   else
2985     {
           /* Element 0: paren depth (nil counts as 0).  */
2986       tem = Fcar (oldstate);
2987       if (!NILP (tem))
2988         depth = XINT (tem);
2989       else
2990         depth = 0;
2991
           /* Skip elements 1 and 2; element 3 is the string state:
              nil -> -1 (not in a string), an integer -> that value,
              any other non-nil value -> ST_STRING_STYLE.  */
2992       oldstate = Fcdr (oldstate);
2993       oldstate = Fcdr (oldstate);
2994       oldstate = Fcdr (oldstate);
2995       tem = Fcar (oldstate);
2996       /* Check whether we are inside string_fence-style string: */
2997       state.instring = (!NILP (tem)
2998                         ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2999                         : -1);
3000
           /* Element 4 is the comment state: nil -> 0, an integer -> that
              value, any other non-nil value -> -1.  */
3001       oldstate = Fcdr (oldstate);
3002       tem = Fcar (oldstate);
3003       state.incomment = (!NILP (tem)
3004                          ? (INTEGERP (tem) ? XINT (tem) : -1)
3005                          : 0);
3006
           /* Element 5: non-nil means the scan resumes right after a
              quoting character.  */
3007       oldstate = Fcdr (oldstate);
3008       tem = Fcar (oldstate);
3009       start_quoted = !NILP (tem);
3010
3011       /* if the eighth element of the list is nil, we are in comment
3012          style a.  If it is non-nil, we are in comment style b */
           /* Element 6 is skipped.  Element 7 (the eighth) maps as follows:
              nil -> 0, the symbol `syntax-table' -> ST_COMMENT_STYLE,
              an integer -> that value, anything else -> 1.  */
3013       oldstate = Fcdr (oldstate);
3014       oldstate = Fcdr (oldstate);
3015       tem = Fcar (oldstate);
3016       state.comstyle = (NILP (tem)
3017                         ? 0
3018                         : (EQ (tem, Qsyntax_table)
3019                            ? ST_COMMENT_STYLE
3020                            : INTEGERP (tem) ? XINT (tem) : 1));
3021
           /* Element 8: start position of the current comment or string,
              or -1 when nil.  */
3022       oldstate = Fcdr (oldstate);
3023       tem = Fcar (oldstate);
3024       state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
           /* Element 9: a list of positions, one per enclosing level.
              Each is stored as LAST of a level and a fresh level is pushed
              after it.  On reaching the end of the 100-entry stack the top
              entry is reused instead of signaling an error.  */
3025       oldstate = Fcdr (oldstate);
3026       tem = Fcar (oldstate);
3027       while (!NILP (tem))               /* >= second enclosing sexps.  */
3028         {
3029           /* curlevel++->last ran into compiler bug on Apollo */
3030           curlevel->last = XINT (Fcar (tem));
3031           if (++curlevel == endlevel)
3032             curlevel--; /* error ("Nesting too deep for parser"); */
3033           curlevel->prev = -1;
3034           curlevel->last = -1;
3035           tem = Fcdr (tem);
3036         }
3037     }
3038   state.quoted = 0;
3039   mindepth = depth;
3040
3041   curlevel->prev = -1;
3042   curlevel->last = -1;
3043
       /* Prime PREV_FROM_SYNTAX with the syntax flags of the character at
          PREV_FROM, then bring the syntax table up to date for FROM.  */
3044   SETUP_SYNTAX_TABLE (prev_from, 1);
3045   temp = FETCH_CHAR (prev_from_byte);
3046   prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3047   UPDATE_SYNTAX_TABLE_FORWARD (from);
3048
3049   /* Enter the loop at a place appropriate for initial state.  */
       /* The gotos below jump to labels located inside the main loop's
          switch statement.  */
3050
3051   if (state.incomment)
3052     goto startincomment;
3053   if (state.instring >= 0)
3054     {
3055       nofence = state.instring != ST_STRING_STYLE;
3056       if (start_quoted)
3057         goto startquotedinstring;
3058       goto startinstring;
3059     }
3060   else if (start_quoted)
3061     goto startquoted;
3062
3063   while (from < end)
3064     {
3065       int syntax;
           /* Consume one character; CODE is the low byte of its syntax
              flags word.  */
3066       INC_FROM;
3067       code = prev_from_syntax & 0xff;
3068
           /* Two-character comment starter: the character just consumed is
              flagged as a first starter char and the next one as a second.  */
3069       if (from < end
3070           && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3071           && (c1 = FETCH_CHAR (from_byte),
3072               syntax = SYNTAX_WITH_FLAGS (c1),
3073               SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3074         /* Duplicate code to avoid a complex if-expression
3075            which causes trouble for the SGI compiler.  */
3076         {
3077           /* Record the comment style we have entered so that only
3078              the comment-end sequence of the same style actually
3079              terminates the comment section.  */
3080           state.comstyle
3081             = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3082           comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
3083           comnested = comnested || SYNTAX_FLAGS_COMMENT_NESTED (syntax);
3084           state.incomment = comnested ? 1 : -1;
3085           state.comstr_start = prev_from;
3086           INC_FROM;
3087           code = Scomment;
3088         }
3089       else if (code == Scomment_fence)
3090         {
3091           /* Record the comment style we have entered so that only
3092              the comment-end sequence of the same style actually
3093              terminates the comment section.  */
3094           state.comstyle = ST_COMMENT_STYLE;
3095           state.incomment = -1;
3096           state.comstr_start = prev_from;
3097           code = Scomment;
3098         }
3099       else if (code == Scomment)
3100         {
3101           state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3102           state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3103                              1 : -1);
3104           state.comstr_start = prev_from;
3105         }
3106
           /* A character carrying the prefix flag is not dispatched on.  */
3107       if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3108         continue;
3109       switch (SWITCH_ENUM_CAST (code))
3110         {
3111         case Sescape:
3112         case Scharquote:
3113           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3114           curlevel->last = prev_from;
             /* Also the entry point when OLDSTATE says we start right after
                a quoting character (outside a string).  */
3115         startquoted:
3116           if (from == end) goto endquoted;
3117           INC_FROM;
3118           goto symstarted;
3119           /* treat following character as a word constituent */
3120         case Sword:
3121         case Ssymbol:
3122           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3123           curlevel->last = prev_from;
3124         symstarted:
               /* Swallow the rest of the atom: word, symbol and quote
                  characters, plus any character following an escape or
                  char-quote.  */
3125           while (from < end)
3126             {
3127               /* Some compilers can't handle this inside the switch.  */
3128               temp = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3129               temp = SYNTAX (temp);
3130               switch (temp)
3131                 {
3132                 case Scharquote:
3133                 case Sescape:
3134                   INC_FROM;
3135                   if (from == end) goto endquoted;
3136                   break;
3137                 case Sword:
3138                 case Ssymbol:
3139                 case Squote:
3140                   break;
3141                 default:
3142                   goto symdone;
3143                 }
3144               INC_FROM;
3145             }
3146         symdone:
               /* The atom is complete: it becomes this level's last
                  complete sexp.  */
3147           curlevel->prev = curlevel->last;
3148           break;
3149
3150         case Scomment_fence: /* Can't happen because it's handled above.  */
3151         case Scomment:
3152           if (commentstop || boundary_stop) goto done;
             /* Also the entry point when OLDSTATE says we start inside a
                comment.  */
3153         startincomment:
3154           /* The (from == BEGV) test was to enter the loop in the middle so
3155              that we find a 2-char comment ender even if we start in the
3156              middle of it.  We don't want to do that if we're just at the
3157              beginning of the comment (think of (*) ... (*)).  */
3158           found = forw_comment (from, from_byte, end,
3159                                 state.incomment, state.comstyle,
3160                                 (from == BEGV || from < state.comstr_start + 3)
3161                                 ? 0 : prev_from_syntax,
3162                                 &out_charpos, &out_bytepos, &state.incomment);
3163           from = out_charpos; from_byte = out_bytepos;
3164           /* Beware!  prev_from and friends are invalid now.
3165              Luckily, the `done' doesn't use them and the INC_FROM
3166              sets them to a sane value without looking at them. */
3167           if (!found) goto done;
3168           INC_FROM;
3169           state.incomment = 0;
3170           state.comstyle = 0;   /* reset the comment style */
3171           if (boundary_stop) goto done;
3172           break;
3173
3174         case Sopen:
3175           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3176           depth++;
3177           /* curlevel++->last ran into compiler bug on Apollo */
3178           curlevel->last = prev_from;
3179           if (++curlevel == endlevel)
3180             curlevel--; /* error ("Nesting too deep for parser"); */
3181           curlevel->prev = -1;
3182           curlevel->last = -1;
3183           if (targetdepth == depth) goto done;
3184           break;
3185
3186         case Sclose:
3187           depth--;
3188           if (depth < mindepth)
3189             mindepth = depth;
               /* Never pop below the base of the level stack, even when
                  DEPTH goes negative.  */
3190           if (curlevel != levelstart)
3191             curlevel--;
3192           curlevel->prev = curlevel->last;
3193           if (targetdepth == depth) goto done;
3194           break;
3195
3196         case Sstring:
3197         case Sstring_fence:
3198           state.comstr_start = from - 1;
3199           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3200           curlevel->last = prev_from;
               /* For an ordinary string remember the opening character;
                  for a fence string remember ST_STRING_STYLE instead.  */
3201           state.instring = (code == Sstring
3202                             ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3203                             : ST_STRING_STYLE);
3204           if (boundary_stop) goto done;
             /* Also the entry point when OLDSTATE says we start inside a
                string.  */
3205         startinstring:
3206           {
3207             nofence = state.instring != ST_STRING_STYLE;
3208
3209             while (1)
3210               {
3211                 int c;
3212
3213                 if (from >= end) goto done;
3214                 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3215                 /* Some compilers can't handle this inside the switch.  */
3216                 temp = SYNTAX (c);
3217
3218                 /* Check TEMP here so that if the char has
3219                    a syntax-table property which says it is NOT
3220                    a string character, it does not end the string.  */
3221                 if (nofence && c == state.instring && temp == Sstring)
3222                   break;
3223
3224                 switch (temp)
3225                   {
3226                   case Sstring_fence:
3227                     if (!nofence) goto string_end;
3228                     break;
3229                   case Scharquote:
3230                   case Sescape:
3231                     INC_FROM;
                       /* Also the entry point when OLDSTATE says we start
                          after a quoting character inside a string.  */
3232                   startquotedinstring:
3233                     if (from >= end) goto endquoted;
3234                   }
3235                 INC_FROM;
3236               }
3237           }
3238         string_end:
               /* The string is closed: mark it as this level's last
                  complete sexp and step over the terminator.  */
3239           state.instring = -1;
3240           curlevel->prev = curlevel->last;
3241           INC_FROM;
3242           if (boundary_stop) goto done;
3243           break;
3244
3245         case Smath:
3246           /* FIXME: We should do something with it.  */
3247           break;
3248         default:
3249           /* Ignore whitespace, punctuation, quote, endcomment.  */
3250           break;
3251         }
3252     }
3253   goto done;
3254
3255  stop:   /* Here if stopping before start of sexp. */
3256   from = prev_from;    /* We have just fetched the char that starts it; */
3257   goto done; /* but return the position before it. */
3258
       /* Here if END was reached right after a quoting character.  */
3259  endquoted:
3260   state.quoted = 1;
       /* Common exit: pack the scanner's local variables into STATE.  */
3261  done:
3262   state.depth = depth;
3263   state.mindepth = mindepth;
3264   state.thislevelstart = curlevel->prev;
3265   state.prevlevelstart
3266     = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3267   state.location = from;
       /* Collect LAST of every level below the current one.  Walking from
          the innermost outward while consing onto the front leaves the
          outermost level first in the list.  */
3268   state.levelstarts = Qnil;
3269   while (--curlevel >= levelstart)
3270       state.levelstarts = Fcons (make_number (curlevel->last),
3271                                  state.levelstarts);
3272   immediate_quit = 0;
3273
3274   *stateptr = state;
3275 }
3276
3277 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3278        doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3279 Parsing stops at TO or when certain criteria are met;
3280  point is set to where parsing stops.
3281 If fifth arg OLDSTATE is omitted or nil,
3282  parsing assumes that FROM is the beginning of a function.
3283 Value is a list of elements describing final state of parsing:
3284  0. depth in parens.
3285  1. character address of start of innermost containing list; nil if none.
3286  2. character address of start of last complete sexp terminated.
3287  3. non-nil if inside a string.
3288     (it is the character that will terminate the string,
3289      or t if the string should be terminated by a generic string delimiter.)
3290  4. nil if outside a comment, t if inside a non-nestable comment,
3291     else an integer (the current comment nesting).
3292  5. t if following a quote character.
3293  6. the minimum paren-depth encountered during this scan.
3294  7. style of comment, if any.
3295  8. character address of start of comment or string; nil if not in one.
3296  9. Intermediate data for continuation of parsing (subject to change).
3297 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3298 in parentheses becomes equal to TARGETDEPTH.
3299 Fourth arg STOPBEFORE non-nil means stop when come to
3300  any character that starts a sexp.
3301 Fifth arg OLDSTATE is a list like what this function returns.
3302  It is used to initialize the state of the parse.  Elements number 1, 2, 6
3303  and 8 are ignored.
3304 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3305  If it is symbol `syntax-table', stop after the start of a comment or a
3306  string, or after end of a comment or a string.  */)
3307   (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth, Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3308 {
3309   struct lisp_parse_state state;
3310   int target;
3311
3312   if (!NILP (targetdepth))
3313     {
3314       CHECK_NUMBER (targetdepth);
3315       target = XINT (targetdepth);
3316     }
3317   else
3318     target = -100000;           /* We won't reach this depth */
3319
3320   validate_region (&from, &to);
3321   scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3322                       XINT (to),
3323                       target, !NILP (stopbefore), oldstate,
3324                       (NILP (commentstop)
3325                        ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3326
3327   SET_PT (state.location);
3328
3329   return Fcons (make_number (state.depth),
3330            Fcons (state.prevlevelstart < 0
3331                   ? Qnil : make_number (state.prevlevelstart),
3332              Fcons (state.thislevelstart < 0
3333                     ? Qnil : make_number (state.thislevelstart),
3334                Fcons (state.instring >= 0
3335                       ? (state.instring == ST_STRING_STYLE
3336                          ? Qt : make_number (state.instring)) : Qnil,
3337                  Fcons (state.incomment < 0 ? Qt :
3338                         (state.incomment == 0 ? Qnil :
3339                          make_number (state.incomment)),
3340                    Fcons (state.quoted ? Qt : Qnil,
3341                      Fcons (make_number (state.mindepth),
3342                        Fcons ((state.comstyle
3343                                ? (state.comstyle == ST_COMMENT_STYLE
3344                                   ? Qsyntax_table
3345                                   : make_number (state.comstyle))
3346                                : Qnil),
3347                               Fcons (((state.incomment
3348                                        || (state.instring >= 0))
3349                                       ? make_number (state.comstr_start)
3350                                       : Qnil),
3351                                      Fcons (state.levelstarts, Qnil))))))))));
3352 }
3353 \f
3354 void
3355 init_syntax_once (void)
3356 {
3357   register int i, c;
3358   Lisp_Object temp;
3359
3360   /* This has to be done here, before we call Fmake_char_table.  */
3361   Qsyntax_table = intern_c_string ("syntax-table");
3362   staticpro (&Qsyntax_table);
3363
3364   /* Intern_C_String this now in case it isn't already done.
3365      Setting this variable twice is harmless.
3366      But don't staticpro it here--that is done in alloc.c.  */
3367   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
3368
3369   /* Create objects which can be shared among syntax tables.  */
3370   Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
3371   for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3372     XVECTOR (Vsyntax_code_object)->contents[i]
3373       = Fcons (make_number (i), Qnil);
3374
3375   /* Now we are ready to set up this property, so we can
3376      create syntax tables.  */
3377   Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3378
3379   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3380
3381   Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3382
3383   /* Control characters should not be whitespace.  */
3384   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3385   for (i = 0; i <= ' ' - 1; i++)
3386     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3387   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3388
3389   /* Except that a few really are whitespace.  */
3390   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3391   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3392   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3393   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3394   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3395   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3396
3397   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3398   for (i = 'a'; i <= 'z'; i++)
3399     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3400   for (i = 'A'; i <= 'Z'; i++)
3401     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3402   for (i = '0'; i <= '9'; i++)
3403     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3404
3405   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3406   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3407
3408   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3409                         Fcons (make_number (Sopen), make_number (')')));
3410   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3411                         Fcons (make_number (Sclose), make_number ('(')));
3412   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3413                         Fcons (make_number (Sopen), make_number (']')));
3414   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3415                         Fcons (make_number (Sclose), make_number ('[')));
3416   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3417                         Fcons (make_number (Sopen), make_number ('}')));
3418   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3419                         Fcons (make_number (Sclose), make_number ('{')));
3420   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3421                         Fcons (make_number ((int) Sstring), Qnil));
3422   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3423                         Fcons (make_number ((int) Sescape), Qnil));
3424
3425   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
3426   for (i = 0; i < 10; i++)
3427     {
3428       c = "_-+*/&|<>="[i];
3429       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3430     }
3431
3432   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3433   for (i = 0; i < 12; i++)
3434     {
3435       c = ".,;:?!#@~^'`"[i];
3436       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3437     }
3438
3439   /* All multibyte characters have syntax `word' by default.  */
3440   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3441   char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3442 }
3443
/* Set up the Lisp-visible side of this file: intern and staticpro the
   symbols assigned here, give `scan-error' its error properties,
   declare the user variables (assigning defaults to some of them),
   and register every primitive with defsubr.  */
3444 void
3445 syms_of_syntax (void)
3446 {
3447   Qsyntax_table_p = intern_c_string ("syntax-table-p");
3448   staticpro (&Qsyntax_table_p);
3449
3450   staticpro (&Vsyntax_code_object);
3451
       /* staticpro is handed the address of each gl_state member
          separately.  */
3452   staticpro (&gl_state.object);
3453   staticpro (&gl_state.global_code);
3454   staticpro (&gl_state.current_syntax_table);
3455   staticpro (&gl_state.old_prop);
3456
3457   /* Defined in regex.c */
3458   staticpro (&re_match_object);
3459
       /* `scan-error' gets the condition list (scan-error error) and the
          message "Scan error".  */
3460   Qscan_error = intern_c_string ("scan-error");
3461   staticpro (&Qscan_error);
3462   Fput (Qscan_error, Qerror_conditions,
3463         pure_cons (Qscan_error, pure_cons (Qerror, Qnil)));
3464   Fput (Qscan_error, Qerror_message,
3465         make_pure_c_string ("Scan error"));
3466
       /* No initial value is assigned in this function to the next two
          variables.  */
3467   DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
3468                doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace.  */);
3469
3470   DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
3471                doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3472 Otherwise, that text property is simply ignored.
3473 See the info node `(elisp)Syntax Properties' for a description of the
3474 `syntax-table' property.  */);
3475
       /* Default is 0.  Unlike the variables below, this one is assigned
          before its DEFVAR_BOOL rather than after it.  */
3476   words_include_escapes = 0;
3477   DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
3478                doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words.  */);
3479
3480   DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
3481                doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol.  */);
3482   multibyte_syntax_as_symbol = 0;
3483
3484   DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3485                open_paren_in_column_0_is_defun_start,
3486                doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun.  */);
       /* This is the only boolean here whose default is 1.  */
3487   open_paren_in_column_0_is_defun_start = 1;
3488
3489
3490   DEFVAR_LISP ("find-word-boundary-function-table",
3491                Vfind_word_boundary_function_table,
3492                doc: /*
3493 Char table of functions to search for the word boundary.
3494 Each function is called with two arguments; POS and LIMIT.
3495 POS and LIMIT are character positions in the current buffer.
3496
3497 If POS is less than LIMIT, POS is at the first character of a word,
3498 and the return value of a function is a position after the last
3499 character of that word.
3500
3501 If POS is not less than LIMIT, POS is at the last character of a word,
3502 and the return value of a function is a position at the first
3503 character of that word.
3504
3505 In both cases, LIMIT bounds the search. */);
       /* Initial value: a char table made with Qnil for both arguments.  */
3506   Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3507
       /* Register the primitives.  The blank lines only group related
          subrs for the reader.  */
3508   defsubr (&Ssyntax_table_p);
3509   defsubr (&Ssyntax_table);
3510   defsubr (&Sstandard_syntax_table);
3511   defsubr (&Scopy_syntax_table);
3512   defsubr (&Sset_syntax_table);
3513   defsubr (&Schar_syntax);
3514   defsubr (&Smatching_paren);
3515   defsubr (&Sstring_to_syntax);
3516   defsubr (&Smodify_syntax_entry);
3517   defsubr (&Sinternal_describe_syntax_value);
3518
3519   defsubr (&Sforward_word);
3520
3521   defsubr (&Sskip_chars_forward);
3522   defsubr (&Sskip_chars_backward);
3523   defsubr (&Sskip_syntax_forward);
3524   defsubr (&Sskip_syntax_backward);
3525
3526   defsubr (&Sforward_comment);
3527   defsubr (&Sscan_lists);
3528   defsubr (&Sscan_sexps);
3529   defsubr (&Sbackward_prefix_chars);
3530   defsubr (&Sparse_partial_sexp);
3531 }
3532