]> code.delx.au - gnu-emacs/blob - src/syntax.h
*** empty log message ***
[gnu-emacs] / src / syntax.h
1 /* Declarations having to do with GNU Emacs syntax tables.
2 Copyright (C) 1985, 93, 94, 97, 1998, 2002 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
/* NOTE(review): presumably the Lisp symbol naming the syntax-table
   predicate; its definition is not visible in this header.  */
22 extern Lisp_Object Qsyntax_table_p;
/* Refreshes gl_state.  The macros in this header call it as
   update_syntax_table (POSITION, COUNT-OR-DIRECTION, INIT, OBJECT):
   the UPDATE_SYNTAX_TABLE... macros pass 1 or -1 with INIT == 0,
   the SETUP_SYNTAX_TABLE... macros pass their COUNT with INIT == 1.  */
23 extern void update_syntax_table P_ ((int, int, int, Lisp_Object));
24
25 /* The standard syntax table is stored where it will automatically
   be used in all new buffers: it is simply the syntax_table slot of
   buffer_defaults, for which this macro is an alias.  */
27 #define Vstandard_syntax_table buffer_defaults.syntax_table
28
/* A syntax table is a chartable whose elements are cons cells
   (CODE+FLAGS . MATCHING-CHAR).  MATCHING-CHAR can be nil if the char
   is not a kind of parenthesis.

   The low 8 bits of CODE+FLAGS are a code, as follows.  The numeric
   values matter because they are what gets stored in the table, so
   the first one is pinned to 0 and the rest follow in order.  */

enum syntaxcode
  {
    Swhitespace = 0, /* for a whitespace character */
    Spunct,          /* for random punctuation characters */
    Sword,           /* for a word constituent */
    Ssymbol,         /* symbol constituent but not word constituent */
    Sopen,           /* for a beginning delimiter */
    Sclose,          /* for an ending delimiter */
    Squote,          /* for a prefix character like Lisp ' */
    Sstring,         /* for a string-grouping character like Lisp " */
    Smath,           /* for delimiters like $ in TeX.  */
    Sescape,         /* for a character that begins a C-style escape */
    Scharquote,      /* for a character that quotes the following character */
    Scomment,        /* for a comment-starting character */
    Sendcomment,     /* for a comment-ending character */
    Sinherit,        /* use the standard syntax table for this character */
    Scomment_fence,  /* Starts/ends comment which is delimited on the
                        other side by any char with the same syntaxcode.  */
    Sstring_fence,   /* Starts/ends string which is delimited on the
                        other side by any char with the same syntaxcode.  */
    Smax             /* Upper bound on codes that are meaningful */
  };
57
/* Set the syntax entry VAL for char C in table TABLE.
   All three arguments are forwarded parenthesized to CHAR_TABLE_SET,
   so an expression argument cannot be re-associated by whatever
   CHAR_TABLE_SET expands to.  */

#define SET_RAW_SYNTAX_ENTRY(table, c, val) \
  CHAR_TABLE_SET ((table), (c), (val))
62
/* Store VAL as the syntax entry for every character in RANGE of
   table TABLE.  RANGE is a cons (FROM . TO) giving the first and
   last character of the range.  The work is done by
   Fset_char_table_range.  */

#define SET_RAW_SYNTAX_ENTRY_RANGE(table, range, val) \
  Fset_char_table_range ((table), (range), (val))
68
/* SYNTAX_ENTRY_INT (C) looks character C up in CURRENT_SYNTAX_TABLE
   with CHAR_TABLE_REF.

   CURRENT_SYNTAX_TABLE is the syntax table valid for the current
   position: the one recorded in gl_state (which may come from text
   properties) when SYNTAX_ENTRY_VIA_PROPERTY is defined, otherwise
   the current buffer's syntax table.

   SYNTAX_ENTRY (C) is the raw entry for C.  With
   SYNTAX_ENTRY_VIA_PROPERTY it yields gl_state.global_code whenever
   gl_state.use_global is set and falls back to the table lookup
   otherwise; without it, it is just SYNTAX_ENTRY_INT.  */

#define SYNTAX_ENTRY_INT(c) CHAR_TABLE_REF (CURRENT_SYNTAX_TABLE, (c))

#ifndef SYNTAX_ENTRY_VIA_PROPERTY
# define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
# define SYNTAX_ENTRY SYNTAX_ENTRY_INT
#else
# define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
# define SYNTAX_ENTRY(c) \
  (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
#endif
86
/* Extract the information from the entry for character C
   in the current syntax table.

   SYNTAX (C)            -- the syntax code (low 8 bits of the car).
   SYNTAX_WITH_FLAGS (C) -- the whole car: code plus flag bits.
   SYNTAX_MATCH (C)      -- the cdr, i.e. the matching character.

   When the entry is not a cons, these yield Swhitespace,
   (int) Swhitespace and Qnil respectively.

   The GNU C variants keep the entry in a block-local variable so the
   lookup is done only once.  That local is called _syntax_temp
   rather than a common name such as `temp': the argument C is
   expanded inside the block, so a caller writing SYNTAX (temp) would
   otherwise silently read the macro's own uninitialized local.  */

#ifdef __GNUC__
#define SYNTAX(c) \
  ({ Lisp_Object _syntax_temp; \
     _syntax_temp = SYNTAX_ENTRY (c); \
     (CONSP (_syntax_temp) \
      ? (enum syntaxcode) (XINT (XCAR (_syntax_temp)) & 0xff) \
      : Swhitespace); })

#define SYNTAX_WITH_FLAGS(c) \
  ({ Lisp_Object _syntax_temp; \
     _syntax_temp = SYNTAX_ENTRY (c); \
     (CONSP (_syntax_temp) \
      ? XINT (XCAR (_syntax_temp)) \
      : (int) Swhitespace); })

#define SYNTAX_MATCH(c) \
  ({ Lisp_Object _syntax_temp; \
     _syntax_temp = SYNTAX_ENTRY (c); \
     (CONSP (_syntax_temp) \
      ? XCDR (_syntax_temp) \
      : Qnil); })
#else
/* Without statement expressions the entry is parked in this shared
   global instead.  */
extern Lisp_Object syntax_temp;
#define SYNTAX(c) \
  (syntax_temp = SYNTAX_ENTRY (c), \
   (CONSP (syntax_temp) \
    ? (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff) \
    : Swhitespace))

#define SYNTAX_WITH_FLAGS(c) \
  (syntax_temp = SYNTAX_ENTRY (c), \
   (CONSP (syntax_temp) \
    ? XINT (XCAR (syntax_temp)) \
    : (int) Swhitespace))

#define SYNTAX_MATCH(c) \
  (syntax_temp = SYNTAX_ENTRY (c), \
   (CONSP (syntax_temp) \
    ? XCDR (syntax_temp) \
    : Qnil))
#endif
131
132 /* Then there are seven single-bit flags that have the following meanings:
133 1. This character is the first of a two-character comment-start sequence.
134 2. This character is the second of a two-character comment-start sequence.
135 3. This character is the first of a two-character comment-end sequence.
136 4. This character is the second of a two-character comment-end sequence.
137 5. This character is a prefix, for backward-prefix-chars.
138 6. see below
139 7. This character is part of a nestable comment sequence.
140 Note that any two-character sequence whose first character has flag 1
141 and whose second character has flag 2 will be interpreted as a comment start.
142
143 bit 6 is used to discriminate between two different comment styles.
144 Languages such as C++ allow two orthogonal syntax start/end pairs
145 and bit 6 is used to determine whether a comment-end or Sendcomment
146 ends style a or b. Comment start sequences can start style a or b.
147 Style a is always the default.
148 */
149
/* These macros extract specific flags from an integer
   that holds the syntax code and the flags.

   The single-bit flags sit just above the code, one per bit:
     bit 16  first char of a two-character comment-start sequence
     bit 17  second char of a two-character comment-start sequence
     bit 18  first char of a two-character comment-end sequence
     bit 19  second char of a two-character comment-end sequence
     bit 20  prefix character
     bit 21  comment style
     bit 22  nestable comment
   Each macro yields 0 or 1.  This is the only place where the bit
   positions are spelled out.  */

#define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)

#define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)

#define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)

#define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)

#define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)

#define SYNTAX_FLAGS_COMMENT_STYLE(flags) (((flags) >> 21) & 1)

#define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)

/* These macros extract a particular flag for a given character.
   Each one fetches the character's code+flags word with
   SYNTAX_WITH_FLAGS and hands it to the matching SYNTAX_FLAGS_...
   macro, so the two families cannot drift apart.  */

#define SYNTAX_COMSTART_FIRST(c) \
  SYNTAX_FLAGS_COMSTART_FIRST (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMSTART_SECOND(c) \
  SYNTAX_FLAGS_COMSTART_SECOND (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMEND_FIRST(c) \
  SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMEND_SECOND(c) \
  SYNTAX_FLAGS_COMEND_SECOND (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_PREFIX(c) \
  SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMMENT_STYLE(c) \
  SYNTAX_FLAGS_COMMENT_STYLE (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMMENT_NESTED(c) \
  SYNTAX_FLAGS_COMMENT_NESTED (SYNTAX_WITH_FLAGS (c))
182
/* This array, indexed by a character, contains the syntax code which that
   character signifies (as a char).  For example,
   (enum syntaxcode) syntax_spec_code['w'] is Sword.
   It has 256 entries (the bound was formerly spelled in octal as
   0400).  */

extern unsigned char syntax_spec_code[256];

/* Indexed by syntax code, give the letter that describes it.
   There are 16 entries.  */

extern char syntax_code_spec[16];
192
/* Convert the byte offset BYTEPOS into a character position,
   for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT.

   The value is meant for use in the UPDATE_SYNTAX_TABLE... macros.
   These macros do nothing when parse_sexp_lookup_properties is 0,
   so we return 0 in that case, for speed.

   Otherwise the conversion depends on gl_state.object:
     a string  -- string_byte_to_char on that string;
     a buffer  -- BYTEPOS is taken relative to that buffer's
                  BUF_BEGV_BYTE and the result is made relative to
                  its BUF_BEGV;
     nil       -- the same, using the current buffer's BEGV_BYTE
                  and BEGV;
     otherwise -- BYTEPOS is returned unchanged.  */

#define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos) \
  (! parse_sexp_lookup_properties \
   ? 0 \
   : (STRINGP (gl_state.object) \
      ? string_byte_to_char (gl_state.object, (bytepos)) \
      : (BUFFERP (gl_state.object) \
         ? (buf_bytepos_to_charpos \
              (XBUFFER (gl_state.object), \
               (bytepos) + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1) \
            - BUF_BEGV (XBUFFER (gl_state.object)) + 1) \
         : (NILP (gl_state.object) \
            ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1 \
            : (bytepos)))))
211
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position before CHARPOS.

   Nothing happens unless parse_sexp_lookup_properties is set and
   CHARPOS has reached gl_state.e_property; in that case
   update_syntax_table is called in the forward direction on
   CHARPOS + gl_state.offset.  The value is 1 if an update was
   performed and 0 otherwise.  */

#define UPDATE_SYNTAX_TABLE_FORWARD(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) >= gl_state.e_property \
   && (update_syntax_table ((charpos) + gl_state.offset, 1, 0, \
                            gl_state.object), \
       1))
222
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position after CHARPOS.

   Nothing happens unless parse_sexp_lookup_properties is set and
   CHARPOS lies before gl_state.b_property; in that case
   update_syntax_table is called in the backward direction on
   CHARPOS + gl_state.offset.  The value is 1 if an update was
   performed and 0 otherwise.  */

#define UPDATE_SYNTAX_TABLE_BACKWARD(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) < gl_state.b_property \
   && (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
                            gl_state.object), \
       1))
233
/* Make syntax table good for CHARPOS, whichever side of the
   currently valid region it falls on.

   The backward check (CHARPOS before gl_state.b_property) is tried
   first; only if it does not apply is the forward check (CHARPOS at
   or past gl_state.e_property) tried.  The value is 1 if either
   direction performed an update and 0 otherwise.  */

#define UPDATE_SYNTAX_TABLE(charpos) \
  (UPDATE_SYNTAX_TABLE_BACKWARD (charpos) \
   || UPDATE_SYNTAX_TABLE_FORWARD (charpos))
248
/* Initialize gl_state for scanning the current buffer.

   Call this with FROM at the start of a forward search, or just
   after the last position of a backward search.  The sign of COUNT
   gives the direction of the search.  The macro makes sure the first
   character is picked up with the correct table, so there is no need
   to call UPDATE_SYNTAX_TABLE immediately afterwards.

   The expansion is `if (1) { ... } else', so the semicolon written
   after the call becomes the empty else branch.  FROM and COUNT are
   evaluated more than once.  */

#define SETUP_SYNTAX_TABLE(FROM, COUNT) \
if (1) \
  { \
    gl_state.object = Qnil; \
    gl_state.offset = 0; \
    gl_state.use_global = 0; \
    gl_state.current_syntax_table = current_buffer->syntax_table; \
    gl_state.b_property = BEGV; \
    gl_state.e_property = ZV + 1; \
    if (parse_sexp_lookup_properties \
        && ((COUNT) > 0 || (FROM) > BEGV)) \
      update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1, (COUNT), \
                           1, Qnil); \
  } \
else
272
/* Like SETUP_SYNTAX_TABLE, but scan OBJECT.  If OBJECT is nil, use
   the current buffer.  If it is t, ignore properties altogether.
   Any other non-buffer OBJECT is treated as a string.

   This is meant for regex.c to use.  For buffers, regex.c passes arguments
   to the UPDATE_SYNTAX_TABLE macros which are relative to BEGV.
   So if it is a buffer, we set the offset field to BEGV - 1.

   FROM and COUNT are parenthesized at every use so that expression
   arguments (for instance a conditional expression passed as COUNT)
   keep their meaning inside the expansion.

   The expansion is `if (1) { ... } else', so the semicolon written
   after the call becomes the empty else branch.  */

#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT) \
if (1) \
  { \
    gl_state.object = (OBJECT); \
    if (BUFFERP (gl_state.object)) \
      { \
        struct buffer *buf = XBUFFER (gl_state.object); \
        gl_state.b_property = 1; \
        gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1; \
        gl_state.offset = BUF_BEGV (buf) - 1; \
      } \
    else if (NILP (gl_state.object)) \
      { \
        gl_state.b_property = 1; \
        gl_state.e_property = ZV - BEGV + 1; \
        gl_state.offset = BEGV - 1; \
      } \
    else if (EQ (gl_state.object, Qt)) \
      { \
        /* Properties are ignored: use a huge end bound.  */ \
        gl_state.b_property = 0; \
        gl_state.e_property = 1500000000; \
        gl_state.offset = 0; \
      } \
    else \
      { \
        gl_state.b_property = 0; \
        gl_state.e_property = 1 + XSTRING (gl_state.object)->size; \
        gl_state.offset = 0; \
      } \
    gl_state.use_global = 0; \
    gl_state.current_syntax_table = current_buffer->syntax_table; \
    if (parse_sexp_lookup_properties) \
      update_syntax_table (((FROM) + gl_state.offset \
                            + ((COUNT) > 0 ? 0 : -1)), \
                           (COUNT), 1, gl_state.object); \
  } \
else
317
/* Scanning state shared by the syntax macros in this header (see the
   gl_state variable).  In the field comments, "c_s_t" abbreviates
   current_syntax_table.  */
318 struct gl_state_s
319 {
320 Lisp_Object object; /* The object we are scanning. */
321 int start; /* Where to stop.  NOTE(review): worded exactly like
   `stop' below; presumably this is the limit when
   moving backward -- confirm against syntax.c. */
322 int stop; /* Where to stop. */
323 int use_global; /* Whether to use global_code
   or c_s_t; SYNTAX_ENTRY tests this when
   SYNTAX_ENTRY_VIA_PROPERTY is defined. */
325 Lisp_Object global_code; /* Syntax code of current char. */
326 Lisp_Object current_syntax_table; /* Syntax table for current pos. */
327 Lisp_Object old_prop; /* Syntax-table prop at prev pos. */
328 int b_property; /* First index where c_s_t is valid. */
329 int e_property; /* First index where c_s_t is
   not valid. */
331 INTERVAL forward_i; /* Where to start lookup on forward */
332 INTERVAL backward_i; /* or backward movement. The
   data in c_s_t is valid
   between these intervals,
   and possibly at the
   intervals too.
   NOTE(review): this comment used to end
   with a dangling "depending on:" that
   nothing followed. */
338 /* Offset for positions specified to UPDATE_SYNTAX_TABLE: the
   UPDATE_SYNTAX_TABLE... macros add it to CHARPOS before calling
   update_syntax_table. */
339 int offset;
340 };
341
/* The single scanning state read and written by the SETUP_... and
   UPDATE_SYNTAX_TABLE... macros.  */
342 extern struct gl_state_s gl_state;
/* When this is zero the UPDATE_SYNTAX_TABLE... macros do nothing and
   the SETUP_... macros skip their update_syntax_table call.  */
343 extern int parse_sexp_lookup_properties;
/* NOTE(review): not used in this header; presumably returns the
   interval containing a position in an object -- confirm.  */
344 extern INTERVAL interval_of P_ ((int, Lisp_Object));
345
/* NOTE(review): not used in this header; argument meanings are not
   visible here.  */
346 extern int scan_words P_ ((int, int));