]> code.delx.au - gnu-emacs/blob - src/syntax.h
*** empty log message ***
[gnu-emacs] / src / syntax.h
1 /* Declarations having to do with GNU Emacs syntax tables.
2 Copyright (C) 1985, 1993, 1994, 1997, 1998, 2002, 2003, 2004,
3 2005, 2006 Free Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22
23 extern Lisp_Object Qsyntax_table_p;
/* Called by the UPDATE_SYNTAX_TABLE... and SETUP_SYNTAX_TABLE... macros
   in this file.  As used there, the arguments are: a character
   position, a count whose sign gives the scan direction, a flag that
   is 1 when called from a SETUP_... macro and 0 when called from an
   UPDATE_... macro, and the object being scanned.  */
24 extern void update_syntax_table P_ ((int, int, int, Lisp_Object));
25
26 /* The standard syntax table is stored where it will automatically
27 be used in all new buffers. */
28 #define Vstandard_syntax_table buffer_defaults.syntax_table
29
30 /* A syntax table is a char-table whose elements are cons cells
31 (CODE+FLAGS . MATCHING-CHAR). MATCHING-CHAR can be nil if the char
32 is not a kind of parenthesis.
33
34 The low 8 bits of CODE+FLAGS hold a syntax code, as follows: */
35
/* Syntax codes, numbered consecutively from 0 (Swhitespace).
   SYNTAX (c) falls back to Swhitespace when a character's entry in
   the syntax table is not a cons.  */
36 enum syntaxcode
37 {
38 Swhitespace, /* for a whitespace character */
39 Spunct, /* for random punctuation characters */
40 Sword, /* for a word constituent */
41 Ssymbol, /* symbol constituent but not word constituent */
42 Sopen, /* for a beginning delimiter */
43 Sclose, /* for an ending delimiter */
44 Squote, /* for a prefix character like Lisp ' */
45 Sstring, /* for a string-grouping character like Lisp " */
46 Smath, /* for delimiters like $ in TeX. */
47 Sescape, /* for a character that begins a C-style escape */
48 Scharquote, /* for a character that quotes the following character */
49 Scomment, /* for a comment-starting character */
50 Sendcomment, /* for a comment-ending character */
51 Sinherit, /* use the standard syntax table for this character */
52 Scomment_fence, /* Starts/ends comment which is delimited on the
53 other side by any char with the same syntaxcode. */
54 Sstring_fence, /* Starts/ends string which is delimited on the
55 other side by any char with the same syntaxcode. */
56 Smax /* Upper bound on codes that are meaningful: one past the last
   code above, i.e. 16.  */
57 };
58
59 /* Set the syntax entry VAL for char C in table TABLE.
   When C is in the range 0..255 (that is, (C & 0xFF) == C) the entry
   is stored directly in the char-table's `contents' array; otherwise
   it is stored by calling Faset.
   C is expanded several times, so it must not have side effects.  */
60
61 #define SET_RAW_SYNTAX_ENTRY(table, c, val) \
62 ((((c) & 0xFF) == (c)) \
63 ? (XCHAR_TABLE (table)->contents[(unsigned char) (c)] = (val)) \
64 : Faset ((table), make_number (c), (val)))
65
66 /* Fetch the syntax entry for char C in syntax table TABLE.
67 This macro is called only when C is less than CHAR_TABLE_ORDINARY_SLOTS.
68 Do inheritance.

   The GNU C version is a statement expression: while the entry is
   nil it retries in the table's parent, and it yields nil if the
   parent chain ends without an entry being found.  TABLE is
   evaluated once there.

   The other version stores TABLE's own entry in the global
   `syntax_temp' and, if that entry is nil, calls
   syntax_parent_lookup (TABLE, C) instead.

   Both versions expand C more than once.  */
69
70 #ifdef __GNUC__
71 #define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
72 ({ Lisp_Object _syntax_tbl = (table); \
73 Lisp_Object _syntax_temp = XCHAR_TABLE (_syntax_tbl)->contents[(c)]; \
74 while (NILP (_syntax_temp)) \
75 { \
76 _syntax_tbl = XCHAR_TABLE (_syntax_tbl)->parent; \
77 if (NILP (_syntax_tbl)) \
78 break; \
79 _syntax_temp = XCHAR_TABLE (_syntax_tbl)->contents[(c)]; \
80 } \
81 _syntax_temp; })
82 #else
83 extern Lisp_Object syntax_temp;
84 extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int));
85
86 #define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
87 (syntax_temp = XCHAR_TABLE (table)->contents[(c)], \
88 (NILP (syntax_temp) \
89 ? syntax_parent_lookup (table, (c)) \
90 : syntax_temp))
91 #endif
92
93 /* SYNTAX_ENTRY fetches the information from the entry for character C
94 in the current syntax table (CURRENT_SYNTAX_TABLE), or from globally
95 kept data (gl_state). Does inheritance. */
96 /* CURRENT_SYNTAX_TABLE gives the syntax table valid for current
97 position, it is either the buffer's syntax table, or syntax table
98 found in text properties. */
/* When SYNTAX_ENTRY_VIA_PROPERTY is defined, SYNTAX_ENTRY (c) yields
   gl_state.global_code if gl_state.use_global is nonzero and
   SYNTAX_ENTRY_INT (c) otherwise, and CURRENT_SYNTAX_TABLE is
   gl_state.current_syntax_table.  When it is not defined,
   SYNTAX_ENTRY is simply SYNTAX_ENTRY_INT and CURRENT_SYNTAX_TABLE is
   current_buffer->syntax_table.  */
99
100 #ifdef SYNTAX_ENTRY_VIA_PROPERTY
101 # define SYNTAX_ENTRY(c) \
102 (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
103 # define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
104 #else
105 # define SYNTAX_ENTRY SYNTAX_ENTRY_INT
106 # define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
107 #endif
108
/* Fetch the syntax entry for char C from CURRENT_SYNTAX_TABLE.
   When C is in the range 0..255 (that is, (C & 0xFF) == C) the entry
   is looked up with SYNTAX_ENTRY_FOLLOW_PARENT; otherwise it is
   fetched by calling Faref on the table.
   C is expanded several times, so it must not have side effects.  */
109 #define SYNTAX_ENTRY_INT(c) \
110 ((((c) & 0xFF) == (c)) \
111 ? SYNTAX_ENTRY_FOLLOW_PARENT (CURRENT_SYNTAX_TABLE, \
112 (unsigned char) (c)) \
113 : Faref (CURRENT_SYNTAX_TABLE, \
114 make_number (c)))
115
116 /* Extract the information from the entry for character C
117 in the current syntax table.

   Each macro fetches the entry with SYNTAX_ENTRY (c), then:
     SYNTAX (c)            yields the low 8 bits of the entry's car,
                           as an enum syntaxcode;
     SYNTAX_WITH_FLAGS (c) yields the whole car as an integer
                           (code and flags together);
     SYNTAX_MATCH (c)      yields the entry's cdr.
   If the entry is not a cons they yield Swhitespace,
   (int) Swhitespace and Qnil respectively.

   The GNU C versions are statement expressions with a local
   temporary; the other versions store the entry in the global
   `syntax_temp'.  */
118
119 #ifdef __GNUC__
120 #define SYNTAX(c) \
121 ({ Lisp_Object _syntax_temp; \
122 _syntax_temp = SYNTAX_ENTRY (c); \
123 (CONSP (_syntax_temp) \
124 ? (enum syntaxcode) (XINT (XCAR (_syntax_temp)) & 0xff) \
125 : Swhitespace); })
126
127 #define SYNTAX_WITH_FLAGS(c) \
128 ({ Lisp_Object _syntax_temp; \
129 _syntax_temp = SYNTAX_ENTRY (c); \
130 (CONSP (_syntax_temp) \
131 ? XINT (XCAR (_syntax_temp)) \
132 : (int) Swhitespace); })
133
134 #define SYNTAX_MATCH(c) \
135 ({ Lisp_Object _syntax_temp; \
136 _syntax_temp = SYNTAX_ENTRY (c); \
137 (CONSP (_syntax_temp) \
138 ? XCDR (_syntax_temp) \
139 : Qnil); })
140 #else
141 #define SYNTAX(c) \
142 (syntax_temp = SYNTAX_ENTRY ((c)), \
143 (CONSP (syntax_temp) \
144 ? (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff) \
145 : Swhitespace))
146
147 #define SYNTAX_WITH_FLAGS(c) \
148 (syntax_temp = SYNTAX_ENTRY ((c)), \
149 (CONSP (syntax_temp) \
150 ? XINT (XCAR (syntax_temp)) \
151 : (int) Swhitespace))
152
153 #define SYNTAX_MATCH(c) \
154 (syntax_temp = SYNTAX_ENTRY ((c)), \
155 (CONSP (syntax_temp) \
156 ? XCDR (syntax_temp) \
157 : Qnil))
158 #endif
159
160 /* Then there are seven single-bit flags, stored in bits 16 through 22
of CODE+FLAGS, that have the following meanings:
161 1. This character is the first of a two-character comment-start sequence.
162 2. This character is the second of a two-character comment-start sequence.
163 3. This character is the first of a two-character comment-end sequence.
164 4. This character is the second of a two-character comment-end sequence.
165 5. This character is a prefix, for backward-prefix-chars.
166 6. see below
167 7. This character is part of a nestable comment sequence.
168 Note that any two-character sequence whose first character has flag 1
169 and whose second character has flag 2 will be interpreted as a comment start.
170
171 Flag 6 is used to discriminate between two different comment styles.
172 Languages such as C++ allow two orthogonal syntax start/end pairs,
173 and flag 6 is used to determine whether a comment-end sequence or an
174 Sendcomment character ends style a or b. Comment start sequences can
175 start style a or b. Style a is always the default.
176 */
177
178 /* These macros extract a particular flag for a given character.
   Each one expands SYNTAX_WITH_FLAGS (c), shifts the result right and
   masks it with 1, so its value is 0 or 1.  The shift counts run from
   16 to 22, in the order of flags 1 through 7 listed above.  */
179
180 #define SYNTAX_COMSTART_FIRST(c) ((SYNTAX_WITH_FLAGS (c) >> 16) & 1)
181
182 #define SYNTAX_COMSTART_SECOND(c) ((SYNTAX_WITH_FLAGS (c) >> 17) & 1)
183
184 #define SYNTAX_COMEND_FIRST(c) ((SYNTAX_WITH_FLAGS (c) >> 18) & 1)
185
186 #define SYNTAX_COMEND_SECOND(c) ((SYNTAX_WITH_FLAGS (c) >> 19) & 1)
187
188 #define SYNTAX_PREFIX(c) ((SYNTAX_WITH_FLAGS (c) >> 20) & 1)
189
190 #define SYNTAX_COMMENT_STYLE(c) ((SYNTAX_WITH_FLAGS (c) >> 21) & 1)
191
192 #define SYNTAX_COMMENT_NESTED(c) ((SYNTAX_WITH_FLAGS (c) >> 22) & 1)
193
194 /* These macros extract specific flags from an integer
195 that holds the syntax code and the flags.
   They use the same bit positions (16 through 22) as the
   per-character SYNTAX_... flag macros, but take the already fetched
   integer FLAGS instead of looking a character up, and perform no
   syntax-table access.  Each yields 0 or 1.  */
196
197 #define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)
198
199 #define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)
200
201 #define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)
202
203 #define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)
204
205 #define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)
206
207 #define SYNTAX_FLAGS_COMMENT_STYLE(flags) (((flags) >> 21) & 1)
208
209 #define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
210
211 /* This array, indexed by a character, contains the syntax code which that
212 character signifies (as a char). For example,
213 (enum syntaxcode) syntax_spec_code['w'] is Sword.
   The array has 0400 (octal), i.e. 256, elements.  */
214
215 extern unsigned char syntax_spec_code[0400];
216
217 /* Indexed by syntax code, give the letter that describes it.
   The array has 16 elements, matching the 16 codes that precede Smax
   in enum syntaxcode.  */
218
219 extern char syntax_code_spec[16];
220
221 /* Convert the byte offset BYTEPOS into a character position,
222 for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT.
223
224 The value is meant for use in the UPDATE_SYNTAX_TABLE... macros.
225 These macros do nothing when parse_sexp_lookup_properties is 0,
226 so we return 0 in that case, for speed.

   Otherwise the result depends on gl_state.object:
   - a string: string_byte_to_char (object, BYTEPOS);
   - a buffer: BYTEPOS is rebased by that buffer's BUF_BEGV_BYTE - 1,
     converted with buf_bytepos_to_charpos, and the result is rebased
     by subtracting BUF_BEGV - 1;
   - nil: the same computation using BYTE_TO_CHAR, BEGV_BYTE and BEGV;
   - anything else: BYTEPOS is returned unchanged.
   BYTEPOS is expanded in several branches, so it must not have side
   effects.  */
227
228 #define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos) \
229 (! parse_sexp_lookup_properties \
230 ? 0 \
231 : STRINGP (gl_state.object) \
232 ? string_byte_to_char (gl_state.object, (bytepos)) \
233 : BUFFERP (gl_state.object) \
234 ? buf_bytepos_to_charpos (XBUFFER (gl_state.object), \
235 (bytepos) + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1) - BUF_BEGV (XBUFFER (gl_state.object)) + 1 \
236 : NILP (gl_state.object) \
237 ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1 \
238 : (bytepos))
239
240 /* Make syntax table state (gl_state) good for CHARPOS, assuming it is
241 currently good for a position before CHARPOS.
   If parse_sexp_lookup_properties is nonzero and CHARPOS is at or
   beyond gl_state.e_property, call update_syntax_table with
   CHARPOS + gl_state.offset and a count of 1.  The expression's value
   is 1 if that call was made and 0 otherwise.  */
242
243 #define UPDATE_SYNTAX_TABLE_FORWARD(charpos) \
244 (parse_sexp_lookup_properties \
245 && (charpos) >= gl_state.e_property \
246 ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0, \
247 gl_state.object), \
248 1) \
249 : 0)
250
251 /* Make syntax table state (gl_state) good for CHARPOS, assuming it is
252 currently good for a position after CHARPOS.
   If parse_sexp_lookup_properties is nonzero and CHARPOS is before
   gl_state.b_property, call update_syntax_table with
   CHARPOS + gl_state.offset and a count of -1.  The expression's
   value is 1 if that call was made and 0 otherwise.  */
253
254 #define UPDATE_SYNTAX_TABLE_BACKWARD(charpos) \
255 (parse_sexp_lookup_properties \
256 && (charpos) < gl_state.b_property \
257 ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
258 gl_state.object), \
259 1) \
260 : 0)
261
262 /* Make syntax table good for CHARPOS.
   Combines the two macros above: the backward condition
   (CHARPOS < gl_state.b_property) is tested first, then the forward
   one (CHARPOS >= gl_state.e_property).  The expression's value is 1
   if update_syntax_table was called and 0 otherwise.  CHARPOS is
   expanded more than once.  */
263
264 #define UPDATE_SYNTAX_TABLE(charpos) \
265 (parse_sexp_lookup_properties \
266 && (charpos) < gl_state.b_property \
267 ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
268 gl_state.object), \
269 1) \
270 : (parse_sexp_lookup_properties \
271 && (charpos) >= gl_state.e_property \
272 ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0,\
273 gl_state.object), \
274 1) \
275 : 0))
276
277 /* This macro should be called with FROM at the start of forward
278 search, or after the last position of the backward search. It
279 makes sure that the first char is picked up with correct table, so
280 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
281 call.
282 Sign of COUNT gives the direction of the search.

   It resets gl_state for the current buffer: b_property = BEGV,
   e_property = ZV + 1, object = nil, use_global = 0, offset = 0 and
   current_syntax_table = the buffer's syntax table.  Then, if
   parse_sexp_lookup_properties is nonzero and either COUNT > 0 or
   FROM > BEGV, it calls update_syntax_table for FROM (when COUNT > 0)
   or FROM - 1 (otherwise).

   The expansion has the form `if (1) { ... } else', so the statement
   that follows an invocation becomes the never-executed else branch;
   write the invocation followed by a semicolon.  FROM and COUNT are
   expanded more than once.
283 */
284
285 #define SETUP_SYNTAX_TABLE(FROM, COUNT) \
286 if (1) \
287 { \
288 gl_state.b_property = BEGV; \
289 gl_state.e_property = ZV + 1; \
290 gl_state.object = Qnil; \
291 gl_state.use_global = 0; \
292 gl_state.offset = 0; \
293 gl_state.current_syntax_table = current_buffer->syntax_table; \
294 if (parse_sexp_lookup_properties) \
295 if ((COUNT) > 0 || (FROM) > BEGV) \
296 update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1, (COUNT),\
297 1, Qnil); \
298 } \
299 else
300
301 /* Same as SETUP_SYNTAX_TABLE, but in OBJECT. If OBJECT is nil, use
302 current buffer. If it is t, ignore properties altogether.
303
304 This is meant for regex.c to use. For buffers, regex.c passes arguments
305 to the UPDATE_SYNTAX_TABLE macros which are relative to BEGV.
306 So if it is a buffer, we set the offset field to BEGV - 1.

   The fields of gl_state are set according to OBJECT:
   - a buffer: b_property = 1, e_property = BUF_ZV - BUF_BEGV + 1,
     offset = BUF_BEGV - 1, all taken from that buffer;
   - nil: the same, using the current buffer's ZV and BEGV;
   - t: b_property = 0, e_property = 1500000000, offset = 0;
   - anything else (SCHARS is applied to it, so presumably a string):
     b_property = 0, e_property = 1 + SCHARS (OBJECT), offset = 0.
   use_global is cleared and current_syntax_table is set to the
   current buffer's syntax table.  Finally, if
   parse_sexp_lookup_properties is nonzero, update_syntax_table is
   called for FROM + offset (minus 1 when COUNT is not positive).

   COUNT is parenthesized wherever it is expanded, so that an
   expression argument such as `a ? 1 : -1' is compared and passed as
   a whole.  FROM and COUNT are expanded more than once.

   The expansion has the form `if (1) { ... } else'; write the
   invocation followed by a semicolon.  */
307
308 #define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT) \
309 if (1) \
310 { \
311 gl_state.object = (OBJECT); \
312 if (BUFFERP (gl_state.object)) \
313 { \
314 struct buffer *buf = XBUFFER (gl_state.object); \
315 gl_state.b_property = 1; \
316 gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1; \
317 gl_state.offset = BUF_BEGV (buf) - 1; \
318 } \
319 else if (NILP (gl_state.object)) \
320 { \
321 gl_state.b_property = 1; \
322 gl_state.e_property = ZV - BEGV + 1; \
323 gl_state.offset = BEGV - 1; \
324 } \
325 else if (EQ (gl_state.object, Qt)) \
326 { \
327 gl_state.b_property = 0; \
328 gl_state.e_property = 1500000000; \
329 gl_state.offset = 0; \
330 } \
331 else \
332 { \
333 gl_state.b_property = 0; \
334 gl_state.e_property = 1 + SCHARS (gl_state.object); \
335 gl_state.offset = 0; \
336 } \
337 gl_state.use_global = 0; \
338 gl_state.current_syntax_table = current_buffer->syntax_table; \
339 if (parse_sexp_lookup_properties) \
340 update_syntax_table (((FROM) + gl_state.offset \
341 + ((COUNT) > 0 ? 0 : -1)), \
342 (COUNT), 1, gl_state.object); \
343 } \
344 else
345
/* State shared by the syntax macros in this file: the object being
   scanned, the syntax table currently in effect, and the range of
   positions [b_property, e_property) for which that table is valid.
   In the field comments, `c_s_t' abbreviates current_syntax_table.  */
346 struct gl_state_s
347 {
348 Lisp_Object object; /* The object we are scanning. */
349 int start; /* Presumably where to start; the comment here used
   to repeat `Where to stop.' -- TODO confirm. */
350 int stop; /* Where to stop. */
351 int use_global; /* Whether to use global_code
352 or c_s_t. */
353 Lisp_Object global_code; /* Syntax code of current char. */
354 Lisp_Object current_syntax_table; /* Syntax table for current pos. */
355 Lisp_Object old_prop; /* Syntax-table prop at prev pos. */
356 int b_property; /* First index where c_s_t is valid. */
357 int e_property; /* First index where c_s_t is
358 not valid. */
359 INTERVAL forward_i; /* Where to start lookup on forward */
360 INTERVAL backward_i; /* or backward movement. The
361 data in c_s_t is valid
362 between these intervals,
363 and possibly at the
364 intervals too, depending
365 on: */
366 /* Offset for positions specified to UPDATE_SYNTAX_TABLE.
   The UPDATE_SYNTAX_TABLE... macros add it to their CHARPOS argument
   before calling update_syntax_table. */
367 int offset;
368 };
369
/* The single global state object read and written by the SYNTAX_ENTRY,
   UPDATE_SYNTAX_TABLE... and SETUP_SYNTAX_TABLE... macros.  */
370 extern struct gl_state_s gl_state;
/* When zero, the UPDATE_SYNTAX_TABLE... macros do nothing and the
   SETUP_SYNTAX_TABLE... macros do not call update_syntax_table.  */
371 extern int parse_sexp_lookup_properties;
372 extern INTERVAL interval_of P_ ((int, Lisp_Object));
373
374 extern int scan_words P_ ((int, int));
375
376 /* arch-tag: 28833cca-cd73-4741-8c85-a3111166a0e0
377 (do not change this comment) */