]> code.delx.au - gnu-emacs/blob - lib-src/etags.c
Merge from origin/emacs-25
[gnu-emacs] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
2
3 Copyright (C) 1984 The Regents of the University of California
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2016 Free Software
32 Foundation, Inc.
33
34 This file is not considered part of GNU Emacs.
35
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or
39 (at your option) any later version.
40
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
45
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
48
49
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
53
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
57
58
59 /*
60 * Authors:
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 * Francesco Potortì maintained and improved it for many years
72 starting in 1993.
73 */
74
75 /*
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
79 */
80
81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
82
83 #ifdef DEBUG
84 # undef DEBUG
85 # define DEBUG true
86 #else
87 # define DEBUG false
88 # define NDEBUG /* disable assert */
89 #endif
90
91 #include <config.h>
92
93 #ifndef _GNU_SOURCE
94 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
95 #endif
96
97 /* WIN32_NATIVE is for XEmacs.
98 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
99 #ifdef WIN32_NATIVE
100 # undef MSDOS
101 # undef WINDOWSNT
102 # define WINDOWSNT
103 #endif /* WIN32_NATIVE */
104
105 #ifdef MSDOS
106 # undef MSDOS
107 # define MSDOS true
108 # include <sys/param.h>
109 #else
110 # define MSDOS false
111 #endif /* MSDOS */
112
113 #ifdef WINDOWSNT
114 # include <direct.h>
115 # define MAXPATHLEN _MAX_PATH
116 # undef HAVE_NTGUI
117 # undef DOS_NT
118 # define DOS_NT
119 # define O_CLOEXEC O_NOINHERIT
120 #endif /* WINDOWSNT */
121
122 #include <limits.h>
123 #include <unistd.h>
124 #include <stdarg.h>
125 #include <stdlib.h>
126 #include <string.h>
127 #include <sysstdio.h>
128 #include <errno.h>
129 #include <fcntl.h>
130 #include <binary-io.h>
131 #include <c-ctype.h>
132 #include <c-strcase.h>
133
134 #include <assert.h>
135 #ifdef NDEBUG
136 # undef assert /* some systems have a buggy assert.h */
137 # define assert(x) ((void) 0)
138 #endif
139
140 #include <getopt.h>
141 #include <regex.h>
142
143 /* Define CTAGS to make the program "ctags" compatible with the usual one.
144 Leave it undefined to make the program "etags", which makes emacs-style
145 tag tables and tags typedefs, #defines and struct/union/enum by default. */
146 #ifdef CTAGS
147 # undef CTAGS
148 # define CTAGS true
149 #else
150 # define CTAGS false
151 #endif
152
/* Return true if the NUL-terminated strings S and T have exactly the
   same contents, false otherwise.  */
static bool
streq (char const *s, char const *t)
{
  return !strcmp (s, t);
}
158
/* Return true if S and T compare equal according to c_strcasecmp,
   false otherwise.  */
static bool
strcaseeq (char const *s, char const *t)
{
  return !c_strcasecmp (s, t);
}
164
/* Return true if S and T compare equal according to strncmp over at
   most N characters, false otherwise.  */
static bool
strneq (char const *s, char const *t, size_t n)
{
  return !strncmp (s, t, n);
}
170
/* Return true if S and T compare equal according to c_strncasecmp
   over at most N characters, false otherwise.  */
static bool
strncaseeq (char const *s, char const *t, size_t n)
{
  return !c_strncasecmp (s, t, n);
}
176
/* Return true if C is not in a name, i.e. C is NUL, one of the
   whitespace characters tab, newline, form feed, carriage return or
   space, or one of the punctuation characters ( ) , ; =.  */
static bool
notinname (unsigned char c)
{
  /* Look at make_tag before modifying! */
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\f': case '\r': case ' ':
    case '(': case ')': case ',': case ';': case '=':
      return true;

    default:
      return false;
    }
}
188
/* Return true if C can start a token: an ASCII letter or one of
   the characters $ @ _ ~.  The characters are listed one by one so
   that no assumption is made about their encoding.  */
static bool
begtoken (unsigned char c)
{
  switch (c)
    {
    case '$': case '@': case '_': case '~':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;

    default:
      return false;
    }
}
208
/* Return true if C can be in the middle of a token: an ASCII letter,
   a decimal digit, or one of the characters $ _.  The characters are
   listed one by one so that no assumption is made about their
   encoding.  */
static bool
intoken (unsigned char c)
{
  switch (c)
    {
    case '$': case '_':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;

    default:
      return false;
    }
}
229
/* Return true if C can end a token: NUL, tab, newline, carriage
   return, space, or one of the punctuation characters listed below.
   Unlike notinname, form feed is not in the set.  */
static bool
endtoken (unsigned char c)
{
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\r': case ' ':
    case '!': case '"': case '#': case '%': case '&': case '\'':
    case '(': case ')': case '*': case '+': case ',': case '-':
    case '.': case '/': case ':': case ';': case '<': case '=':
    case '>': case '?': case '[': case ']': case '^':
    case '{': case '|': case '}': case '~':
      return true;

    default:
      return false;
    }
}
243
/*
 * xnew  -- obtain storage for N objects of type Type from xmalloc;
 *          the result is cast to `Type *'.
 * xrnew -- pass OP to xrealloc asking for room for N objects of type
 *          Type, and store the result back into OP.
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xrnew mentions OP twice in its expansion.
 */
#define xnew(n, Type)	   ((Type *) xmalloc (sizeof (Type) * (n)))
#define xrnew(op, n, Type) ((op) = (Type *) xrealloc (op, sizeof (Type) * (n)))
252
/* Type of a language parse function: it takes the input stream and
   returns nothing.  */
typedef void Lang_function (FILE *inf);
254
/* Pairs a file name suffix with the command used for files that
   carry it.  */
typedef struct
{
  /* File name suffix for this compressor.  */
  const char *suffix;

  /* Command; takes one arg and decompresses to stdout.  */
  const char *command;
} compressor;
260
261 typedef struct
262 {
263 const char *name; /* language name */
264 const char *help; /* detailed help for the language */
265 Lang_function *function; /* parse function */
266 const char **suffixes; /* name suffixes of this language's files */
267 const char **filenames; /* names of this language's files */
268 const char **interpreters; /* interpreters for this language */
269 bool metasource; /* source used to generate other sources */
270 } language;
271
272 typedef struct fdesc
273 {
274 struct fdesc *next; /* for the linked list */
275 char *infname; /* uncompressed input file name */
276 char *infabsname; /* absolute uncompressed input file name */
277 char *infabsdir; /* absolute dir of input file */
278 char *taggedfname; /* file name to write in tagfile */
279 language *lang; /* language of file */
280 char *prop; /* file properties to write in tagfile */
281 bool usecharno; /* etags tags shall contain char number */
282 bool written; /* entry written in the tags file */
283 } fdesc;
284
285 typedef struct node_st
286 { /* sorting structure */
287 struct node_st *left, *right; /* left and right sons */
288 fdesc *fdp; /* description of file to whom tag belongs */
289 char *name; /* tag name */
290 char *regex; /* search regexp */
291 bool valid; /* write this tag on the tag file */
292 bool is_func; /* function tag: use regexp in CTAGS mode */
293 bool been_warned; /* warning already given for duplicated tag */
294 int lno; /* line number tag is on */
295 long cno; /* character number line starts on */
296 } node;
297
/*
 * A `linebuffer' holds one line of text.  `readline_internal' reads
 * a line from a stream into a linebuffer and works regardless of the
 * length of the line.
 */
typedef struct
{
  /* Size of BUFFER.  */
  long size;

  /* Length of the string in BUFFER after readline reads it.  */
  int len;

  /* The text itself.  */
  char *buffer;
} linebuffer;
311
312 /* Used to support mixing of --lang and file names. */
313 typedef struct
314 {
315 enum {
316 at_language, /* a language specification */
317 at_regexp, /* a regular expression */
318 at_filename, /* a file name */
319 at_stdin, /* read from stdin here */
320 at_end /* stop parsing the list */
321 } arg_type; /* argument type */
322 language *lang; /* language associated with the argument */
323 char *what; /* the argument itself */
324 } argument;
325
326 /* Structure defining a regular expression. */
327 typedef struct regexp
328 {
329 struct regexp *p_next; /* pointer to next in list */
330 language *lang; /* if set, use only for this language */
331 char *pattern; /* the regexp pattern */
332 char *name; /* tag name */
333 struct re_pattern_buffer *pat; /* the compiled pattern */
334 struct re_registers regs; /* re registers */
335 bool error_signaled; /* already signaled for this regexp */
336 bool force_explicit_name; /* do not allow implicit tag name */
337 bool ignore_case; /* ignore case when matching */
338 bool multi_line; /* do a multi-line match on the whole file */
339 } regexp;
340
341
342 /* Many compilers barf on this:
343 Lang_function Ada_funcs;
344 so let's write it this way */
345 static void Ada_funcs (FILE *);
346 static void Asm_labels (FILE *);
347 static void C_entries (int c_ext, FILE *);
348 static void default_C_entries (FILE *);
349 static void plain_C_entries (FILE *);
350 static void Cjava_entries (FILE *);
351 static void Cobol_paragraphs (FILE *);
352 static void Cplusplus_entries (FILE *);
353 static void Cstar_entries (FILE *);
354 static void Erlang_functions (FILE *);
355 static void Forth_words (FILE *);
356 static void Fortran_functions (FILE *);
357 static void Go_functions (FILE *);
358 static void HTML_labels (FILE *);
359 static void Lisp_functions (FILE *);
360 static void Lua_functions (FILE *);
361 static void Makefile_targets (FILE *);
362 static void Pascal_functions (FILE *);
363 static void Perl_functions (FILE *);
364 static void PHP_functions (FILE *);
365 static void PS_functions (FILE *);
366 static void Prolog_functions (FILE *);
367 static void Python_functions (FILE *);
368 static void Ruby_functions (FILE *);
369 static void Scheme_functions (FILE *);
370 static void TeX_commands (FILE *);
371 static void Texinfo_nodes (FILE *);
372 static void Yacc_entries (FILE *);
373 static void just_read_file (FILE *);
374
375 static language *get_language_from_langname (const char *);
376 static void readline (linebuffer *, FILE *);
377 static long readline_internal (linebuffer *, FILE *, char const *);
378 static bool nocase_tail (const char *);
379 static void get_tag (char *, char **);
380
381 static void analyze_regex (char *);
382 static void free_regexps (void);
383 static void regex_tag_multiline (void);
384 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
385 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
386 static _Noreturn void suggest_asking_for_help (void);
387 static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
388 static _Noreturn void pfatal (const char *);
389 static void add_node (node *, node **);
390
391 static void process_file_name (char *, language *);
392 static void process_file (FILE *, char *, language *);
393 static void find_entries (FILE *);
394 static void free_tree (node *);
395 static void free_fdesc (fdesc *);
396 static void pfnote (char *, bool, char *, int, int, long);
397 static void invalidate_nodes (fdesc *, node **);
398 static void put_entries (node *);
399
400 static char *concat (const char *, const char *, const char *);
401 static char *skip_spaces (char *);
402 static char *skip_non_spaces (char *);
403 static char *skip_name (char *);
404 static char *savenstr (const char *, int);
405 static char *savestr (const char *);
406 static char *etags_getcwd (void);
407 static char *relative_filename (char *, char *);
408 static char *absolute_filename (char *, char *);
409 static char *absolute_dirname (char *, char *);
410 static bool filename_is_absolute (char *f);
411 static void canonicalize_filename (char *);
412 static char *etags_mktmp (void);
413 static void linebuffer_init (linebuffer *);
414 static void linebuffer_setlen (linebuffer *, int);
415 static void *xmalloc (size_t);
416 static void *xrealloc (void *, size_t);
417
418 \f
419 static char searchar = '/'; /* use /.../ searches */
420
421 static char *tagfile; /* output file */
422 static char *progname; /* name this program was invoked with */
423 static char *cwd; /* current working directory */
424 static char *tagfiledir; /* directory of tagfile */
425 static FILE *tagf; /* ioptr for tags file */
426 static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
427
428 static fdesc *fdhead; /* head of file description list */
429 static fdesc *curfdp; /* current file description */
430 static char *infilename; /* current input file name */
431 static int lineno; /* line number of current line */
432 static long charno; /* current character number */
433 static long linecharno; /* charno of start of current line */
434 static char *dbp; /* pointer to start of current tag */
435
436 static const int invalidcharno = -1;
437
438 static node *nodehead; /* the head of the binary tree of tags */
439 static node *last_node; /* the last node created */
440
441 static linebuffer lb; /* the current line */
442 static linebuffer filebuf; /* a buffer containing the whole file */
443 static linebuffer token_name; /* a buffer containing a tag name */
444
445 static bool append_to_tagfile; /* -a: append to tags */
446 /* The next five default to true in C and derived languages. */
447 static bool typedefs; /* -t: create tags for C and Ada typedefs */
448 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
449 /* 0 struct/enum/union decls, and C++ */
450 /* member functions. */
451 static bool constantypedefs; /* -d: create tags for C #define, enum */
452 /* constants and variables. */
453 /* -D: opposite of -d. Default under ctags. */
454 static int globals; /* create tags for global variables */
455 static int members; /* create tags for C member variables */
456 static int declarations; /* --declarations: tag them and extern in C&Co*/
457 static int no_line_directive; /* ignore #line directives (undocumented) */
458 static int no_duplicates; /* no duplicate tags for ctags (undocumented) */
459 static bool update; /* -u: update tags */
460 static bool vgrind_style; /* -v: create vgrind style index output */
461 static bool no_warnings; /* -w: suppress warnings (undocumented) */
462 static bool cxref_style; /* -x: create cxref style output */
463 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
464 static bool ignoreindent; /* -I: ignore indentation in C */
465 static int packages_only; /* --packages-only: in Ada, only tag packages*/
466 static int class_qualify; /* -Q: produce class-qualified tags in C++/Java */
467
468 /* STDIN is defined in LynxOS system headers */
469 #ifdef STDIN
470 # undef STDIN
471 #endif
472
473 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
474 static bool parsing_stdin; /* --parse-stdin used */
475
476 static regexp *p_head; /* list of all regexps */
477 static bool need_filebuf; /* some regexes are multi-line */
478
/* Long option table.  Each entry lists, in order, the option name,
   whether it takes an argument, an optional flag pointer and a value.
   Entries whose flag pointer is NULL carry a character code (or STDIN)
   as their value; entries with a non-NULL flag pointer name one of the
   file-scope `int' option variables together with the value associated
   with it.  The options between `#if CTAGS' and `#else' are present
   only in ctags builds, those between `#else' and `#endif' only in
   etags builds.  The final { NULL } entry ends the table.
   NOTE(review): "help" is listed twice ('h' and 'H'); presumably only
   the first entry can ever be matched by name -- confirm.  */
479 static struct option longopts[] =
480 {
481 { "append", no_argument, NULL, 'a' },
482 { "packages-only", no_argument, &packages_only, 1 },
483 { "c++", no_argument, NULL, 'C' },
484 { "declarations", no_argument, &declarations, 1 },
485 { "no-line-directive", no_argument, &no_line_directive, 1 },
486 { "no-duplicates", no_argument, &no_duplicates, 1 },
487 { "help", no_argument, NULL, 'h' },
488 { "help", no_argument, NULL, 'H' },
489 { "ignore-indentation", no_argument, NULL, 'I' },
490 { "language", required_argument, NULL, 'l' },
491 { "members", no_argument, &members, 1 },
492 { "no-members", no_argument, &members, 0 },
493 { "output", required_argument, NULL, 'o' },
494 { "class-qualify", no_argument, &class_qualify, 'Q' },
495 { "regex", required_argument, NULL, 'r' },
496 { "no-regex", no_argument, NULL, 'R' },
497 { "ignore-case-regex", required_argument, NULL, 'c' },
498 { "parse-stdin", required_argument, NULL, STDIN },
499 { "version", no_argument, NULL, 'V' },
500
501 #if CTAGS /* Ctags options */
502 { "backward-search", no_argument, NULL, 'B' },
503 { "cxref", no_argument, NULL, 'x' },
504 { "defines", no_argument, NULL, 'd' },
505 { "globals", no_argument, &globals, 1 },
506 { "typedefs", no_argument, NULL, 't' },
507 { "typedefs-and-c++", no_argument, NULL, 'T' },
508 { "update", no_argument, NULL, 'u' },
509 { "vgrind", no_argument, NULL, 'v' },
510 { "no-warn", no_argument, NULL, 'w' },
511
512 #else /* Etags options */
513 { "no-defines", no_argument, NULL, 'D' },
514 { "no-globals", no_argument, &globals, 0 },
515 { "include", required_argument, NULL, 'i' },
516 #endif
517 { NULL }
518 };
519
/* Table of known compressors: each entry gives a file name suffix and
   the command that decompresses such a file to stdout.  The last
   entry, { NULL }, presumably marks the end of the table for the code
   that scans it -- confirm against the users of this table.  */
520 static compressor compressors[] =
521 {
522 { "z", "gzip -d -c"},
523 { "Z", "gzip -d -c"},
524 { "gz", "gzip -d -c"},
525 { "GZ", "gzip -d -c"},
526 { "bz2", "bzip2 -d -c" },
527 { "xz", "xz -d -c" },
528 { NULL }
529 };
530
531 /*
532 * Language stuff.
533 */
534
535 /* Ada code */
536 static const char *Ada_suffixes [] =
537 { "ads", "adb", "ada", NULL };
538 static const char Ada_help [] =
539 "In Ada code, functions, procedures, packages, tasks and types are\n\
540 tags. Use the '--packages-only' option to create tags for\n\
541 packages only.\n\
542 Ada tag names have suffixes indicating the type of entity:\n\
543 Entity type: Qualifier:\n\
544 ------------ ----------\n\
545 function /f\n\
546 procedure /p\n\
547 package spec /s\n\
548 package body /b\n\
549 type /t\n\
550 task /k\n\
551 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
552 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
553 will just search for any tag 'bidule'.";
554
555 /* Assembly code */
556 static const char *Asm_suffixes [] =
557 { "a", /* Unix assembler */
558 "asm", /* Microcontroller assembly */
559 "def", /* BSO/Tasking definition includes */
560 "inc", /* Microcontroller include files */
561 "ins", /* Microcontroller include files */
562 "s", "sa", /* Unix assembler */
563 "S", /* cpp-processed Unix assembler */
564 "src", /* BSO/Tasking C compiler output */
565 NULL
566 };
567 static const char Asm_help [] =
568 "In assembler code, labels appearing at the beginning of a line,\n\
569 followed by a colon, are tags.";
570
571
572 /* Note that .c and .h can be considered C++, if the --c++ flag was
573 given, or if the `class' or `template' keywords are met inside the file.
574 That is why default_C_entries is called for these. */
575 static const char *default_C_suffixes [] =
576 { "c", "h", NULL };
577 #if CTAGS /* C help for Ctags */
578 static const char default_C_help [] =
579 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
580 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
581 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
582 Use --globals to tag global variables.\n\
583 You can tag function declarations and external variables by\n\
584 using '--declarations', and struct members by using '--members'.";
585 #else /* C help for Etags */
586 static const char default_C_help [] =
587 "In C code, any C function or typedef is a tag, and so are\n\
588 definitions of 'struct', 'union' and 'enum'. '#define' macro\n\
589 definitions and 'enum' constants are tags unless you specify\n\
590 '--no-defines'. Global variables are tags unless you specify\n\
591 '--no-globals' and so are struct members unless you specify\n\
592 '--no-members'. Use of '--no-globals', '--no-defines' and\n\
593 '--no-members' can make the tags table file much smaller.\n\
594 You can tag function declarations and external variables by\n\
595 using '--declarations'.";
596 #endif /* C help for Ctags and Etags */
597
598 static const char *Cplusplus_suffixes [] =
599 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
600 "M", /* Objective C++ */
601 "pdb", /* PostScript with C syntax */
602 NULL };
603 static const char Cplusplus_help [] =
604 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
605 --help --lang=c --lang=c++ for full help.)\n\
606 In addition to C tags, member functions are also recognized. Member\n\
607 variables are recognized unless you use the '--no-members' option.\n\
608 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
609 and 'CLASS::FUNCTION'. 'operator' definitions have tag names like\n\
610 'operator+'.";
611
/* Java code: file name suffixes and detailed help text.  The help
   text is never modified, so it is const like the help strings of the
   other languages; it is reached through the `const char *help'
   member of `language'.  */
static const char *Cjava_suffixes [] =
  { "java", NULL };
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
617
618
/* Cobol code: file name suffixes and detailed help text.  The help
   text is never modified, so it is const like the help strings of the
   other languages; it is reached through the `const char *help'
   member of `language'.  */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
624
625 static const char *Cstar_suffixes [] =
626 { "cs", "hs", NULL };
627
628 static const char *Erlang_suffixes [] =
629 { "erl", "hrl", NULL };
630 static const char Erlang_help [] =
631 "In Erlang code, the tags are the functions, records and macros\n\
632 defined in the file.";
633
/* Forth code: file name suffixes and detailed help text.  The suffix
   table is private to this file, so it gets internal linkage like the
   suffix tables of all the other languages.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static const char Forth_help [] =
"In Forth code, tags are words defined by ':',\n\
constant, code, create, defer, value, variable, buffer:, field.";
639
640 static const char *Fortran_suffixes [] =
641 { "F", "f", "f90", "for", NULL };
642 static const char Fortran_help [] =
643 "In Fortran code, functions, subroutines and block data are tags.";
644
645 static const char *Go_suffixes [] = {"go", NULL};
646 static const char Go_help [] =
647 "In Go code, functions, interfaces and packages are tags.";
648
649 static const char *HTML_suffixes [] =
650 { "htm", "html", "shtml", NULL };
651 static const char HTML_help [] =
652 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
653 'h3' headers. Also, tags are 'name=' in anchors and all\n\
654 occurrences of 'id='.";
655
656 static const char *Lisp_suffixes [] =
657 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
658 static const char Lisp_help [] =
659 "In Lisp code, any function defined with 'defun', any variable\n\
660 defined with 'defvar' or 'defconst', and in general the first\n\
661 argument of any expression that starts with '(def' in column zero\n\
662 is a tag.\n\
663 The '--declarations' option tags \"(defvar foo)\" constructs too.";
664
665 static const char *Lua_suffixes [] =
666 { "lua", "LUA", NULL };
667 static const char Lua_help [] =
668 "In Lua scripts, all functions are tags.";
669
670 static const char *Makefile_filenames [] =
671 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
672 static const char Makefile_help [] =
673 "In makefiles, targets are tags; additionally, variables are tags\n\
674 unless you specify '--no-globals'.";
675
676 static const char *Objc_suffixes [] =
677 { "lm", /* Objective lex file */
678 "m", /* Objective C file */
679 NULL };
680 static const char Objc_help [] =
681 "In Objective C code, tags include Objective C definitions for classes,\n\
682 class categories, methods and protocols. Tags for variables and\n\
683 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\n\
684 (Use --help --lang=c --lang=objc --lang=java for full help.)";
685
686 static const char *Pascal_suffixes [] =
687 { "p", "pas", NULL };
688 static const char Pascal_help [] =
689 "In Pascal code, the tags are the functions and procedures defined\n\
690 in the file.";
691 /* " // this is for working around an Emacs highlighting bug... */
692
693 static const char *Perl_suffixes [] =
694 { "pl", "pm", NULL };
695 static const char *Perl_interpreters [] =
696 { "perl", "@PERL@", NULL };
697 static const char Perl_help [] =
698 "In Perl code, the tags are the packages, subroutines and variables\n\
699 defined by the 'package', 'sub', 'my' and 'local' keywords. Use\n\
700 '--globals' if you want to tag global variables. Tags for\n\
701 subroutines are named 'PACKAGE::SUB'. The name for subroutines\n\
702 defined in the default package is 'main::SUB'.";
703
704 static const char *PHP_suffixes [] =
705 { "php", "php3", "php4", NULL };
706 static const char PHP_help [] =
707 "In PHP code, tags are functions, classes and defines. Unless you use\n\
708 the '--no-members' option, vars are tags too.";
709
710 static const char *plain_C_suffixes [] =
711 { "pc", /* Pro*C file */
712 NULL };
713
714 static const char *PS_suffixes [] =
715 { "ps", "psw", NULL }; /* .psw is for PSWrap */
716 static const char PS_help [] =
717 "In PostScript code, the tags are the functions.";
718
719 static const char *Prolog_suffixes [] =
720 { "prolog", NULL };
721 static const char Prolog_help [] =
722 "In Prolog code, tags are predicates and rules at the beginning of\n\
723 line.";
724
725 static const char *Python_suffixes [] =
726 { "py", NULL };
727 static const char Python_help [] =
728 "In Python code, 'def' or 'class' at the beginning of a line\n\
729 generate a tag.";
730
731 static const char *Ruby_suffixes [] =
732 { "rb", "ruby", NULL };
733 static const char Ruby_help [] =
734 "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
735 a line generate a tag. Constants also generate a tag.";
736
737 /* Can't do the `SCM' or `scm' prefix with a version number. */
738 static const char *Scheme_suffixes [] =
739 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
740 static const char Scheme_help [] =
741 "In Scheme code, tags include anything defined with 'def' or with a\n\
742 construct whose name starts with 'def'. They also include\n\
743 variables set with 'set!' at top level in the file.";
744
745 static const char *TeX_suffixes [] =
746 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
747 static const char TeX_help [] =
748 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
749 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
750 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
751 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
752 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
753 \n\
754 Other commands can be specified by setting the environment variable\n\
755 'TEXTAGS' to a colon-separated list like, for example,\n\
756 TEXTAGS=\"mycommand:myothercommand\".";
757
758
759 static const char *Texinfo_suffixes [] =
760 { "texi", "texinfo", "txi", NULL };
761 static const char Texinfo_help [] =
762 "for texinfo files, lines starting with @node are tagged.";
763
764 static const char *Yacc_suffixes [] =
765 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
766 static const char Yacc_help [] =
767 "In Bison or Yacc input files, each rule defines as a tag the\n\
768 nonterminal it constructs. The portions of the file that contain\n\
769 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
770 for full help).";
771
/* Detailed help for the pseudo-language 'auto'.  The wording "based
   on" matches the description of 'auto' printed by
   print_language_names.  */
static const char auto_help [] =
"'auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
775
776 static const char none_help [] =
777 "'none' is not a real language, it indicates to only do\n\
778 regexp processing on files.";
779
780 static const char no_lang_help [] =
781 "No detailed help available for this language.";
782
783
784 /*
785 * Table of languages.
786 *
787 * It is ok for a given function to be listed under more than one
788 * name. I just didn't.
 *
 * Each entry initializes the members of `language' positionally, in
 * this order: name, help, function, suffixes, filenames, interpreters,
 * metasource.  Members left out at the end of an entry are implicitly
 * zero (NULL or false).  Only "makefile" supplies filenames, only
 * "perl" supplies interpreters, and only "yacc" sets metasource.
 * The "auto" entry has no parse function.
789 */
790
791 static language lang_names [] =
792 {
793 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
794 { "asm", Asm_help, Asm_labels, Asm_suffixes },
795 { "c", default_C_help, default_C_entries, default_C_suffixes },
796 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
797 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
798 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
799 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
800 { "forth", Forth_help, Forth_words, Forth_suffixes },
801 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
802 { "go", Go_help, Go_functions, Go_suffixes },
803 { "html", HTML_help, HTML_labels, HTML_suffixes },
804 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
805 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
806 { "lua", Lua_help, Lua_functions, Lua_suffixes },
807 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
808 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
809 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
810 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
811 { "php", PHP_help, PHP_functions, PHP_suffixes },
812 { "postscript",PS_help, PS_functions, PS_suffixes },
813 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
814 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
815 { "python", Python_help, Python_functions, Python_suffixes },
816 { "ruby", Ruby_help, Ruby_functions, Ruby_suffixes },
817 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
818 { "tex", TeX_help, TeX_commands, TeX_suffixes },
819 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
820 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
821 { "auto", auto_help }, /* default guessing scheme */
822 { "none", none_help, just_read_file }, /* regexp matching only */
823 { NULL } /* end of list */
824 };
825
826 \f
/*
 * Print, on standard output, one line per entry of lang_names: the
 * language name followed by its default file names and its dot
 * suffixes.  Then print the notes on how a language is chosen when
 * none is given, and on compressed files.
 */
827 static void
828 print_language_names (void)
829 {
830 language *lang;
831 const char **name, **ext;
832
/* Heading.  */
833 puts ("\nThese are the currently supported languages, along with the\n\
834 default file names and dot suffixes:");
/* The table ends at the entry whose name is NULL.  */
835 for (lang = lang_names; lang->name != NULL; lang++)
836 {
/* The language name is left-justified in a field of width 10.  */
837 printf (" %-*s", 10, lang->name);
838 if (lang->filenames != NULL)
839 for (name = lang->filenames; *name != NULL; name++)
840 printf (" %s", *name);
841 if (lang->suffixes != NULL)
842 for (ext = lang->suffixes; *ext != NULL; ext++)
843 printf (" .%s", *ext);
/* End the line for this language.  */
844 puts ("");
845 }
846 puts ("where 'auto' means use default language for files based on file\n\
847 name suffix, and 'none' means only do regexp processing on files.\n\
848 If no language is specified and no matching suffix is found,\n\
849 the first line of the file is read for a sharp-bang (#!) sequence\n\
850 followed by the name of an interpreter. If no such sequence is found,\n\
851 Fortran is tried first; if no tags are found, C is tried next.\n\
852 When parsing any C file, a \"class\" or \"template\" keyword\n\
853 switches to C++.");
854 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
855 \n\
856 For detailed help on a given language use, for example,\n\
857 etags --help --lang=ada.");
858 }
859
860 #ifndef EMACS_NAME
861 # define EMACS_NAME "standalone"
862 #endif
863 #ifndef VERSION
864 # define VERSION "17.38.1.4"
865 #endif
866 static _Noreturn void
867 print_version (void)
868 {
869 char emacs_copyright[] = COPYRIGHT;
870
871 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
872 puts (emacs_copyright);
873 puts ("This program is distributed under the terms in ETAGS.README");
874
875 exit (EXIT_SUCCESS);
876 }
877
/* Help for undocumented options is printed only when this is true;
   it is false unless the build defines it otherwise.  */
878 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
879 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
880 #endif
881
/*
 * Print help on standard output and exit successfully; never returns.
 *
 * ARGBUFFER is scanned up to its at_end entry.  For every at_language
 * entry, the help text of that language is printed (separated by
 * blank lines), and nothing else is printed.  If there is no such
 * entry, the general usage message is printed, followed by the list
 * of languages.  Which options are described depends on CTAGS.
 */
882 static _Noreturn void
883 print_help (argument *argbuffer)
884 {
885 bool help_for_lang = false;
886
/* Per-language help, if any language was given.  */
887 for (; argbuffer->arg_type != at_end; argbuffer++)
888 if (argbuffer->arg_type == at_language)
889 {
890 if (help_for_lang)
891 puts ("");
892 puts (argbuffer->lang->help);
893 help_for_lang = true;
894 }
895
896 if (help_for_lang)
897 exit (EXIT_SUCCESS);
898
/* General usage message.  */
899 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
900 \n\
901 These are the options accepted by %s.\n", progname, progname);
902 puts ("You may use unambiguous abbreviations for the long option names.");
903 puts (" A - as file name means read names from stdin (one per line).\n\
904 Absolute names are stored in the output file as they are.\n\
905 Relative ones are stored relative to the output file's directory.\n");
906
907 puts ("-a, --append\n\
908 Append tag entries to existing tags file.");
909
910 puts ("--packages-only\n\
911 For Ada files, only generate tags for packages.");
912
913 if (CTAGS)
914 puts ("-B, --backward-search\n\
915 Write the search commands for the tag entries using '?', the\n\
916 backward-search command instead of '/', the forward-search command.");
917
918 /* This option is mostly obsolete, because etags can now automatically
919 detect C++. Retained for backward compatibility and for debugging and
920 experimentation. In principle, we could want to tag as C++ even
921 before any "class" or "template" keyword.
922 puts ("-C, --c++\n\
923 Treat files whose name suffix defaults to C language as C++ files.");
924 */
925
926 puts ("--declarations\n\
927 In C and derived languages, create tags for function declarations,");
928 if (CTAGS)
929 puts ("\tand create tags for extern variables if --globals is used.");
930 else
931 puts
932 ("\tand create tags for extern variables unless --no-globals is used.");
933
934 if (CTAGS)
935 puts ("-d, --defines\n\
936 Create tag entries for C #define constants and enum constants, too.");
937 else
938 puts ("-D, --no-defines\n\
939 Don't create tag entries for C #define constants and enum constants.\n\
940 This makes the tags file smaller.");
941
942 if (!CTAGS)
943 puts ("-i FILE, --include=FILE\n\
944 Include a note in tag file indicating that, when searching for\n\
945 a tag, one should also consult the tags file FILE after\n\
946 checking the current file.");
947
948 puts ("-l LANG, --language=LANG\n\
949 Force the following files to be considered as written in the\n\
950 named language up to the next --language=LANG option.");
951
952 if (CTAGS)
953 puts ("--globals\n\
954 Create tag entries for global variables in some languages.");
955 else
956 puts ("--no-globals\n\
957 Do not create tag entries for global variables in some\n\
958 languages. This makes the tags file smaller.");
959
960 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
961 puts ("--no-line-directive\n\
962 Ignore #line preprocessor directives in C and derived languages.");
963
964 if (CTAGS)
965 puts ("--members\n\
966 Create tag entries for members of structures in some languages.");
967 else
968 puts ("--no-members\n\
969 Do not create tag entries for members of structures\n\
970 in some languages.");
971
972 puts ("-Q, --class-qualify\n\
973 Qualify tag names with their class name in C++, ObjC, and Java.\n\
974 This produces tag names of the form \"class::member\" for C++,\n\
975 \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
976 For Objective C, this also produces class methods qualified with\n\
977 their arguments, as in \"foo:bar:baz:more\".");
978 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
979 Make a tag for each line matching a regular expression pattern\n\
980 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
981 files only. REGEXFILE is a file containing one REGEXP per line.\n\
982 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
983 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
984 puts (" If TAGNAME/ is present, the tags created are named.\n\
985 For example Tcl named tags can be created with:\n\
986 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
987 MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
988 'm' means to allow multi-line matches, 's' implies 'm' and\n\
989 causes dot to match any character, including newline.");
990
991 puts ("-R, --no-regex\n\
992 Don't create tags from regexps for the following files.");
993
994 puts ("-I, --ignore-indentation\n\
995 In C and C++ do not assume that a closing brace in the first\n\
996 column is the final brace of a function or structure definition.");
997
998 puts ("-o FILE, --output=FILE\n\
999 Write the tags to FILE.");
1000
1001 puts ("--parse-stdin=NAME\n\
1002 Read from standard input and record tags as belonging to file NAME.");
1003
/* The remaining option descriptions, up to --version, are printed
   for ctags only.  */
1004 if (CTAGS)
1005 {
1006 puts ("-t, --typedefs\n\
1007 Generate tag entries for C and Ada typedefs.");
1008 puts ("-T, --typedefs-and-c++\n\
1009 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1010 and C++ member functions.");
1011 }
1012
1013 if (CTAGS)
1014 puts ("-u, --update\n\
1015 Update the tag entries for the given files, leaving tag\n\
1016 entries for other files in place. Currently, this is\n\
1017 implemented by deleting the existing entries for the given\n\
1018 files and then rewriting the new entries at the end of the\n\
1019 tags file. It is often faster to simply rebuild the entire\n\
1020 tag file than to use this.");
1021
1022 if (CTAGS)
1023 {
1024 puts ("-v, --vgrind\n\
1025 Print on the standard output an index of items intended for\n\
1026 human consumption, similar to the output of vgrind. The index\n\
1027 is sorted, and gives the page number of each item.");
1028
/* NOTE(review): the next two help entries both describe -w under
   different long names -- confirm which one is intended.  */
1029 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1030 puts ("-w, --no-duplicates\n\
1031 Do not create duplicate tag entries, for compatibility with\n\
1032 traditional ctags.");
1033
1034 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1035 puts ("-w, --no-warn\n\
1036 Suppress warning messages about duplicate tag entries.");
1037
1038 puts ("-x, --cxref\n\
1039 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1040 The output uses line numbers instead of page numbers, but\n\
1041 beyond that the differences are cosmetic; try both to see\n\
1042 which you like.");
1043 }
1044
1045 puts ("-V, --version\n\
1046 Print the version of the program.\n\
1047 -h, --help\n\
1048 Print this help message.\n\
1049 Followed by one or more '--language' options prints detailed\n\
1050 help about tag generation for the specified languages.");
1051
/* List of languages, then the bug-report address.  */
1052 print_language_names ();
1053
1054 puts ("");
1055 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1056
1057 exit (EXIT_SUCCESS);
1058 }
1059
1060 \f
1061 int
1062 main (int argc, char **argv)
1063 {
1064 int i;
1065 unsigned int nincluded_files;
1066 char **included_files;
1067 argument *argbuffer;
1068 int current_arg, file_count;
1069 linebuffer filename_lb;
1070 bool help_asked = false;
1071 ptrdiff_t len;
1072 char *optstring;
1073 int opt;
1074
1075 progname = argv[0];
1076 nincluded_files = 0;
1077 included_files = xnew (argc, char *);
1078 current_arg = 0;
1079 file_count = 0;
1080
1081 /* Allocate enough no matter what happens. Overkill, but each one
1082 is small. */
1083 argbuffer = xnew (argc, argument);
1084
1085 /*
1086 * Always find typedefs and structure tags.
1087 * Also default to find macro constants, enum constants, struct
1088 * members and global variables. Do it for both etags and ctags.
1089 */
1090 typedefs = typedefs_or_cplusplus = constantypedefs = true;
1091 globals = members = true;
1092
1093 /* When the optstring begins with a '-' getopt_long does not rearrange the
1094 non-options arguments to be at the end, but leaves them alone. */
1095 optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1096 (CTAGS) ? "BxdtTuvw" : "Di:",
1097 "");
1098
1099 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1100 switch (opt)
1101 {
1102 case 0:
1103 /* If getopt returns 0, then it has already processed a
1104 long-named option. We should do nothing. */
1105 break;
1106
1107 case 1:
1108 /* This means that a file name has been seen. Record it. */
1109 argbuffer[current_arg].arg_type = at_filename;
1110 argbuffer[current_arg].what = optarg;
1111 len = strlen (optarg);
1112 if (whatlen_max < len)
1113 whatlen_max = len;
1114 ++current_arg;
1115 ++file_count;
1116 break;
1117
1118 case STDIN:
1119 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1120 argbuffer[current_arg].arg_type = at_stdin;
1121 argbuffer[current_arg].what = optarg;
1122 len = strlen (optarg);
1123 if (whatlen_max < len)
1124 whatlen_max = len;
1125 ++current_arg;
1126 ++file_count;
1127 if (parsing_stdin)
1128 fatal ("cannot parse standard input more than once");
1129 parsing_stdin = true;
1130 break;
1131
1132 /* Common options. */
1133 case 'a': append_to_tagfile = true; break;
1134 case 'C': cplusplus = true; break;
1135 case 'f': /* for compatibility with old makefiles */
1136 case 'o':
1137 if (tagfile)
1138 {
1139 error ("-o option may only be given once.");
1140 suggest_asking_for_help ();
1141 /* NOTREACHED */
1142 }
1143 tagfile = optarg;
1144 break;
1145 case 'I':
1146 case 'S': /* for backward compatibility */
1147 ignoreindent = true;
1148 break;
1149 case 'l':
1150 {
1151 language *lang = get_language_from_langname (optarg);
1152 if (lang != NULL)
1153 {
1154 argbuffer[current_arg].lang = lang;
1155 argbuffer[current_arg].arg_type = at_language;
1156 ++current_arg;
1157 }
1158 }
1159 break;
1160 case 'c':
1161 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1162 optarg = concat (optarg, "i", ""); /* memory leak here */
1163 /* FALLTHRU */
1164 case 'r':
1165 argbuffer[current_arg].arg_type = at_regexp;
1166 argbuffer[current_arg].what = optarg;
1167 len = strlen (optarg);
1168 if (whatlen_max < len)
1169 whatlen_max = len;
1170 ++current_arg;
1171 break;
1172 case 'R':
1173 argbuffer[current_arg].arg_type = at_regexp;
1174 argbuffer[current_arg].what = NULL;
1175 ++current_arg;
1176 break;
1177 case 'V':
1178 print_version ();
1179 break;
1180 case 'h':
1181 case 'H':
1182 help_asked = true;
1183 break;
1184 case 'Q':
1185 class_qualify = 1;
1186 break;
1187
1188 /* Etags options */
1189 case 'D': constantypedefs = false; break;
1190 case 'i': included_files[nincluded_files++] = optarg; break;
1191
1192 /* Ctags options. */
1193 case 'B': searchar = '?'; break;
1194 case 'd': constantypedefs = true; break;
1195 case 't': typedefs = true; break;
1196 case 'T': typedefs = typedefs_or_cplusplus = true; break;
1197 case 'u': update = true; break;
1198 case 'v': vgrind_style = true; /*FALLTHRU*/
1199 case 'x': cxref_style = true; break;
1200 case 'w': no_warnings = true; break;
1201 default:
1202 suggest_asking_for_help ();
1203 /* NOTREACHED */
1204 }
1205
1206 /* No more options. Store the rest of arguments. */
1207 for (; optind < argc; optind++)
1208 {
1209 argbuffer[current_arg].arg_type = at_filename;
1210 argbuffer[current_arg].what = argv[optind];
1211 len = strlen (argv[optind]);
1212 if (whatlen_max < len)
1213 whatlen_max = len;
1214 ++current_arg;
1215 ++file_count;
1216 }
1217
1218 argbuffer[current_arg].arg_type = at_end;
1219
1220 if (help_asked)
1221 print_help (argbuffer);
1222 /* NOTREACHED */
1223
1224 if (nincluded_files == 0 && file_count == 0)
1225 {
1226 error ("no input files specified.");
1227 suggest_asking_for_help ();
1228 /* NOTREACHED */
1229 }
1230
1231 if (tagfile == NULL)
1232 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1233 cwd = etags_getcwd (); /* the current working directory */
1234 if (cwd[strlen (cwd) - 1] != '/')
1235 {
1236 char *oldcwd = cwd;
1237 cwd = concat (oldcwd, "/", "");
1238 free (oldcwd);
1239 }
1240
1241 /* Compute base directory for relative file names. */
1242 if (streq (tagfile, "-")
1243 || strneq (tagfile, "/dev/", 5))
1244 tagfiledir = cwd; /* relative file names are relative to cwd */
1245 else
1246 {
1247 canonicalize_filename (tagfile);
1248 tagfiledir = absolute_dirname (tagfile, cwd);
1249 }
1250
1251 linebuffer_init (&lb);
1252 linebuffer_init (&filename_lb);
1253 linebuffer_init (&filebuf);
1254 linebuffer_init (&token_name);
1255
1256 if (!CTAGS)
1257 {
1258 if (streq (tagfile, "-"))
1259 {
1260 tagf = stdout;
1261 SET_BINARY (fileno (stdout));
1262 }
1263 else
1264 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1265 if (tagf == NULL)
1266 pfatal (tagfile);
1267 }
1268
1269 /*
1270 * Loop through files finding functions.
1271 */
1272 for (i = 0; i < current_arg; i++)
1273 {
1274 static language *lang; /* non-NULL if language is forced */
1275 char *this_file;
1276
1277 switch (argbuffer[i].arg_type)
1278 {
1279 case at_language:
1280 lang = argbuffer[i].lang;
1281 break;
1282 case at_regexp:
1283 analyze_regex (argbuffer[i].what);
1284 break;
1285 case at_filename:
1286 this_file = argbuffer[i].what;
1287 /* Input file named "-" means read file names from stdin
1288 (one per line) and use them. */
1289 if (streq (this_file, "-"))
1290 {
1291 if (parsing_stdin)
1292 fatal ("cannot parse standard input "
1293 "AND read file names from it");
1294 while (readline_internal (&filename_lb, stdin, "-") > 0)
1295 process_file_name (filename_lb.buffer, lang);
1296 }
1297 else
1298 process_file_name (this_file, lang);
1299 break;
1300 case at_stdin:
1301 this_file = argbuffer[i].what;
1302 process_file (stdin, this_file, lang);
1303 break;
1304 default:
1305 error ("internal error: arg_type");
1306 }
1307 }
1308
1309 free_regexps ();
1310 free (lb.buffer);
1311 free (filebuf.buffer);
1312 free (token_name.buffer);
1313
1314 if (!CTAGS || cxref_style)
1315 {
1316 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1317 put_entries (nodehead);
1318 free_tree (nodehead);
1319 nodehead = NULL;
1320 if (!CTAGS)
1321 {
1322 fdesc *fdp;
1323
1324 /* Output file entries that have no tags. */
1325 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1326 if (!fdp->written)
1327 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1328
1329 while (nincluded_files-- > 0)
1330 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1331
1332 if (fclose (tagf) == EOF)
1333 pfatal (tagfile);
1334 }
1335
1336 exit (EXIT_SUCCESS);
1337 }
1338
1339 /* From here on, we are in (CTAGS && !cxref_style) */
1340 if (update)
1341 {
1342 char *cmd =
1343 xmalloc (strlen (tagfile) + whatlen_max +
1344 sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1345 for (i = 0; i < current_arg; ++i)
1346 {
1347 switch (argbuffer[i].arg_type)
1348 {
1349 case at_filename:
1350 case at_stdin:
1351 break;
1352 default:
1353 continue; /* the for loop */
1354 }
1355 char *z = stpcpy (cmd, "mv ");
1356 z = stpcpy (z, tagfile);
1357 z = stpcpy (z, " OTAGS;fgrep -v '\t");
1358 z = stpcpy (z, argbuffer[i].what);
1359 z = stpcpy (z, "\t' OTAGS >");
1360 z = stpcpy (z, tagfile);
1361 strcpy (z, ";rm OTAGS");
1362 if (system (cmd) != EXIT_SUCCESS)
1363 fatal ("failed to execute shell command");
1364 }
1365 free (cmd);
1366 append_to_tagfile = true;
1367 }
1368
1369 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1370 if (tagf == NULL)
1371 pfatal (tagfile);
1372 put_entries (nodehead); /* write all the tags (CTAGS) */
1373 free_tree (nodehead);
1374 nodehead = NULL;
1375 if (fclose (tagf) == EOF)
1376 pfatal (tagfile);
1377
1378 if (CTAGS)
1379 if (append_to_tagfile || update)
1380 {
1381 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1382 /* Maybe these should be used:
1383 setenv ("LC_COLLATE", "C", 1);
1384 setenv ("LC_ALL", "C", 1); */
1385 char *z = stpcpy (cmd, "sort -u -o ");
1386 z = stpcpy (z, tagfile);
1387 *z++ = ' ';
1388 strcpy (z, tagfile);
1389 exit (system (cmd));
1390 }
1391 return EXIT_SUCCESS;
1392 }
1393
1394
1395 /*
1396 * Return a compressor given the file name. If EXTPTR is non-zero,
1397 * return a pointer into FILE where the compressor-specific
1398 * extension begins. If no compressor is found, NULL is returned
1399 * and EXTPTR is not significant.
1400 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1401 */
1402 static compressor *
1403 get_compressor_from_suffix (char *file, char **extptr)
1404 {
1405 compressor *compr;
1406 char *slash, *suffix;
1407
1408 /* File has been processed by canonicalize_filename,
1409 so we don't need to consider backslashes on DOS_NT. */
1410 slash = strrchr (file, '/');
1411 suffix = strrchr (file, '.');
1412 if (suffix == NULL || suffix < slash)
1413 return NULL;
1414 if (extptr != NULL)
1415 *extptr = suffix;
1416 suffix += 1;
1417 /* Let those poor souls who live with DOS 8+3 file name limits get
1418 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1419 Only the first do loop is run if not MSDOS */
1420 do
1421 {
1422 for (compr = compressors; compr->suffix != NULL; compr++)
1423 if (streq (compr->suffix, suffix))
1424 return compr;
1425 if (!MSDOS)
1426 break; /* do it only once: not really a loop */
1427 if (extptr != NULL)
1428 *extptr = ++suffix;
1429 } while (*suffix != '\0');
1430 return NULL;
1431 }
1432
1433
1434
1435 /*
1436 * Return a language given the name.
1437 */
1438 static language *
1439 get_language_from_langname (const char *name)
1440 {
1441 language *lang;
1442
1443 if (name == NULL)
1444 error ("empty language name");
1445 else
1446 {
1447 for (lang = lang_names; lang->name != NULL; lang++)
1448 if (streq (name, lang->name))
1449 return lang;
1450 error ("unknown language \"%s\"", name);
1451 }
1452
1453 return NULL;
1454 }
1455
1456
1457 /*
1458 * Return a language given the interpreter name.
1459 */
1460 static language *
1461 get_language_from_interpreter (char *interpreter)
1462 {
1463 language *lang;
1464 const char **iname;
1465
1466 if (interpreter == NULL)
1467 return NULL;
1468 for (lang = lang_names; lang->name != NULL; lang++)
1469 if (lang->interpreters != NULL)
1470 for (iname = lang->interpreters; *iname != NULL; iname++)
1471 if (streq (*iname, interpreter))
1472 return lang;
1473
1474 return NULL;
1475 }
1476
1477
1478
1479 /*
1480 * Return a language given the file name.
1481 */
1482 static language *
1483 get_language_from_filename (char *file, int case_sensitive)
1484 {
1485 language *lang;
1486 const char **name, **ext, *suffix;
1487
1488 /* Try whole file name first. */
1489 for (lang = lang_names; lang->name != NULL; lang++)
1490 if (lang->filenames != NULL)
1491 for (name = lang->filenames; *name != NULL; name++)
1492 if ((case_sensitive)
1493 ? streq (*name, file)
1494 : strcaseeq (*name, file))
1495 return lang;
1496
1497 /* If not found, try suffix after last dot. */
1498 suffix = strrchr (file, '.');
1499 if (suffix == NULL)
1500 return NULL;
1501 suffix += 1;
1502 for (lang = lang_names; lang->name != NULL; lang++)
1503 if (lang->suffixes != NULL)
1504 for (ext = lang->suffixes; *ext != NULL; ext++)
1505 if ((case_sensitive)
1506 ? streq (*ext, suffix)
1507 : strcaseeq (*ext, suffix))
1508 return lang;
1509 return NULL;
1510 }
1511
1512 \f
1513 /*
1514 * This routine is called on each file argument.
1515 */
1516 static void
1517 process_file_name (char *file, language *lang)
1518 {
1519 FILE *inf;
1520 fdesc *fdp;
1521 compressor *compr;
1522 char *compressed_name, *uncompressed_name;
1523 char *ext, *real_name, *tmp_name;
1524 int retval;
1525
1526 canonicalize_filename (file);
1527 if (streq (file, tagfile) && !streq (tagfile, "-"))
1528 {
1529 error ("skipping inclusion of %s in self.", file);
1530 return;
1531 }
1532 compr = get_compressor_from_suffix (file, &ext);
1533 if (compr)
1534 {
1535 compressed_name = file;
1536 uncompressed_name = savenstr (file, ext - file);
1537 }
1538 else
1539 {
1540 compressed_name = NULL;
1541 uncompressed_name = file;
1542 }
1543
1544 /* If the canonicalized uncompressed name
1545 has already been dealt with, skip it silently. */
1546 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1547 {
1548 assert (fdp->infname != NULL);
1549 if (streq (uncompressed_name, fdp->infname))
1550 goto cleanup;
1551 }
1552
1553 inf = fopen (file, "r" FOPEN_BINARY);
1554 if (inf)
1555 real_name = file;
1556 else
1557 {
1558 int file_errno = errno;
1559 if (compressed_name)
1560 {
1561 /* Try with the given suffix. */
1562 inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1563 if (inf)
1564 real_name = uncompressed_name;
1565 }
1566 else
1567 {
1568 /* Try all possible suffixes. */
1569 for (compr = compressors; compr->suffix != NULL; compr++)
1570 {
1571 compressed_name = concat (file, ".", compr->suffix);
1572 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1573 if (inf)
1574 {
1575 real_name = compressed_name;
1576 break;
1577 }
1578 if (MSDOS)
1579 {
1580 char *suf = compressed_name + strlen (file);
1581 size_t suflen = strlen (compr->suffix) + 1;
1582 for ( ; suf[1]; suf++, suflen--)
1583 {
1584 memmove (suf, suf + 1, suflen);
1585 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1586 if (inf)
1587 {
1588 real_name = compressed_name;
1589 break;
1590 }
1591 }
1592 if (inf)
1593 break;
1594 }
1595 free (compressed_name);
1596 compressed_name = NULL;
1597 }
1598 }
1599 if (! inf)
1600 {
1601 errno = file_errno;
1602 perror (file);
1603 goto cleanup;
1604 }
1605 }
1606
1607 if (real_name == compressed_name)
1608 {
1609 fclose (inf);
1610 tmp_name = etags_mktmp ();
1611 if (!tmp_name)
1612 inf = NULL;
1613 else
1614 {
1615 #if MSDOS || defined (DOS_NT)
1616 char *cmd1 = concat (compr->command, " \"", real_name);
1617 char *cmd = concat (cmd1, "\" > ", tmp_name);
1618 #else
1619 char *cmd1 = concat (compr->command, " '", real_name);
1620 char *cmd = concat (cmd1, "' > ", tmp_name);
1621 #endif
1622 free (cmd1);
1623 int tmp_errno;
1624 if (system (cmd) == -1)
1625 {
1626 inf = NULL;
1627 tmp_errno = EINVAL;
1628 }
1629 else
1630 {
1631 inf = fopen (tmp_name, "r" FOPEN_BINARY);
1632 tmp_errno = errno;
1633 }
1634 free (cmd);
1635 errno = tmp_errno;
1636 }
1637
1638 if (!inf)
1639 {
1640 perror (real_name);
1641 goto cleanup;
1642 }
1643 }
1644
1645 process_file (inf, uncompressed_name, lang);
1646
1647 retval = fclose (inf);
1648 if (real_name == compressed_name)
1649 {
1650 remove (tmp_name);
1651 free (tmp_name);
1652 }
1653 if (retval < 0)
1654 pfatal (file);
1655
1656 cleanup:
1657 if (compressed_name != file)
1658 free (compressed_name);
1659 if (uncompressed_name != file)
1660 free (uncompressed_name);
1661 last_node = NULL;
1662 curfdp = NULL;
1663 return;
1664 }
1665
1666 static void
1667 process_file (FILE *fh, char *fn, language *lang)
1668 {
1669 static const fdesc emptyfdesc;
1670 fdesc *fdp;
1671
1672 infilename = fn;
1673 /* Create a new input file description entry. */
1674 fdp = xnew (1, fdesc);
1675 *fdp = emptyfdesc;
1676 fdp->next = fdhead;
1677 fdp->infname = savestr (fn);
1678 fdp->lang = lang;
1679 fdp->infabsname = absolute_filename (fn, cwd);
1680 fdp->infabsdir = absolute_dirname (fn, cwd);
1681 if (filename_is_absolute (fn))
1682 {
1683 /* An absolute file name. Canonicalize it. */
1684 fdp->taggedfname = absolute_filename (fn, NULL);
1685 }
1686 else
1687 {
1688 /* A file name relative to cwd. Make it relative
1689 to the directory of the tags file. */
1690 fdp->taggedfname = relative_filename (fn, tagfiledir);
1691 }
1692 fdp->usecharno = true; /* use char position when making tags */
1693 fdp->prop = NULL;
1694 fdp->written = false; /* not written on tags file yet */
1695
1696 fdhead = fdp;
1697 curfdp = fdhead; /* the current file description */
1698
1699 find_entries (fh);
1700
1701 /* If not Ctags, and if this is not metasource and if it contained no #line
1702 directives, we can write the tags and free all nodes pointing to
1703 curfdp. */
1704 if (!CTAGS
1705 && curfdp->usecharno /* no #line directives in this file */
1706 && !curfdp->lang->metasource)
1707 {
1708 node *np, *prev;
1709
1710 /* Look for the head of the sublist relative to this file. See add_node
1711 for the structure of the node tree. */
1712 prev = NULL;
1713 for (np = nodehead; np != NULL; prev = np, np = np->left)
1714 if (np->fdp == curfdp)
1715 break;
1716
1717 /* If we generated tags for this file, write and delete them. */
1718 if (np != NULL)
1719 {
1720 /* This is the head of the last sublist, if any. The following
1721 instructions depend on this being true. */
1722 assert (np->left == NULL);
1723
1724 assert (fdhead == curfdp);
1725 assert (last_node->fdp == curfdp);
1726 put_entries (np); /* write tags for file curfdp->taggedfname */
1727 free_tree (np); /* remove the written nodes */
1728 if (prev == NULL)
1729 nodehead = NULL; /* no nodes left */
1730 else
1731 prev->left = NULL; /* delete the pointer to the sublist */
1732 }
1733 }
1734 }
1735
1736 static void
1737 reset_input (FILE *inf)
1738 {
1739 if (fseek (inf, 0, SEEK_SET) != 0)
1740 perror (infilename);
1741 }
1742
1743 /*
1744 * This routine opens the specified file and calls the function
1745 * which finds the function and type definitions.
1746 */
1747 static void
1748 find_entries (FILE *inf)
1749 {
1750 char *cp;
1751 language *lang = curfdp->lang;
1752 Lang_function *parser = NULL;
1753
1754 /* If user specified a language, use it. */
1755 if (lang != NULL && lang->function != NULL)
1756 {
1757 parser = lang->function;
1758 }
1759
1760 /* Else try to guess the language given the file name. */
1761 if (parser == NULL)
1762 {
1763 lang = get_language_from_filename (curfdp->infname, true);
1764 if (lang != NULL && lang->function != NULL)
1765 {
1766 curfdp->lang = lang;
1767 parser = lang->function;
1768 }
1769 }
1770
1771 /* Else look for sharp-bang as the first two characters. */
1772 if (parser == NULL
1773 && readline_internal (&lb, inf, infilename) > 0
1774 && lb.len >= 2
1775 && lb.buffer[0] == '#'
1776 && lb.buffer[1] == '!')
1777 {
1778 char *lp;
1779
1780 /* Set lp to point at the first char after the last slash in the
1781 line or, if no slashes, at the first nonblank. Then set cp to
1782 the first successive blank and terminate the string. */
1783 lp = strrchr (lb.buffer+2, '/');
1784 if (lp != NULL)
1785 lp += 1;
1786 else
1787 lp = skip_spaces (lb.buffer + 2);
1788 cp = skip_non_spaces (lp);
1789 *cp = '\0';
1790
1791 if (strlen (lp) > 0)
1792 {
1793 lang = get_language_from_interpreter (lp);
1794 if (lang != NULL && lang->function != NULL)
1795 {
1796 curfdp->lang = lang;
1797 parser = lang->function;
1798 }
1799 }
1800 }
1801
1802 reset_input (inf);
1803
1804 /* Else try to guess the language given the case insensitive file name. */
1805 if (parser == NULL)
1806 {
1807 lang = get_language_from_filename (curfdp->infname, false);
1808 if (lang != NULL && lang->function != NULL)
1809 {
1810 curfdp->lang = lang;
1811 parser = lang->function;
1812 }
1813 }
1814
1815 /* Else try Fortran or C. */
1816 if (parser == NULL)
1817 {
1818 node *old_last_node = last_node;
1819
1820 curfdp->lang = get_language_from_langname ("fortran");
1821 find_entries (inf);
1822
1823 if (old_last_node == last_node)
1824 /* No Fortran entries found. Try C. */
1825 {
1826 reset_input (inf);
1827 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1828 find_entries (inf);
1829 }
1830 return;
1831 }
1832
1833 if (!no_line_directive
1834 && curfdp->lang != NULL && curfdp->lang->metasource)
1835 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1836 file, or anyway we parsed a file that is automatically generated from
1837 this one. If this is the case, the bingo.c file contained #line
1838 directives that generated tags pointing to this file. Let's delete
1839 them all before parsing this file, which is the real source. */
1840 {
1841 fdesc **fdpp = &fdhead;
1842 while (*fdpp != NULL)
1843 if (*fdpp != curfdp
1844 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1845 /* We found one of those! We must delete both the file description
1846 and all tags referring to it. */
1847 {
1848 fdesc *badfdp = *fdpp;
1849
1850 /* Delete the tags referring to badfdp->taggedfname
1851 that were obtained from badfdp->infname. */
1852 invalidate_nodes (badfdp, &nodehead);
1853
1854 *fdpp = badfdp->next; /* remove the bad description from the list */
1855 free_fdesc (badfdp);
1856 }
1857 else
1858 fdpp = &(*fdpp)->next; /* advance the list pointer */
1859 }
1860
1861 assert (parser != NULL);
1862
1863 /* Generic initializations before reading from file. */
1864 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1865
1866 /* Generic initializations before parsing file with readline. */
1867 lineno = 0; /* reset global line number */
1868 charno = 0; /* reset global char number */
1869 linecharno = 0; /* reset global char number of line start */
1870
1871 parser (inf);
1872
1873 regex_tag_multiline ();
1874 }
1875
1876 \f
1877 /*
1878 * Check whether an implicitly named tag should be created,
1879 * then call `pfnote'.
1880 * NAME is a string that is internally copied by this function.
1881 *
1882 * TAGS format specification
1883 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1884 * The following is explained in some more detail in etc/ETAGS.EBNF.
1885 *
1886 * make_tag creates tags with "implicit tag names" (unnamed tags)
1887 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1888 * 1. NAME does not contain any of the characters in NONAM;
1889 * 2. LINESTART contains name as either a rightmost, or rightmost but
1890 * one character, substring;
1891 * 3. the character, if any, immediately before NAME in LINESTART must
1892 * be a character in NONAM;
1893 * 4. the character, if any, immediately after NAME in LINESTART must
1894 * also be a character in NONAM.
1895 *
1896 * The implementation uses the notinname() macro, which recognizes the
1897 * characters stored in the string `nonam'.
1898 * etags.el needs to use the same characters that are in NONAM.
1899 */
1900 static void
1901 make_tag (const char *name, /* tag name, or NULL if unnamed */
1902 int namelen, /* tag length */
1903 bool is_func, /* tag is a function */
1904 char *linestart, /* start of the line where tag is */
1905 int linelen, /* length of the line where tag is */
1906 int lno, /* line number */
1907 long int cno) /* character number */
1908 {
1909 bool named = (name != NULL && namelen > 0);
1910 char *nname = NULL;
1911
1912 if (!CTAGS && named) /* maybe set named to false */
1913 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1914 such that etags.el can guess a name from it. */
1915 {
1916 int i;
1917 register const char *cp = name;
1918
1919 for (i = 0; i < namelen; i++)
1920 if (notinname (*cp++))
1921 break;
1922 if (i == namelen) /* rule #1 */
1923 {
1924 cp = linestart + linelen - namelen;
1925 if (notinname (linestart[linelen-1]))
1926 cp -= 1; /* rule #4 */
1927 if (cp >= linestart /* rule #2 */
1928 && (cp == linestart
1929 || notinname (cp[-1])) /* rule #3 */
1930 && strneq (name, cp, namelen)) /* rule #2 */
1931 named = false; /* use implicit tag name */
1932 }
1933 }
1934
1935 if (named)
1936 nname = savenstr (name, namelen);
1937
1938 pfnote (nname, is_func, linestart, linelen, lno, cno);
1939 }
1940
1941 /* Record a tag. */
1942 static void
1943 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1944 long int cno)
1945 /* tag name, or NULL if unnamed */
1946 /* tag is a function */
1947 /* start of the line where tag is */
1948 /* length of the line where tag is */
1949 /* line number */
1950 /* character number */
1951 {
1952 register node *np;
1953
1954 assert (name == NULL || name[0] != '\0');
1955 if (CTAGS && name == NULL)
1956 return;
1957
1958 np = xnew (1, node);
1959
1960 /* If ctags mode, change name "main" to M<thisfilename>. */
1961 if (CTAGS && !cxref_style && streq (name, "main"))
1962 {
1963 char *fp = strrchr (curfdp->taggedfname, '/');
1964 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1965 fp = strrchr (np->name, '.');
1966 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1967 fp[0] = '\0';
1968 }
1969 else
1970 np->name = name;
1971 np->valid = true;
1972 np->been_warned = false;
1973 np->fdp = curfdp;
1974 np->is_func = is_func;
1975 np->lno = lno;
1976 if (np->fdp->usecharno)
1977 /* Our char numbers are 0-base, because of C language tradition?
1978 ctags compatibility? old versions compatibility? I don't know.
1979 Anyway, since emacs's are 1-base we expect etags.el to take care
1980 of the difference. If we wanted to have 1-based numbers, we would
1981 uncomment the +1 below. */
1982 np->cno = cno /* + 1 */ ;
1983 else
1984 np->cno = invalidcharno;
1985 np->left = np->right = NULL;
1986 if (CTAGS && !cxref_style)
1987 {
1988 if (strlen (linestart) < 50)
1989 np->regex = concat (linestart, "$", "");
1990 else
1991 np->regex = savenstr (linestart, 50);
1992 }
1993 else
1994 np->regex = savenstr (linestart, linelen);
1995
1996 add_node (np, &nodehead);
1997 }
1998
1999 /*
2000 * free_tree ()
2001 * recurse on left children, iterate on right children.
2002 */
2003 static void
2004 free_tree (register node *np)
2005 {
2006 while (np)
2007 {
2008 register node *node_right = np->right;
2009 free_tree (np->left);
2010 free (np->name);
2011 free (np->regex);
2012 free (np);
2013 np = node_right;
2014 }
2015 }
2016
2017 /*
2018 * free_fdesc ()
2019 * delete a file description
2020 */
2021 static void
2022 free_fdesc (register fdesc *fdp)
2023 {
2024 free (fdp->infname);
2025 free (fdp->infabsname);
2026 free (fdp->infabsdir);
2027 free (fdp->taggedfname);
2028 free (fdp->prop);
2029 free (fdp);
2030 }
2031
2032 /*
2033 * add_node ()
2034 * Adds a node to the tree of nodes. In etags mode, sort by file
2035 * name. In ctags mode, sort by tag name. Make no attempt at
2036 * balancing.
2037 *
2038 * add_node is the only function allowed to add nodes, so it can
2039 * maintain state.
2040 */
2041 static void
2042 add_node (node *np, node **cur_node_p)
2043 {
2044 register int dif;
2045 register node *cur_node = *cur_node_p;
2046
2047 if (cur_node == NULL)
2048 {
2049 *cur_node_p = np;
2050 last_node = np;
2051 return;
2052 }
2053
2054 if (!CTAGS)
2055 /* Etags Mode */
2056 {
2057 /* For each file name, tags are in a linked sublist on the right
2058 pointer. The first tags of different files are a linked list
2059 on the left pointer. last_node points to the end of the last
2060 used sublist. */
2061 if (last_node != NULL && last_node->fdp == np->fdp)
2062 {
2063 /* Let's use the same sublist as the last added node. */
2064 assert (last_node->right == NULL);
2065 last_node->right = np;
2066 last_node = np;
2067 }
2068 else if (cur_node->fdp == np->fdp)
2069 {
2070 /* Scanning the list we found the head of a sublist which is
2071 good for us. Let's scan this sublist. */
2072 add_node (np, &cur_node->right);
2073 }
2074 else
2075 /* The head of this sublist is not good for us. Let's try the
2076 next one. */
2077 add_node (np, &cur_node->left);
2078 } /* if ETAGS mode */
2079
2080 else
2081 {
2082 /* Ctags Mode */
2083 dif = strcmp (np->name, cur_node->name);
2084
2085 /*
2086 * If this tag name matches an existing one, then
2087 * do not add the node, but maybe print a warning.
2088 */
2089 if (no_duplicates && !dif)
2090 {
2091 if (np->fdp == cur_node->fdp)
2092 {
2093 if (!no_warnings)
2094 {
2095 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2096 np->fdp->infname, lineno, np->name);
2097 fprintf (stderr, "Second entry ignored\n");
2098 }
2099 }
2100 else if (!cur_node->been_warned && !no_warnings)
2101 {
2102 fprintf
2103 (stderr,
2104 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2105 np->fdp->infname, cur_node->fdp->infname, np->name);
2106 cur_node->been_warned = true;
2107 }
2108 return;
2109 }
2110
2111 /* Actually add the node */
2112 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2113 } /* if CTAGS mode */
2114 }
2115
2116 /*
2117 * invalidate_nodes ()
2118 * Scan the node tree and invalidate all nodes pointing to the
2119 * given file description (CTAGS case) or free them (ETAGS case).
2120 */
2121 static void
2122 invalidate_nodes (fdesc *badfdp, node **npp)
2123 {
2124 node *np = *npp;
2125
2126 if (np == NULL)
2127 return;
2128
2129 if (CTAGS)
2130 {
2131 if (np->left != NULL)
2132 invalidate_nodes (badfdp, &np->left);
2133 if (np->fdp == badfdp)
2134 np->valid = false;
2135 if (np->right != NULL)
2136 invalidate_nodes (badfdp, &np->right);
2137 }
2138 else
2139 {
2140 assert (np->fdp != NULL);
2141 if (np->fdp == badfdp)
2142 {
2143 *npp = np->left; /* detach the sublist from the list */
2144 np->left = NULL; /* isolate it */
2145 free_tree (np); /* free it */
2146 invalidate_nodes (badfdp, npp);
2147 }
2148 else
2149 invalidate_nodes (badfdp, &np->left);
2150 }
2151 }
2152
2153 \f
2154 static int total_size_of_entries (node *);
2155 static int number_len (long) ATTRIBUTE_CONST;
2156
/* Count the decimal digits needed to print NUM, which is expected to be
   non-negative; any value below 10 counts as a single digit.  */
static int
number_len (long int num)
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2166
2167 /*
2168 * Return total number of characters that put_entries will output for
2169 * the nodes in the linked list at the right of the specified node.
2170 * This count is irrelevant with etags.el since emacs 19.34 at least,
2171 * but is still supplied for backward compatibility.
2172 */
2173 static int
2174 total_size_of_entries (register node *np)
2175 {
2176 register int total = 0;
2177
2178 for (; np != NULL; np = np->right)
2179 if (np->valid)
2180 {
2181 total += strlen (np->regex) + 1; /* pat\177 */
2182 if (np->name != NULL)
2183 total += strlen (np->name) + 1; /* name\001 */
2184 total += number_len ((long) np->lno) + 1; /* lno, */
2185 if (np->cno != invalidcharno) /* cno */
2186 total += number_len (np->cno);
2187 total += 1; /* newline */
2188 }
2189
2190 return total;
2191 }
2192
2193 static void
2194 put_entries (register node *np)
2195 {
2196 register char *sp;
2197 static fdesc *fdp = NULL;
2198
2199 if (np == NULL)
2200 return;
2201
2202 /* Output subentries that precede this one */
2203 if (CTAGS)
2204 put_entries (np->left);
2205
2206 /* Output this entry */
2207 if (np->valid)
2208 {
2209 if (!CTAGS)
2210 {
2211 /* Etags mode */
2212 if (fdp != np->fdp)
2213 {
2214 fdp = np->fdp;
2215 fprintf (tagf, "\f\n%s,%d\n",
2216 fdp->taggedfname, total_size_of_entries (np));
2217 fdp->written = true;
2218 }
2219 fputs (np->regex, tagf);
2220 fputc ('\177', tagf);
2221 if (np->name != NULL)
2222 {
2223 fputs (np->name, tagf);
2224 fputc ('\001', tagf);
2225 }
2226 fprintf (tagf, "%d,", np->lno);
2227 if (np->cno != invalidcharno)
2228 fprintf (tagf, "%ld", np->cno);
2229 fputs ("\n", tagf);
2230 }
2231 else
2232 {
2233 /* Ctags mode */
2234 if (np->name == NULL)
2235 error ("internal error: NULL name in ctags mode.");
2236
2237 if (cxref_style)
2238 {
2239 if (vgrind_style)
2240 fprintf (stdout, "%s %s %d\n",
2241 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2242 else
2243 fprintf (stdout, "%-16s %3d %-16s %s\n",
2244 np->name, np->lno, np->fdp->taggedfname, np->regex);
2245 }
2246 else
2247 {
2248 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2249
2250 if (np->is_func)
2251 { /* function or #define macro with args */
2252 putc (searchar, tagf);
2253 putc ('^', tagf);
2254
2255 for (sp = np->regex; *sp; sp++)
2256 {
2257 if (*sp == '\\' || *sp == searchar)
2258 putc ('\\', tagf);
2259 putc (*sp, tagf);
2260 }
2261 putc (searchar, tagf);
2262 }
2263 else
2264 { /* anything else; text pattern inadequate */
2265 fprintf (tagf, "%d", np->lno);
2266 }
2267 putc ('\n', tagf);
2268 }
2269 }
2270 } /* if this node contains a valid tag */
2271
2272 /* Output subentries that follow this one */
2273 put_entries (np->right);
2274 if (!CTAGS)
2275 put_entries (np->left);
2276 }
2277
2278 \f
2279 /* C extensions.
   Bit flags telling which C dialect is being parsed.  C_EXT is the mask
   of the dialect bits; a dialect is plain C when none of them is set.
   Note that C_STAR (0x3) and C_JAVA (0x5) both contain the C_PLPL bit
   (0x1); the keyword table uses (C_JAVA & ~C_PLPL) for Java keywords so
   that they are not matched when only the C++ bit is set.  */
2280 #define C_EXT 0x00fff /* C extensions */
2281 #define C_PLAIN 0x00000 /* C */
2282 #define C_PLPL 0x00001 /* C++ */
2283 #define C_STAR 0x00003 /* C* */
2284 #define C_JAVA 0x00005 /* JAVA */
2285 #define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */
2286 #define YACC 0x10000 /* yacc file */
2287
2288 /*
2289 * The C symbol tables.
2290 */
2291 enum sym_type
2292 {
2293 st_none,
2294 st_C_objprot, st_C_objimpl, st_C_objend,
2295 st_C_gnumacro,
2296 st_C_ignore, st_C_attribute,
2297 st_C_javastruct,
2298 st_C_operator,
2299 st_C_class, st_C_template,
2300 st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
2301 };
2302
2303 /* Feed stuff between (but not including) %[ and %] lines to:
2304 gperf -m 5
2305 %[
2306 %compare-strncmp
2307 %enum
2308 %struct-type
2309 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2310 %%
2311 if, 0, st_C_ignore
2312 for, 0, st_C_ignore
2313 while, 0, st_C_ignore
2314 switch, 0, st_C_ignore
2315 return, 0, st_C_ignore
2316 __attribute__, 0, st_C_attribute
2317 GTY, 0, st_C_attribute
2318 @interface, 0, st_C_objprot
2319 @protocol, 0, st_C_objprot
2320 @implementation,0, st_C_objimpl
2321 @end, 0, st_C_objend
2322 import, (C_JAVA & ~C_PLPL), st_C_ignore
2323 package, (C_JAVA & ~C_PLPL), st_C_ignore
2324 friend, C_PLPL, st_C_ignore
2325 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2326 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2327 interface, (C_JAVA & ~C_PLPL), st_C_struct
2328 class, 0, st_C_class
2329 namespace, C_PLPL, st_C_struct
2330 domain, C_STAR, st_C_struct
2331 union, 0, st_C_struct
2332 struct, 0, st_C_struct
2333 extern, 0, st_C_extern
2334 enum, 0, st_C_enum
2335 typedef, 0, st_C_typedef
2336 define, 0, st_C_define
2337 undef, 0, st_C_define
2338 operator, C_PLPL, st_C_operator
2339 template, 0, st_C_template
2340 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2341 DEFUN, 0, st_C_gnumacro
2342 SYSCALL, 0, st_C_gnumacro
2343 ENTRY, 0, st_C_gnumacro
2344 PSEUDO, 0, st_C_gnumacro
2345 # These are defined inside C functions, so currently they are not met.
2346 # EXFUN used in glibc, DEFVAR_* in emacs.
2347 #EXFUN, 0, st_C_gnumacro
2348 #DEFVAR_, 0, st_C_gnumacro
2349 %]
2350 and replace lines between %< and %> with its output, then:
2351 - remove the #if characterset check
2352 - make in_word_set static and not inline. */
2353 /*%<*/
2354 /* C code produced by gperf version 3.0.1 */
2355 /* Command-line: gperf -m 5 */
2356 /* Computed positions: -k'2-3' */
2357
2358 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2359 /* maximum key range = 33, duplicates = 0 */
2360
/* Hash function generated by gperf for the keyword table below.
   The value is LEN plus the association value of the second character
   of STR plus, unless LEN is 2, the association value of the third
   character.  STR[1] is always read, and STR[2] is read for every LEN
   other than 2, so the caller must make sure that LEN is at least 2
   (in_word_set checks the length before calling).  Characters that do
   not occur in those positions of any keyword map to 35, which pushes
   the result past the largest valid hash value (34).  */
2361 static int
2362 hash (const char *str, int len)
2363 {
2364 static char const asso_values[] =
2365 {
2366 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2367 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2368 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2369 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2370 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2371 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2372 35, 35, 35, 35, 35, 35, 35, 35, 35, 3,
2373 26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
2374 35, 35, 35, 24, 0, 35, 35, 35, 35, 0,
2375 35, 35, 35, 35, 35, 1, 35, 16, 35, 6,
2376 23, 0, 0, 35, 22, 0, 35, 35, 5, 0,
2377 0, 15, 1, 35, 6, 35, 8, 19, 35, 16,
2378 4, 5, 35, 35, 35, 35, 35, 35, 35, 35,
2379 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2380 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2381 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2382 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2383 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2384 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2385 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2386 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2387 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2388 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2389 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2390 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2391 35, 35, 35, 35, 35, 35
2392 };
2393 int hval = len;
2394
2395 switch (hval)
2396 {
2397 default:
2398 hval += asso_values[(unsigned char) str[2]];
2399 /*FALLTHROUGH*/
2400 case 2:
2401 hval += asso_values[(unsigned char) str[1]];
2402 break;
2403 }
2404 return hval;
2405 }
2406
/* Keyword lookup generated by gperf.
   Return the table entry whose name is exactly the LEN characters at
   STR, or a null pointer if those characters are not a keyword.  STR
   does not need to be null-terminated at LEN: only its first LEN
   characters are compared, and the candidate keyword is then required
   to end at that length.  */
2407 static struct C_stab_entry *
2408 in_word_set (register const char *str, register unsigned int len)
2409 {
2410 enum
2411 {
2412 TOTAL_KEYWORDS = 33,
2413 MIN_WORD_LENGTH = 2,
2414 MAX_WORD_LENGTH = 15,
2415 MIN_HASH_VALUE = 2,
2416 MAX_HASH_VALUE = 34
2417 };
2418
/* The table is indexed by hash value; the two leading empty entries
   fill the slots below MIN_HASH_VALUE.  */
2419 static struct C_stab_entry wordlist[] =
2420 {
2421 {""}, {""},
2422 {"if", 0, st_C_ignore},
2423 {"GTY", 0, st_C_attribute},
2424 {"@end", 0, st_C_objend},
2425 {"union", 0, st_C_struct},
2426 {"define", 0, st_C_define},
2427 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2428 {"template", 0, st_C_template},
2429 {"operator", C_PLPL, st_C_operator},
2430 {"@interface", 0, st_C_objprot},
2431 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2432 {"friend", C_PLPL, st_C_ignore},
2433 {"typedef", 0, st_C_typedef},
2434 {"return", 0, st_C_ignore},
2435 {"@implementation",0, st_C_objimpl},
2436 {"@protocol", 0, st_C_objprot},
2437 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2438 {"extern", 0, st_C_extern},
2439 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2440 {"struct", 0, st_C_struct},
2441 {"domain", C_STAR, st_C_struct},
2442 {"switch", 0, st_C_ignore},
2443 {"enum", 0, st_C_enum},
2444 {"for", 0, st_C_ignore},
2445 {"namespace", C_PLPL, st_C_struct},
2446 {"class", 0, st_C_class},
2447 {"while", 0, st_C_ignore},
2448 {"undef", 0, st_C_define},
2449 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2450 {"__attribute__", 0, st_C_attribute},
2451 {"SYSCALL", 0, st_C_gnumacro},
2452 {"ENTRY", 0, st_C_gnumacro},
2453 {"PSEUDO", 0, st_C_gnumacro},
2454 {"DEFUN", 0, st_C_gnumacro}
2455 };
2456
/* Words shorter or longer than any keyword are rejected before
   hashing, which also guarantees that hash never reads past a
   two-character token.  */
2457 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2458 {
2459 int key = hash (str, len);
2460
2461 if (key <= MAX_HASH_VALUE && key >= 0)
2462 {
2463 const char *s = wordlist[key].name;
2464
/* The hash only selects a candidate: confirm it by comparing the
   characters and checking that the keyword is exactly LEN long.  */
2465 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2466 return &wordlist[key];
2467 }
2468 }
2469 return 0;
2470 }
2471 /*%>*/
2472
2473 static enum sym_type
2474 C_symtype (char *str, int len, int c_ext)
2475 {
2476 register struct C_stab_entry *se = in_word_set (str, len);
2477
2478 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2479 return st_none;
2480 return se->type;
2481 }
2482
2483 \f
2484 /*
2485 * Ignoring __attribute__ ((list))
2486 */
2487 static bool inattribute; /* looking at an __attribute__ construct */
2488
2489 /*
2490 * C functions and variables are recognized using a simple
2491 * finite automaton. fvdef is its state variable.
2492 */
2493 static enum
2494 {
2495 fvnone, /* nothing seen */
2496 fdefunkey, /* Emacs DEFUN keyword seen */
2497 fdefunname, /* Emacs DEFUN name seen */
2498 foperator, /* func: operator keyword seen (cplpl) */
2499 fvnameseen, /* function or variable name seen */
2500 fstartlist, /* func: just after open parenthesis */
2501 finlist, /* func: in parameter list */
2502 flistseen, /* func: after parameter list */
2503 fignore, /* func: before open brace */
2504 vignore /* var-like: ignore until ';' */
2505 } fvdef;
2506
2507 static bool fvextern; /* func or var: extern keyword seen; */
2508
2509 /*
2510 * typedefs are recognized using a simple finite automaton.
2511 * typdef is its state variable.
2512 */
2513 static enum
2514 {
2515 tnone, /* nothing seen */
2516 tkeyseen, /* typedef keyword seen */
2517 ttypeseen, /* defined type seen */
2518 tinbody, /* inside typedef body */
2519 tend, /* just before typedef tag */
2520 tignore /* junk after typedef tag */
2521 } typdef;
2522
2523 /*
2524 * struct-like structures (enum, struct and union) are recognized
2525 * using another simple finite automaton. `structdef' is its state
2526 * variable.
2527 */
2528 static enum
2529 {
2530 snone, /* nothing seen yet,
2531 or in struct body if bracelev > 0 */
2532 skeyseen, /* struct-like keyword seen */
2533 stagseen, /* struct-like tag seen */
2534 scolonseen /* colon seen after struct-like tag */
2535 } structdef;
2536
2537 /*
2538 * When objdef is different from onone, objtag is the name of the class.
2539 */
2540 static const char *objtag = "<uninited>";
2541
2542 /*
2543 * Yet another little state machine to deal with preprocessor lines.
2544 */
2545 static enum
2546 {
2547 dnone, /* nothing seen */
2548 dsharpseen, /* '#' seen as first char on line */
2549 ddefineseen, /* '#' and 'define' seen */
2550 dignorerest /* ignore rest of line */
2551 } definedef;
2552
2553 /*
2554 * State machine for Objective C protocols and implementations.
2555 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2556 */
2557 static enum
2558 {
2559 onone, /* nothing seen */
2560 oprotocol, /* @interface or @protocol seen */
2561 oimplementation, /* @implementation seen */
2562 otagseen, /* class name seen */
2563 oparenseen, /* parenthesis before category seen */
2564 ocatseen, /* category name seen */
2565 oinbody, /* in @implementation body */
2566 omethodsign, /* in @implementation body, after +/- */
2567 omethodtag, /* after method name */
2568 omethodcolon, /* after method colon */
2569 omethodparm, /* after method parameter */
2570 oignore /* wait for @end */
2571 } objdef;
2572
2573
2574 /*
2575 * Use this structure to keep info about the token read, and how it
2576 * should be tagged. Used by the make_C_tag function to build a tag.
2577 */
2578 static struct tok
2579 {
2580 char *line; /* string containing the token */
2581 int offset; /* where the token starts in LINE */
2582 int length; /* token length */
2583 /*
2584 The previous members can be used to pass strings around for generic
2585 purposes. The following ones specifically refer to creating tags. In this
2586 case the token contained here is the pattern that will be used to create a
2587 tag.
2588 */
2589 bool valid; /* if false, do not create a tag; the token
2590 should be invalidated whenever a state machine is
2591 reset prematurely */
2592 bool named; /* create a named tag */
2593 int lineno; /* source line number of tag */
2594 long linepos; /* source char number of tag */
2595 } token; /* latest token read */
2596
2597 /*
2598 * Variables and functions for dealing with nested structures.
2599 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2600 */
2601 static void pushclass_above (int, char *, int);
2602 static void popclass_above (int);
2603 static void write_classname (linebuffer *, const char *qualifier);
2604
/* Stack of the enclosing struct-like declarations.  cname and bracelev
   are parallel arrays of SIZE elements, of which the first NL are in
   use; a cname element may be NULL (see pushclass_above).  */
2605 static struct {
2606 char **cname; /* nested class names */
2607 int *bracelev; /* nested class brace level */
2608 int nl; /* class nesting level (elements used) */
2609 int size; /* length of the array */
2610 } cstack; /* stack for nested declaration tags */
2611 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2612 #define nestlev (cstack.nl)
2613 /* After struct keyword or in struct body, not inside a nested function.
   This macro reads a variable called `bracelev' from the scope where it
   is expanded.  */
2614 #define instruct (structdef == snone && nestlev > 0 \
2615 && bracelev == cstack.bracelev[nestlev-1] + 1)
2616
2617 static void
2618 pushclass_above (int bracelev, char *str, int len)
2619 {
2620 int nl;
2621
2622 popclass_above (bracelev);
2623 nl = cstack.nl;
2624 if (nl >= cstack.size)
2625 {
2626 int size = cstack.size *= 2;
2627 xrnew (cstack.cname, size, char *);
2628 xrnew (cstack.bracelev, size, int);
2629 }
2630 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2631 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2632 cstack.bracelev[nl] = bracelev;
2633 cstack.nl = nl + 1;
2634 }
2635
2636 static void
2637 popclass_above (int bracelev)
2638 {
2639 int nl;
2640
2641 for (nl = cstack.nl - 1;
2642 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2643 nl--)
2644 {
2645 free (cstack.cname[nl]);
2646 cstack.nl = nl;
2647 }
2648 }
2649
2650 static void
2651 write_classname (linebuffer *cn, const char *qualifier)
2652 {
2653 int i, len;
2654 int qlen = strlen (qualifier);
2655
2656 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2657 {
2658 len = 0;
2659 cn->len = 0;
2660 cn->buffer[0] = '\0';
2661 }
2662 else
2663 {
2664 len = strlen (cstack.cname[0]);
2665 linebuffer_setlen (cn, len);
2666 strcpy (cn->buffer, cstack.cname[0]);
2667 }
2668 for (i = 1; i < cstack.nl; i++)
2669 {
2670 char *s = cstack.cname[i];
2671 if (s == NULL)
2672 continue;
2673 linebuffer_setlen (cn, len + qlen + strlen (s));
2674 len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2675 }
2676 }
2677
2678 \f
2679 static bool consider_token (char *, int, int, int *, int, int, bool *);
2680 static void make_C_tag (bool);
2681
2682 /*
2683 * consider_token ()
2684 * checks to see if the current token is at the start of a
2685 * function or variable, or corresponds to a typedef, or
2686 * is a struct/union/enum tag, or #define, or an enum constant.
2687 *
2688 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2689 * with args. C_EXTP points to which language we are looking at.
 *
 * The return value is true when the token has been recognized as one
 * of the above, false otherwise.  What is then done with the token is
 * decided by the caller, which is not part of this function.
 *
 * The state machines below are consulted in a fixed order
 * (preprocessor, typedef, struct-like keywords, Objective C,
 * functions and variables); most of them return as soon as they have
 * dealt with the token, so the order matters.
2690 *
2691 * Globals
2692 * fvdef IN OUT
2693 * structdef IN OUT
2694 * definedef IN OUT
2695 * typdef IN OUT
2696 * objdef IN OUT
 * inattribute OUT
 * fvextern OUT
 * objtag OUT
 * token_name OUT (Objective C method names)
2697 */
2698
2699 static bool
2700 consider_token (char *str, int len, int c, int *c_extp,
2701 int bracelev, int parlev, bool *is_func_or_var)
2702 /* IN: token pointer */
2703 /* IN: token length */
2704 /* IN: first char after the token */
2705 /* IN, OUT: C extensions mask */
2706 /* IN: brace level */
2707 /* IN: parenthesis level */
2708 /* OUT: function or variable found */
2709 {
2710 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2711 structtype is the type of the preceding struct-like keyword, and
2712 structbracelev is the brace level where it has been seen. */
2713 static enum sym_type structtype;
2714 static int structbracelev;
2715 static enum sym_type toktype;
2716
2717
/* Classify the token: a keyword type of the current dialect, or
   st_none when it is not a keyword.  */
2718 toktype = C_symtype (str, len, *c_extp);
2719
2720 /*
2721 * Skip __attribute__
2722 */
2723 if (toktype == st_C_attribute)
2724 {
2725 inattribute = true;
2726 return false;
2727 }
2728
2729 /*
2730 * Advance the definedef state machine.
2731 */
2732 switch (definedef)
2733 {
2734 case dnone:
2735 /* We're not on a preprocessor line. */
2736 if (toktype == st_C_gnumacro)
2737 {
2738 fvdef = fdefunkey;
2739 return false;
2740 }
2741 break;
2742 case dsharpseen:
/* First token after '#': only `define' and `undef' (both st_C_define
   in the keyword table) keep the machine going; after any other
   directive the rest of the line is ignored.  */
2743 if (toktype == st_C_define)
2744 {
2745 definedef = ddefineseen;
2746 }
2747 else
2748 {
2749 definedef = dignorerest;
2750 }
2751 return false;
2752 case ddefineseen:
2753 /*
2754 * Make a tag for any macro, unless it is a constant
2755 * and constantypedefs is false.
2756 */
2757 definedef = dignorerest;
/* A macro takes arguments when its name is immediately followed by
   an open parenthesis.  */
2758 *is_func_or_var = (c == '(');
2759 if (!*is_func_or_var && !constantypedefs)
2760 return false;
2761 else
2762 return true;
2763 case dignorerest:
2764 return false;
2765 default:
2766 error ("internal error: definedef value.");
2767 }
2768
2769 /*
2770 * Now typedefs
2771 */
2772 switch (typdef)
2773 {
2774 case tnone:
2775 if (toktype == st_C_typedef)
2776 {
/* The keyword always resets the function/variable machine, but the
   typedef machine is started only when typedefs are to be tagged.  */
2777 if (typedefs)
2778 typdef = tkeyseen;
2779 fvextern = false;
2780 fvdef = fvnone;
2781 return false;
2782 }
2783 break;
2784 case tkeyseen:
2785 switch (toktype)
2786 {
2787 case st_none:
2788 case st_C_class:
2789 case st_C_struct:
2790 case st_C_enum:
2791 typdef = ttypeseen;
2792 break;
2793 default:
2794 break;
2795 }
2796 break;
2797 case ttypeseen:
2798 if (structdef == snone && fvdef == fvnone)
2799 {
2800 fvdef = fvnameseen;
2801 return true;
2802 }
2803 break;
2804 case tend:
2805 switch (toktype)
2806 {
2807 case st_C_class:
2808 case st_C_struct:
2809 case st_C_enum:
2810 return false;
2811 default:
2812 return true;
2813 }
2814 default:
2815 break;
2816 }
2817
/* Struct-like keywords.  */
2818 switch (toktype)
2819 {
2820 case st_C_javastruct:
2821 if (structdef == stagseen)
2822 structdef = scolonseen;
2823 return false;
2824 case st_C_template:
2825 case st_C_class:
/* In C_AUTO mode, a `class' or `template' met at top level with all
   the state machines idle switches the language to C++ for good.  */
2826 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2827 && bracelev == 0
2828 && definedef == dnone && structdef == snone
2829 && typdef == tnone && fvdef == fvnone)
2830 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2831 if (toktype == st_C_template)
2832 break;
2833 /* FALLTHRU */
2834 case st_C_struct:
2835 case st_C_enum:
2836 if (parlev == 0
2837 && fvdef != vignore
2838 && (typdef == tkeyseen
2839 || (typedefs_or_cplusplus && structdef == snone)))
2840 {
2841 structdef = skeyseen;
2842 structtype = toktype;
2843 structbracelev = bracelev;
2844 if (fvdef == fvnameseen)
2845 fvdef = fvnone;
2846 }
2847 return false;
2848 default:
2849 break;
2850 }
2851
/* The token right after a struct-like keyword is the tag.  */
2852 if (structdef == skeyseen)
2853 {
2854 structdef = stagseen;
2855 return true;
2856 }
2857
2858 if (typdef != tnone)
2859 definedef = dnone;
2860
2861 /* Detect Objective C constructs. */
2862 switch (objdef)
2863 {
2864 case onone:
2865 switch (toktype)
2866 {
2867 case st_C_objprot:
2868 objdef = oprotocol;
2869 return false;
2870 case st_C_objimpl:
2871 objdef = oimplementation;
2872 return false;
2873 default:
2874 break;
2875 }
2876 break;
2877 case oimplementation:
2878 /* Save the class tag for functions or variables defined inside. */
2879 objtag = savenstr (str, len);
2880 objdef = oinbody;
2881 return false;
2882 case oprotocol:
2883 /* Save the class tag for categories. */
2884 objtag = savenstr (str, len);
2885 objdef = otagseen;
2886 *is_func_or_var = true;
2887 return true;
2888 case oparenseen:
2889 objdef = ocatseen;
2890 *is_func_or_var = true;
2891 return true;
2892 case oinbody:
2893 break;
2894 case omethodsign:
2895 if (parlev == 0)
2896 {
/* First part of a method name: start token_name afresh with it.  */
2897 fvdef = fvnone;
2898 objdef = omethodtag;
2899 linebuffer_setlen (&token_name, len);
2900 memcpy (token_name.buffer, str, len);
2901 token_name.buffer[len] = '\0';
2902 return true;
2903 }
2904 return false;
2905 case omethodcolon:
2906 if (parlev == 0)
2907 objdef = omethodparm;
2908 return false;
2909 case omethodparm:
2910 if (parlev == 0)
2911 {
2912 objdef = omethodtag;
2913 if (class_qualify)
2914 {
/* Further part of a method name: append it to token_name.  */
2915 int oldlen = token_name.len;
2916 fvdef = fvnone;
2917 linebuffer_setlen (&token_name, oldlen + len);
2918 memcpy (token_name.buffer + oldlen, str, len);
2919 token_name.buffer[oldlen + len] = '\0';
2920 }
2921 return true;
2922 }
2923 return false;
2924 case oignore:
2925 if (toktype == st_C_objend)
2926 {
2927 /* Memory leakage here: the string pointed by objtag is
2928 never released, because many tests would be needed to
2929 avoid breaking on incorrect input code. The amount of
2930 memory leaked here is the sum of the lengths of the
2931 class tags.
2932 free (objtag); */
2933 objdef = onone;
2934 }
2935 return false;
2936 default:
2937 break;
2938 }
2939
2940 /* A function, variable or enum constant? */
2941 switch (toktype)
2942 {
2943 case st_C_extern:
2944 fvextern = true;
2945 switch (fvdef)
2946 {
2947 case finlist:
2948 case flistseen:
2949 case fignore:
2950 case vignore:
2951 break;
2952 default:
2953 fvdef = fvnone;
2954 }
2955 return false;
2956 case st_C_ignore:
2957 fvextern = false;
2958 fvdef = vignore;
2959 return false;
2960 case st_C_operator:
2961 fvdef = foperator;
2962 *is_func_or_var = true;
2963 return true;
2964 case st_none:
2965 if (constantypedefs
2966 && structdef == snone
2967 && structtype == st_C_enum && bracelev > structbracelev
2968 /* Don't tag tokens in expressions that assign values to enum
2969 constants. */
2970 && fvdef != vignore)
2971 return true; /* enum constant */
2972 switch (fvdef)
2973 {
2974 case fdefunkey:
2975 if (bracelev > 0)
2976 break;
2977 fvdef = fdefunname; /* GNU macro */
2978 *is_func_or_var = true;
2979 return true;
2980 case fvnone:
2981 switch (typdef)
2982 {
2983 case ttypeseen:
2984 return false;
2985 case tnone:
/* `asm' and `__asm__' are not names: ignore what follows them.  */
2986 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2987 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2988 {
2989 fvdef = vignore;
2990 return false;
2991 }
2992 break;
2993 default:
2994 break;
2995 }
2996 /* FALLTHRU */
2997 case fvnameseen:
/* A token ending in "::operator" starts a C++ operator definition;
   in C_AUTO mode it also switches the language to C++.  */
2998 if (len >= 10 && strneq (str+len-10, "::operator", 10))
2999 {
3000 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3001 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3002 fvdef = foperator;
3003 *is_func_or_var = true;
3004 return true;
3005 }
/* Inside braces, names count only directly within a struct body.  */
3006 if (bracelev > 0 && !instruct)
3007 break;
3008 fvdef = fvnameseen; /* function or variable */
3009 *is_func_or_var = true;
3010 return true;
3011 default:
3012 break;
3013 }
3014 break;
3015 default:
3016 break;
3017 }
3018
3019 return false;
3020 }
3021
3022 \f
3023 /*
3024 * C_entries often keeps pointers to tokens or lines which are older than
3025 * the line currently read. By keeping two line buffers, and switching
3026 * them at end of line, it is possible to use those pointers.
 *
 * The macros below are not self-contained: they refer by name to
 * variables of the function where they are expanded (curndx, newndx,
 * c_ext, lp, quotednl, savetoken, inf), as well as to the globals
 * charno, token and definedef.
3027 */
3028 static struct
3029 {
3030 long linepos;
3031 linebuffer lb;
3032 } lbs[2];
3033
3034 #define current_lb_is_new (newndx == curndx)
3035 #define switch_line_buffers() (curndx = 1 - curndx)
3036
3037 #define curlb (lbs[curndx].lb)
3038 #define newlb (lbs[newndx].lb)
3039 #define curlinepos (lbs[curndx].linepos)
3040 #define newlinepos (lbs[newndx].linepos)
3041
3042 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3043 #define cplpl (c_ext & C_PLPL)
3044 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3045
/* Read the next line into the current line buffer, remembering the
   character number where it starts, and make it the new line.  Unlike
   CNL, this leaves definedef alone.  */
3046 #define CNL_SAVE_DEFINEDEF() \
3047 do { \
3048 curlinepos = charno; \
3049 readline (&curlb, inf); \
3050 lp = curlb.buffer; \
3051 quotednl = false; \
3052 newndx = curndx; \
3053 } while (0)
3054
/* Same as CNL_SAVE_DEFINEDEF, then restore the token saved in
   savetoken, if it is valid, and reset the preprocessor state
   machine.  */
3055 #define CNL() \
3056 do { \
3057 CNL_SAVE_DEFINEDEF (); \
3058 if (savetoken.valid) \
3059 { \
3060 token = savetoken; \
3061 savetoken.valid = false; \
3062 } \
3063 definedef = dnone; \
3064 } while (0)
3065
3066
3067 static void
3068 make_C_tag (bool isfun)
3069 {
3070 /* This function is never called when token.valid is false, but
3071 we must protect against invalid input or internal errors. */
3072 if (token.valid)
3073 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3074 token.offset+token.length+1, token.lineno, token.linepos);
3075 else if (DEBUG)
3076 { /* this branch is optimized away if !DEBUG */
3077 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3078 token_name.len + 17, isfun, token.line,
3079 token.offset+token.length+1, token.lineno, token.linepos);
3080 error ("INVALID TOKEN");
3081 }
3082
3083 token.valid = false;
3084 }
3085
/* Return true unless INF has already hit end of file or a read error.  */
static bool
perhaps_more_input (FILE *inf)
{
  if (feof (inf))
    return false;
  return ferror (inf) == 0;
}
3091
3092
3093 /*
3094 * C_entries ()
3095 * This routine finds functions, variables, typedefs,
3096 * #define's, enum constants and struct/union/enum definitions in
3097 * C syntax and adds them to the list.
3098 */
/* C_EXT is a word of dialect flags (tested below through plainc,
   cplpl, cjava and the YACC bit).  When C_AUTO is set, the C_PLPL bit
   is switched on and C_AUTO cleared as soon as a `::' is found inside
   an identifier.  INF is the stream to scan; the loop runs for as long
   as perhaps_more_input (INF) holds.

   The input is consumed one character at a time out of the two line
   buffers in lbs[].  Comments, strings and character constants are
   skipped, nesting levels of braces, brackets, parentheses and
   templates are tracked, and the file-level state variables (fvdef,
   typdef, structdef, definedef, objdef, ...) decide when make_C_tag
   is called.  Both line buffers are freed before returning.  */
3099 static void
3100 C_entries (int c_ext, FILE *inf)
3101 /* extension of C */
3102 /* input file */
3103 {
3104 register char c; /* latest char read; '\0' for end of line */
3105 register char *lp; /* pointer one beyond the character `c' */
3106 int curndx, newndx; /* indices for current and new lb */
3107 register int tokoff; /* offset in line of start of current token */
3108 register int toklen; /* length of current token */
3109 const char *qualifier; /* string used to qualify names */
3110 int qlen; /* length of qualifier */
3111 int bracelev; /* current brace level */
3112 int bracketlev; /* current bracket level */
3113 int parlev; /* current parenthesis level */
3114 int attrparlev; /* __attribute__ parenthesis level */
3115 int templatelev; /* current template level */
3116 int typdefbracelev; /* bracelev where a typedef struct body begun */
3117 bool incomm, inquote, inchar, quotednl, midtoken;
3118 bool yacc_rules; /* in the rules part of a yacc file */
3119 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3120
3121
/* Set up both line buffers.  The class stack is only allocated while
   its size is still zero, so it is kept from one call to the next.  */
3122 linebuffer_init (&lbs[0].lb);
3123 linebuffer_init (&lbs[1].lb);
3124 if (cstack.size == 0)
3125 {
3126 cstack.size = (DEBUG) ? 1 : 4;
3127 cstack.nl = 0;
3128 cstack.cname = xnew (cstack.size, char *);
3129 cstack.bracelev = xnew (cstack.size, int);
3130 }
3131
3132 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3133 curndx = newndx = 0;
3134 lp = curlb.buffer;
3135 *lp = 0;
3136
/* Start from a clean parser state.  */
3137 fvdef = fvnone; fvextern = false; typdef = tnone;
3138 structdef = snone; definedef = dnone; objdef = onone;
3139 yacc_rules = false;
3140 midtoken = inquote = inchar = incomm = quotednl = false;
3141 token.valid = savetoken.valid = false;
3142 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* Separator written between a class name and a member name when a
   qualified tag name is built.  */
3143 if (cjava)
3144 { qualifier = "."; qlen = 1; }
3145 else
3146 { qualifier = "::"; qlen = 2; }
3147
3148
/* Main loop: one character per iteration.  The buffer starts out
   empty, so the first character seen is the '\0' that triggers the
   first line read.  */
3149 while (perhaps_more_input (inf))
3150 {
3151 c = *lp++;
3152 if (c == '\\')
3153 {
3154 /* If we are at the end of the line, the next character is a
3155 '\0'; do not skip it, because it is what tells us
3156 to read the next line. */
3157 if (*lp == '\0')
3158 {
3159 quotednl = true;
3160 continue;
3161 }
3162 lp++;
3163 c = ' ';
3164 }
/* Inside a comment, a string or a character constant only the
   closing delimiter and the end of line are of interest.  */
3165 else if (incomm)
3166 {
3167 switch (c)
3168 {
3169 case '*':
3170 if (*lp == '/')
3171 {
3172 c = *lp++;
3173 incomm = false;
3174 }
3175 break;
3176 case '\0':
3177 /* Newlines inside comments do not end macro definitions in
3178 traditional cpp. */
3179 CNL_SAVE_DEFINEDEF ();
3180 break;
3181 }
3182 continue;
3183 }
3184 else if (inquote)
3185 {
3186 switch (c)
3187 {
3188 case '"':
3189 inquote = false;
3190 break;
3191 case '\0':
3192 /* Newlines inside strings do not end macro definitions
3193 in traditional cpp, even though compilers don't
3194 usually accept them. */
3195 CNL_SAVE_DEFINEDEF ();
3196 break;
3197 }
3198 continue;
3199 }
3200 else if (inchar)
3201 {
3202 switch (c)
3203 {
3204 case '\0':
3205 /* Hmmm, something went wrong. */
3206 CNL ();
3207 /* FALLTHRU */
3208 case '\'':
3209 inchar = false;
3210 break;
3211 }
3212 continue;
3213 }
3214 else switch (c)
3215 {
3216 case '"':
3217 inquote = true;
3218 if (bracketlev > 0)
3219 continue;
3220 if (inattribute)
3221 break;
3222 switch (fvdef)
3223 {
3224 case fdefunkey:
3225 case fstartlist:
3226 case finlist:
3227 case fignore:
3228 case vignore:
3229 break;
3230 default:
3231 fvextern = false;
3232 fvdef = fvnone;
3233 }
3234 continue;
3235 case '\'':
3236 inchar = true;
3237 if (bracketlev > 0)
3238 continue;
3239 if (inattribute)
3240 break;
3241 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3242 {
3243 fvextern = false;
3244 fvdef = fvnone;
3245 }
3246 continue;
3247 case '/':
3248 if (*lp == '*')
3249 {
3250 incomm = true;
3251 lp++;
3252 c = ' ';
3253 if (bracketlev > 0)
3254 continue;
3255 }
3256 else if (/* cplpl && */ *lp == '/')
3257 {
/* A `//' comment: act as if the line ended here.  */
3258 c = '\0';
3259 }
3260 break;
3261 case '%':
3262 if ((c_ext & YACC) && *lp == '%')
3263 {
3264 /* Entering or exiting rules section in yacc file. */
3265 lp++;
3266 definedef = dnone; fvdef = fvnone; fvextern = false;
3267 typdef = tnone; structdef = snone;
3268 midtoken = inquote = inchar = incomm = quotednl = false;
3269 bracelev = 0;
3270 yacc_rules = !yacc_rules;
3271 continue;
3272 }
3273 else
3274 break;
3275 case '#':
3276 if (definedef == dnone)
3277 {
3278 char *cp;
3279 bool cpptoken = true;
3280
3281 /* Look back on this line. If all blanks, or nonblanks
3282 followed by an end of comment, this is a preprocessor
3283 token. */
3284 for (cp = newlb.buffer; cp < lp-1; cp++)
3285 if (!c_isspace (*cp))
3286 {
3287 if (*cp == '*' && cp[1] == '/')
3288 {
3289 cp++;
3290 cpptoken = true;
3291 }
3292 else
3293 cpptoken = false;
3294 }
3295 if (cpptoken)
3296 {
3297 definedef = dsharpseen;
3298 /* This is needed for tagging enum values: when there are
3299 preprocessor conditionals inside the enum, we need to
3300 reset the value of fvdef so that the next enum value is
3301 tagged even though the one before it did not end in a
3302 comma. */
3303 if (fvdef == vignore && instruct && parlev == 0)
3304 {
3305 if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3306 fvdef = fvnone;
3307 }
3308 }
3309 } /* if (definedef == dnone) */
3310 continue;
3311 case '[':
3312 bracketlev++;
3313 continue;
3314 default:
3315 if (bracketlev > 0)
3316 {
3317 if (c == ']')
3318 --bracketlev;
3319 else if (c == '\0')
3320 CNL_SAVE_DEFINEDEF ();
3321 continue;
3322 }
3323 break;
3324 } /* switch (c) */
3325
3326
3327 /* Consider token only if some involved conditions are satisfied. */
3328 if (typdef != tignore
3329 && definedef != dignorerest
3330 && fvdef != finlist
3331 && templatelev == 0
3332 && (definedef != dnone
3333 || structdef != scolonseen)
3334 && !inattribute)
3335 {
3336 if (midtoken)
3337 {
3338 if (endtoken (c))
3339 {
3340 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3341 /* This handles :: in the middle,
3342 but not at the beginning of an identifier.
3343 Also, space-separated :: is not recognized. */
3344 {
3345 if (c_ext & C_AUTO) /* automatic detection of C++ */
3346 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3347 lp += 2;
3348 toklen += 2;
3349 c = lp[-1];
3350 goto still_in_token;
3351 }
3352 else
3353 {
3354 bool funorvar = false;
3355
3356 if (yacc_rules
3357 || consider_token (newlb.buffer + tokoff, toklen, c,
3358 &c_ext, bracelev, parlev,
3359 &funorvar))
3360 {
/* After an `operator' keyword, stretch the token over the
   operator symbol that follows, up to the next blank or `('.  */
3361 if (fvdef == foperator)
3362 {
3363 char *oldlp = lp;
3364 lp = skip_spaces (lp-1);
3365 if (*lp != '\0')
3366 lp += 1;
3367 while (*lp != '\0'
3368 && !c_isspace (*lp) && *lp != '(')
3369 lp += 1;
3370 c = *lp++;
3371 toklen += lp - oldlp;
3372 }
/* Build the tag name in token_name; its form depends on the
   context the token was found in.  */
3373 token.named = false;
3374 if (!plainc
3375 && nestlev > 0 && definedef == dnone)
3376 /* in struct body */
3377 {
3378 if (class_qualify)
3379 {
3380 int len;
3381 write_classname (&token_name, qualifier);
3382 len = token_name.len;
3383 linebuffer_setlen (&token_name,
3384 len + qlen + toklen);
3385 sprintf (token_name.buffer + len, "%s%.*s",
3386 qualifier, toklen,
3387 newlb.buffer + tokoff);
3388 }
3389 else
3390 {
3391 linebuffer_setlen (&token_name, toklen);
3392 sprintf (token_name.buffer, "%.*s",
3393 toklen, newlb.buffer + tokoff);
3394 }
3395 token.named = true;
3396 }
3397 else if (objdef == ocatseen)
3398 /* Objective C category */
3399 {
3400 if (class_qualify)
3401 {
3402 int len = strlen (objtag) + 2 + toklen;
3403 linebuffer_setlen (&token_name, len);
3404 sprintf (token_name.buffer, "%s(%.*s)",
3405 objtag, toklen,
3406 newlb.buffer + tokoff);
3407 }
3408 else
3409 {
3410 linebuffer_setlen (&token_name, toklen);
3411 sprintf (token_name.buffer, "%.*s",
3412 toklen, newlb.buffer + tokoff);
3413 }
3414 token.named = true;
3415 }
3416 else if (objdef == omethodtag
3417 || objdef == omethodparm)
3418 /* Objective C method */
3419 {
3420 token.named = true;
3421 }
3422 else if (fvdef == fdefunname)
3423 /* GNU DEFUN and similar macros */
3424 {
3425 bool defun = (newlb.buffer[tokoff] == 'F');
3426 int off = tokoff;
3427 int len = toklen;
3428
3429 /* Rewrite the tag so that emacs lisp DEFUNs
3430 can be found by their elisp name */
3431 if (defun)
3432 {
3433 off += 1;
3434 len -= 1;
3435 }
3436 linebuffer_setlen (&token_name, len);
3437 memcpy (token_name.buffer,
3438 newlb.buffer + off, len);
3439 token_name.buffer[len] = '\0';
3440 if (defun)
3441 while (--len >= 0)
3442 if (token_name.buffer[len] == '_')
3443 token_name.buffer[len] = '-';
3444 token.named = defun;
3445 }
3446 else
3447 {
3448 linebuffer_setlen (&token_name, toklen);
3449 memcpy (token_name.buffer,
3450 newlb.buffer + tokoff, toklen);
3451 token_name.buffer[toklen] = '\0';
3452 /* Name macros and members. */
3453 token.named = (structdef == stagseen
3454 || typdef == ttypeseen
3455 || typdef == tend
3456 || (funorvar
3457 && definedef == dignorerest)
3458 || (funorvar
3459 && definedef == dnone
3460 && structdef == snone
3461 && bracelev > 0));
3462 }
/* Remember where the token was found.  */
3463 token.lineno = lineno;
3464 token.offset = tokoff;
3465 token.length = toklen;
3466 token.line = newlb.buffer;
3467 token.linepos = newlinepos;
3468 token.valid = true;
3469
/* When the tag can only be decided by what comes later, switch
   buffers so that the next line read does not overwrite the line
   token.line points into.  Otherwise the tag is made at once for
   preprocessor definitions, DEFUN names and struct members.  */
3470 if (definedef == dnone
3471 && (fvdef == fvnameseen
3472 || fvdef == foperator
3473 || structdef == stagseen
3474 || typdef == tend
3475 || typdef == ttypeseen
3476 || objdef != onone))
3477 {
3478 if (current_lb_is_new)
3479 switch_line_buffers ();
3480 }
3481 else if (definedef != dnone
3482 || fvdef == fdefunname
3483 || instruct)
3484 make_C_tag (funorvar);
3485 }
3486 else /* not yacc and consider_token failed */
3487 {
3488 if (inattribute && fvdef == fignore)
3489 {
3490 /* We have just met __attribute__ after a
3491 function parameter list: do not tag the
3492 function again. */
3493 fvdef = fvnone;
3494 }
3495 }
3496 midtoken = false;
3497 }
3498 } /* if (endtoken (c)) */
3499 else if (intoken (c))
3500 still_in_token:
3501 {
3502 toklen++;
3503 continue;
3504 }
3505 } /* if (midtoken) */
3506 else if (begtoken (c))
3507 {
3508 switch (definedef)
3509 {
3510 case dnone:
3511 switch (fvdef)
3512 {
3513 case fstartlist:
3514 /* This prevents tagging fb in
3515 void (__attribute__((noreturn)) *fb) (void);
3516 Fixing this is not easy and not very important. */
3517 fvdef = finlist;
3518 continue;
3519 case flistseen:
3520 if (plainc || declarations)
3521 {
3522 make_C_tag (true); /* a function */
3523 fvdef = fignore;
3524 }
3525 break;
3526 default:
3527 break;
3528 }
3529 if (structdef == stagseen && !cjava)
3530 {
3531 popclass_above (bracelev);
3532 structdef = snone;
3533 }
3534 break;
3535 case dsharpseen:
3536 savetoken = token;
3537 break;
3538 default:
3539 break;
3540 }
/* In the yacc rules section only a token starting in the first
   column opens a new token.  */
3541 if (!yacc_rules || lp == newlb.buffer + 1)
3542 {
3543 tokoff = lp - 1 - newlb.buffer;
3544 toklen = 1;
3545 midtoken = true;
3546 }
3547 continue;
3548 } /* if (begtoken) */
3549 } /* if must look at token */
3550
3551
3552 /* Detect end of line, colon, comma, semicolon and various braces
3553 after having handled a token.*/
3554 switch (c)
3555 {
3556 case ':':
3557 if (inattribute)
3558 break;
3559 if (yacc_rules && token.offset == 0 && token.valid)
3560 {
3561 make_C_tag (false); /* a yacc function */
3562 break;
3563 }
3564 if (definedef != dnone)
3565 break;
3566 switch (objdef)
3567 {
3568 case otagseen:
3569 objdef = oignore;
3570 make_C_tag (true); /* an Objective C class */
3571 break;
3572 case omethodtag:
3573 case omethodparm:
3574 objdef = omethodcolon;
3575 if (class_qualify)
3576 {
3577 int toklen = token_name.len; /* NOTE: shadows the function-level toklen */
3578 linebuffer_setlen (&token_name, toklen + 1);
3579 strcpy (token_name.buffer + toklen, ":");
3580 }
3581 break;
3582 default:
3583 break;
3584 }
3585 if (structdef == stagseen)
3586 {
3587 structdef = scolonseen;
3588 break;
3589 }
3590 /* Should be useless, but may be work as a safety net. */
3591 if (cplpl && fvdef == flistseen)
3592 {
3593 make_C_tag (true); /* a function */
3594 fvdef = fignore;
3595 break;
3596 }
3597 break;
3598 case ';':
3599 if (definedef != dnone || inattribute)
3600 break;
3601 switch (typdef)
3602 {
3603 case tend:
3604 case ttypeseen:
3605 make_C_tag (false); /* a typedef */
3606 typdef = tnone;
3607 fvdef = fvnone;
3608 break;
3609 case tnone:
3610 case tinbody:
3611 case tignore:
3612 switch (fvdef)
3613 {
3614 case fignore:
3615 if (typdef == tignore || cplpl)
3616 fvdef = fvnone;
3617 break;
3618 case fvnameseen:
3619 if ((globals && bracelev == 0 && (!fvextern || declarations))
3620 || (members && instruct))
3621 make_C_tag (false); /* a variable */
3622 fvextern = false;
3623 fvdef = fvnone;
3624 token.valid = false;
3625 break;
3626 case flistseen:
3627 if ((declarations
3628 && (cplpl || !instruct)
3629 && (typdef == tnone || (typdef != tignore && instruct)))
3630 || (members
3631 && plainc && instruct))
3632 make_C_tag (true); /* a function */
3633 /* FALLTHRU */
3634 default:
3635 fvextern = false;
3636 fvdef = fvnone;
3637 if (declarations
3638 && cplpl && structdef == stagseen)
3639 make_C_tag (false); /* forward declaration */
3640 else
3641 token.valid = false;
3642 } /* switch (fvdef) */
3643 /* FALLTHRU */
3644 default:
3645 if (!instruct)
3646 typdef = tnone;
3647 }
3648 if (structdef == stagseen)
3649 structdef = snone;
3650 break;
3651 case ',':
3652 if (definedef != dnone || inattribute)
3653 break;
3654 switch (objdef)
3655 {
3656 case omethodtag:
3657 case omethodparm:
3658 make_C_tag (true); /* an Objective C method */
3659 objdef = oinbody;
3660 break;
3661 default:
3662 break;
3663 }
3664 switch (fvdef)
3665 {
3666 case fdefunkey:
3667 case foperator:
3668 case fstartlist:
3669 case finlist:
3670 case fignore:
3671 break;
3672 case vignore:
3673 if (instruct && parlev == 0)
3674 fvdef = fvnone;
3675 break;
3676 case fdefunname:
3677 fvdef = fignore;
3678 break;
3679 case fvnameseen:
3680 if (parlev == 0
3681 && ((globals
3682 && bracelev == 0
3683 && templatelev == 0
3684 && (!fvextern || declarations))
3685 || (members && instruct)))
3686 make_C_tag (false); /* a variable */
3687 break;
3688 case flistseen:
3689 if ((declarations && typdef == tnone && !instruct)
3690 || (members && typdef != tignore && instruct))
3691 {
3692 make_C_tag (true); /* a function */
3693 fvdef = fvnameseen;
3694 }
3695 else if (!declarations)
3696 fvdef = fvnone;
3697 token.valid = false;
3698 break;
3699 default:
3700 fvdef = fvnone;
3701 }
3702 if (structdef == stagseen)
3703 structdef = snone;
3704 break;
3705 case ']':
3706 if (definedef != dnone || inattribute)
3707 break;
3708 if (structdef == stagseen)
3709 structdef = snone;
3710 switch (typdef)
3711 {
3712 case ttypeseen:
3713 case tend:
3714 typdef = tignore;
3715 make_C_tag (false); /* a typedef */
3716 break;
3717 case tnone:
3718 case tinbody:
3719 switch (fvdef)
3720 {
3721 case foperator:
3722 case finlist:
3723 case fignore:
3724 case vignore:
3725 break;
3726 case fvnameseen:
3727 if ((members && bracelev == 1)
3728 || (globals && bracelev == 0
3729 && (!fvextern || declarations)))
3730 make_C_tag (false); /* a variable */
3731 /* FALLTHRU */
3732 default:
3733 fvdef = fvnone;
3734 }
3735 break;
3736 default:
3737 break;
3738 }
3739 break;
3740 case '(':
3741 if (inattribute)
3742 {
3743 attrparlev++;
3744 break;
3745 }
3746 if (definedef != dnone)
3747 break;
3748 if (objdef == otagseen && parlev == 0)
3749 objdef = oparenseen;
3750 switch (fvdef)
3751 {
3752 case fvnameseen:
3753 if (typdef == ttypeseen
3754 && *lp != '*'
3755 && !instruct)
3756 {
3757 /* This handles constructs like:
3758 typedef void OperatorFun (int fun); */
3759 make_C_tag (false);
3760 typdef = tignore;
3761 fvdef = fignore;
3762 break;
3763 }
3764 /* FALLTHRU */
3765 case foperator:
3766 fvdef = fstartlist;
3767 break;
3768 case flistseen:
3769 fvdef = finlist;
3770 break;
3771 default:
3772 break;
3773 }
3774 parlev++;
3775 break;
3776 case ')':
3777 if (inattribute)
3778 {
3779 if (--attrparlev == 0)
3780 inattribute = false;
3781 break;
3782 }
3783 if (definedef != dnone)
3784 break;
3785 if (objdef == ocatseen && parlev == 1)
3786 {
3787 make_C_tag (true); /* an Objective C category */
3788 objdef = oignore;
3789 }
3790 if (--parlev == 0)
3791 {
3792 switch (fvdef)
3793 {
3794 case fstartlist:
3795 case finlist:
3796 fvdef = flistseen;
3797 break;
3798 default:
3799 break;
3800 }
3801 if (!instruct
3802 && (typdef == tend
3803 || typdef == ttypeseen))
3804 {
3805 typdef = tignore;
3806 make_C_tag (false); /* a typedef */
3807 }
3808 }
3809 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3810 parlev = 0;
3811 break;
3812 case '{':
3813 if (definedef != dnone)
3814 break;
3815 if (typdef == ttypeseen)
3816 {
3817 /* Whenever typdef is set to tinbody (currently only
3818 here), typdefbracelev should be set to bracelev. */
3819 typdef = tinbody;
3820 typdefbracelev = bracelev;
3821 }
3822 switch (fvdef)
3823 {
3824 case flistseen:
3825 if (cplpl && !class_qualify)
3826 {
3827 /* Remove class and namespace qualifiers from the token,
3828 leaving only the method/member name. */
3829 char *cc, *uqname = token_name.buffer;
3830 char *tok_end = token_name.buffer + token_name.len;
3831
3832 for (cc = token_name.buffer; cc < tok_end; cc++)
3833 {
3834 if (*cc == ':' && cc[1] == ':')
3835 {
3836 uqname = cc + 2;
3837 cc++;
3838 }
3839 }
3840 if (uqname > token_name.buffer)
3841 {
3842 int uqlen = strlen (uqname);
3843 linebuffer_setlen (&token_name, uqlen);
3844 memmove (token_name.buffer, uqname, uqlen + 1);
3845 }
3846 }
3847 make_C_tag (true); /* a function */
3848 /* FALLTHRU */
3849 case fignore:
3850 fvdef = fvnone;
3851 break;
3852 case fvnone:
3853 switch (objdef)
3854 {
3855 case otagseen:
3856 make_C_tag (true); /* an Objective C class */
3857 objdef = oignore;
3858 break;
3859 case omethodtag:
3860 case omethodparm:
3861 make_C_tag (true); /* an Objective C method */
3862 objdef = oinbody;
3863 break;
3864 default:
3865 /* Neutralize `extern "C" {' grot. */
3866 if (bracelev == 0 && structdef == snone && nestlev == 0
3867 && typdef == tnone)
3868 bracelev = -1;
3869 }
3870 break;
3871 default:
3872 break;
3873 }
3874 switch (structdef)
3875 {
3876 case skeyseen: /* unnamed struct */
3877 pushclass_above (bracelev, NULL, 0);
3878 structdef = snone;
3879 break;
3880 case stagseen: /* named struct or enum */
3881 case scolonseen: /* a class */
3882 pushclass_above (bracelev,token.line+token.offset, token.length);
3883 structdef = snone;
3884 make_C_tag (false); /* a struct or enum */
3885 break;
3886 default:
3887 break;
3888 }
3889 bracelev += 1;
3890 break;
3891 case '*':
3892 if (definedef != dnone)
3893 break;
3894 if (fvdef == fstartlist)
3895 {
3896 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3897 token.valid = false;
3898 }
3899 break;
3900 case '}':
3901 if (definedef != dnone)
3902 break;
3903 bracelev -= 1;
/* Unless ignoreindent is set, a `}' in the first column forces the
   brace and parenthesis levels back to zero.  */
3904 if (!ignoreindent && lp == newlb.buffer + 1)
3905 {
3906 if (bracelev != 0)
3907 token.valid = false; /* unexpected value, token unreliable */
3908 bracelev = 0; /* reset brace level if first column */
3909 parlev = 0; /* also reset paren level, just in case... */
3910 }
3911 else if (bracelev < 0)
3912 {
3913 token.valid = false; /* something gone amiss, token unreliable */
3914 bracelev = 0;
3915 }
3916 if (bracelev == 0 && fvdef == vignore)
3917 fvdef = fvnone; /* end of function */
3918 popclass_above (bracelev);
3919 structdef = snone;
3920 /* Only if typdef == tinbody is typdefbracelev significant. */
3921 if (typdef == tinbody && bracelev <= typdefbracelev)
3922 {
3923 assert (bracelev == typdefbracelev);
3924 typdef = tend;
3925 }
3926 break;
3927 case '=':
3928 if (definedef != dnone)
3929 break;
3930 switch (fvdef)
3931 {
3932 case foperator:
3933 case finlist:
3934 case fignore:
3935 case vignore:
3936 break;
3937 case fvnameseen:
3938 if ((members && bracelev == 1)
3939 || (globals && bracelev == 0 && (!fvextern || declarations)))
3940 make_C_tag (false); /* a variable */
3941 /* FALLTHRU */
3942 default:
3943 fvdef = vignore;
3944 }
3945 break;
3946 case '<':
3947 if (cplpl
3948 && (structdef == stagseen || fvdef == fvnameseen))
3949 {
3950 templatelev++;
3951 break;
3952 }
3953 goto resetfvdef;
3954 case '>':
3955 if (templatelev > 0)
3956 {
3957 templatelev--;
3958 break;
3959 }
3960 goto resetfvdef;
3961 case '+':
3962 case '-':
3963 if (objdef == oinbody && bracelev == 0)
3964 {
3965 objdef = omethodsign;
3966 break;
3967 }
3968 /* FALLTHRU */
3969 resetfvdef:
3970 case '#': case '~': case '&': case '%': case '/':
3971 case '|': case '^': case '!': case '.': case '?':
3972 if (definedef != dnone)
3973 break;
3974 /* These surely cannot follow a function tag in C. */
3975 switch (fvdef)
3976 {
3977 case foperator:
3978 case finlist:
3979 case fignore:
3980 case vignore:
3981 break;
3982 default:
3983 fvdef = fvnone;
3984 }
3985 break;
3986 case '\0':
3987 if (objdef == otagseen)
3988 {
3989 make_C_tag (true); /* an Objective C class */
3990 objdef = oignore;
3991 }
3992 /* If a macro spans multiple lines don't reset its state. */
3993 if (quotednl)
3994 CNL_SAVE_DEFINEDEF ();
3995 else
3996 CNL ();
3997 break;
3998 } /* switch (c) */
3999
4000 } /* while not eof */
4001
/* Release the storage of the two line buffers set up on entry.  */
4002 free (lbs[0].lb.buffer);
4003 free (lbs[1].lb.buffer);
4004 }
4005
4006 /*
4007 * Process either a C++ file or a C file depending on the setting
4008 * of a global flag.
4009 */
4010 static void
4011 default_C_entries (FILE *inf)
4012 {
4013 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4014 }
4015
/* Tag INF as plain C: no dialect flag is passed to C_entries.  */
static void
plain_C_entries (FILE *inf)
{
  const int no_extension = 0;

  C_entries (no_extension, inf);
}
4022
4023 /* Always do C++. */
4024 static void
4025 Cplusplus_entries (FILE *inf)
4026 {
4027 C_entries (C_PLPL, inf);
4028 }
4029
4030 /* Always do Java. */
4031 static void
4032 Cjava_entries (FILE *inf)
4033 {
4034 C_entries (C_JAVA, inf);
4035 }
4036
4037 /* Always do C*. */
4038 static void
4039 Cstar_entries (FILE *inf)
4040 {
4041 C_entries (C_STAR, inf);
4042 }
4043
4044 /* Always do Yacc. */
4045 static void
4046 Yacc_entries (FILE *inf)
4047 {
4048 C_entries (YACC, inf);
4049 }
4050
4051 \f
4052 /* Useful macros. */
/* LOOP_ON_INPUT_LINES is a loop header, to be followed by the loop
   body.  While perhaps_more_input (file_pointer) holds, it reads one
   line into line_buffer and sets char_pointer to the start of that
   line before each run of the body.  It expands to a plain `while',
   so `continue' and `break' in the body act on it.  */
4053 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4054 while (perhaps_more_input (file_pointer) \
4055 && (readline (&(line_buffer), file_pointer), \
4056 (char_pointer) = (line_buffer).buffer, \
4057 true)) \
4058
/* LOOKING_AT is true when cp points at the keyword kw and the
   character after it satisfies notinname; as a side effect cp is then
   moved past the keyword and any spaces that follow.  cp is left
   alone when the test fails.  */
4059 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4060 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4061 && strneq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4062 && notinname ((cp)[sizeof (kw)-1]) /* end of kw */ \
4063 && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */
4064
4065 /* Similar to LOOKING_AT but does not use notinname, does not skip */
/* spaces, and compares without regard to case: on a match cp is only
   moved to the character right after the keyword.  */
4066 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4067 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4068 && strncaseeq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4069 && ((cp) += sizeof (kw)-1)) /* advance past kw, no space skipping */
4070
4071 /*
4072 * Read a file, but do no processing. This is used to do regexp
4073 * matching on files that have no language defined.
4074 */
4075 static void
4076 just_read_file (FILE *inf)
4077 {
4078 while (perhaps_more_input (inf))
4079 readline (&lb, inf);
4080 }
4081
4082 \f
4083 /* Fortran parsing */
4084
/* Helpers for Fortran_functions; both work on the file-level
   scanning pointer dbp.  */
4085 static void F_takeprec (void);
4086 static void F_getit (FILE *);
4087
4088 static void
4089 F_takeprec (void)
4090 {
4091 dbp = skip_spaces (dbp);
4092 if (*dbp != '*')
4093 return;
4094 dbp++;
4095 dbp = skip_spaces (dbp);
4096 if (strneq (dbp, "(*)", 3))
4097 {
4098 dbp += 3;
4099 return;
4100 }
4101 if (!c_isdigit (*dbp))
4102 {
4103 --dbp; /* force failure */
4104 return;
4105 }
4106 do
4107 dbp++;
4108 while (c_isdigit (*dbp));
4109 }
4110
4111 static void
4112 F_getit (FILE *inf)
4113 {
4114 register char *cp;
4115
4116 dbp = skip_spaces (dbp);
4117 if (*dbp == '\0')
4118 {
4119 readline (&lb, inf);
4120 dbp = lb.buffer;
4121 if (dbp[5] != '&')
4122 return;
4123 dbp += 6;
4124 dbp = skip_spaces (dbp);
4125 }
4126 if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4127 return;
4128 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4129 continue;
4130 make_tag (dbp, cp-dbp, true,
4131 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4132 }
4133
4134
4135 static void
4136 Fortran_functions (FILE *inf)
4137 {
4138 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4139 {
4140 if (*dbp == '%')
4141 dbp++; /* Ratfor escape to fortran */
4142 dbp = skip_spaces (dbp);
4143 if (*dbp == '\0')
4144 continue;
4145
4146 if (LOOKING_AT_NOCASE (dbp, "recursive"))
4147 dbp = skip_spaces (dbp);
4148
4149 if (LOOKING_AT_NOCASE (dbp, "pure"))
4150 dbp = skip_spaces (dbp);
4151
4152 if (LOOKING_AT_NOCASE (dbp, "elemental"))
4153 dbp = skip_spaces (dbp);
4154
4155 switch (c_tolower (*dbp))
4156 {
4157 case 'i':
4158 if (nocase_tail ("integer"))
4159 F_takeprec ();
4160 break;
4161 case 'r':
4162 if (nocase_tail ("real"))
4163 F_takeprec ();
4164 break;
4165 case 'l':
4166 if (nocase_tail ("logical"))
4167 F_takeprec ();
4168 break;
4169 case 'c':
4170 if (nocase_tail ("complex") || nocase_tail ("character"))
4171 F_takeprec ();
4172 break;
4173 case 'd':
4174 if (nocase_tail ("double"))
4175 {
4176 dbp = skip_spaces (dbp);
4177 if (*dbp == '\0')
4178 continue;
4179 if (nocase_tail ("precision"))
4180 break;
4181 continue;
4182 }
4183 break;
4184 }
4185 dbp = skip_spaces (dbp);
4186 if (*dbp == '\0')
4187 continue;
4188 switch (c_tolower (*dbp))
4189 {
4190 case 'f':
4191 if (nocase_tail ("function"))
4192 F_getit (inf);
4193 continue;
4194 case 's':
4195 if (nocase_tail ("subroutine"))
4196 F_getit (inf);
4197 continue;
4198 case 'e':
4199 if (nocase_tail ("entry"))
4200 F_getit (inf);
4201 continue;
4202 case 'b':
4203 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4204 {
4205 dbp = skip_spaces (dbp);
4206 if (*dbp == '\0') /* assume un-named */
4207 make_tag ("blockdata", 9, true,
4208 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4209 else
4210 F_getit (inf); /* look for name */
4211 }
4212 continue;
4213 }
4214 }
4215 }
4216
4217 \f
4218 /*
4219 * Go language support
4220 * Original code by Xi Lu <lx@shellcodes.org> (2016)
4221 */
4222 static void
4223 Go_functions(FILE *inf)
4224 {
4225 char *cp, *name;
4226
4227 LOOP_ON_INPUT_LINES(inf, lb, cp)
4228 {
4229 cp = skip_spaces (cp);
4230
4231 if (LOOKING_AT (cp, "package"))
4232 {
4233 name = cp;
4234 while (!notinname (*cp) && *cp != '\0')
4235 cp++;
4236 make_tag (name, cp - name, false, lb.buffer,
4237 cp - lb.buffer + 1, lineno, linecharno);
4238 }
4239 else if (LOOKING_AT (cp, "func"))
4240 {
4241 /* Go implementation of interface, such as:
4242 func (n *Integer) Add(m Integer) ...
4243 skip `(n *Integer)` part.
4244 */
4245 if (*cp == '(')
4246 {
4247 while (*cp != ')')
4248 cp++;
4249 cp = skip_spaces (cp+1);
4250 }
4251
4252 if (*cp)
4253 {
4254 name = cp;
4255
4256 while (!notinname (*cp))
4257 cp++;
4258
4259 make_tag (name, cp - name, true, lb.buffer,
4260 cp - lb.buffer + 1, lineno, linecharno);
4261 }
4262 }
4263 else if (members && LOOKING_AT (cp, "type"))
4264 {
4265 name = cp;
4266
4267 /* Ignore the likes of the following:
4268 type (
4269 A
4270 )
4271 */
4272 if (*cp == '(')
4273 return;
4274
4275 while (!notinname (*cp) && *cp != '\0')
4276 cp++;
4277
4278 make_tag (name, cp - name, false, lb.buffer,
4279 cp - lb.buffer + 1, lineno, linecharno);
4280 }
4281 }
4282 }
4283
4284 \f
4285 /*
4286 * Ada parsing
4287 * Original code by
4288 * Philippe Waroquiers (1998)
4289 */
4290
4291 /* Once we are positioned after an "interesting" keyword, let's get
4292 the real tag value necessary. */
4293 static void
4294 Ada_getit (FILE *inf, const char *name_qualifier)
4295 {
4296 register char *cp;
4297 char *name;
4298 char c;
4299
4300 while (perhaps_more_input (inf))
4301 {
4302 dbp = skip_spaces (dbp);
4303 if (*dbp == '\0'
4304 || (dbp[0] == '-' && dbp[1] == '-'))
4305 {
4306 readline (&lb, inf);
4307 dbp = lb.buffer;
4308 }
4309 switch (c_tolower (*dbp))
4310 {
4311 case 'b':
4312 if (nocase_tail ("body"))
4313 {
4314 /* Skipping body of procedure body or package body or ....
4315 resetting qualifier to body instead of spec. */
4316 name_qualifier = "/b";
4317 continue;
4318 }
4319 break;
4320 case 't':
4321 /* Skipping type of task type or protected type ... */
4322 if (nocase_tail ("type"))
4323 continue;
4324 break;
4325 }
4326 if (*dbp == '"')
4327 {
4328 dbp += 1;
4329 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4330 continue;
4331 }
4332 else
4333 {
4334 dbp = skip_spaces (dbp);
4335 for (cp = dbp;
4336 c_isalnum (*cp) || *cp == '_' || *cp == '.';
4337 cp++)
4338 continue;
4339 if (cp == dbp)
4340 return;
4341 }
4342 c = *cp;
4343 *cp = '\0';
4344 name = concat (dbp, name_qualifier, "");
4345 *cp = c;
4346 make_tag (name, strlen (name), true,
4347 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4348 free (name);
4349 if (c == '"')
4350 dbp = cp + 1;
4351 return;
4352 }
4353 }
4354
4355 static void
4356 Ada_funcs (FILE *inf)
4357 {
4358 bool inquote = false;
4359 bool skip_till_semicolumn = false;
4360
4361 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4362 {
4363 while (*dbp != '\0')
4364 {
4365 /* Skip a string i.e. "abcd". */
4366 if (inquote || (*dbp == '"'))
4367 {
4368 dbp = strchr (dbp + !inquote, '"');
4369 if (dbp != NULL)
4370 {
4371 inquote = false;
4372 dbp += 1;
4373 continue; /* advance char */
4374 }
4375 else
4376 {
4377 inquote = true;
4378 break; /* advance line */
4379 }
4380 }
4381
4382 /* Skip comments. */
4383 if (dbp[0] == '-' && dbp[1] == '-')
4384 break; /* advance line */
4385
4386 /* Skip character enclosed in single quote i.e. 'a'
4387 and skip single quote starting an attribute i.e. 'Image. */
4388 if (*dbp == '\'')
4389 {
4390 dbp++ ;
4391 if (*dbp != '\0')
4392 dbp++;
4393 continue;
4394 }
4395
4396 if (skip_till_semicolumn)
4397 {
4398 if (*dbp == ';')
4399 skip_till_semicolumn = false;
4400 dbp++;
4401 continue; /* advance char */
4402 }
4403
4404 /* Search for beginning of a token. */
4405 if (!begtoken (*dbp))
4406 {
4407 dbp++;
4408 continue; /* advance char */
4409 }
4410
4411 /* We are at the beginning of a token. */
4412 switch (c_tolower (*dbp))
4413 {
4414 case 'f':
4415 if (!packages_only && nocase_tail ("function"))
4416 Ada_getit (inf, "/f");
4417 else
4418 break; /* from switch */
4419 continue; /* advance char */
4420 case 'p':
4421 if (!packages_only && nocase_tail ("procedure"))
4422 Ada_getit (inf, "/p");
4423 else if (nocase_tail ("package"))
4424 Ada_getit (inf, "/s");
4425 else if (nocase_tail ("protected")) /* protected type */
4426 Ada_getit (inf, "/t");
4427 else
4428 break; /* from switch */
4429 continue; /* advance char */
4430
4431 case 'u':
4432 if (typedefs && !packages_only && nocase_tail ("use"))
4433 {
4434 /* when tagging types, avoid tagging use type Pack.Typename;
4435 for this, we will skip everything till a ; */
4436 skip_till_semicolumn = true;
4437 continue; /* advance char */
4438 }
4439
4440 case 't':
4441 if (!packages_only && nocase_tail ("task"))
4442 Ada_getit (inf, "/k");
4443 else if (typedefs && !packages_only && nocase_tail ("type"))
4444 {
4445 Ada_getit (inf, "/t");
4446 while (*dbp != '\0')
4447 dbp += 1;
4448 }
4449 else
4450 break; /* from switch */
4451 continue; /* advance char */
4452 }
4453
4454 /* Look for the end of the token. */
4455 while (!endtoken (*dbp))
4456 dbp++;
4457
4458 } /* advance char */
4459 } /* advance line */
4460 }
4461
4462 \f
4463 /*
4464 * Unix and microcontroller assembly tag handling
4465 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4466 * Idea by Bob Weiner, Motorola Inc. (1994)
4467 */
4468 static void
4469 Asm_labels (FILE *inf)
4470 {
4471 register char *cp;
4472
4473 LOOP_ON_INPUT_LINES (inf, lb, cp)
4474 {
4475 /* If first char is alphabetic or one of [_.$], test for colon
4476 following identifier. */
4477 if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4478 {
4479 /* Read past label. */
4480 cp++;
4481 while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4482 cp++;
4483 if (*cp == ':' || c_isspace (*cp))
4484 /* Found end of label, so copy it and add it to the table. */
4485 make_tag (lb.buffer, cp - lb.buffer, true,
4486 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4487 }
4488 }
4489 }
4490
4491 \f
4492 /*
4493 * Perl support
4494 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4495 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4496 * Perl variable names: /^(my|local).../
4497 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4498 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4499 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4500 */
4501 static void
4502 Perl_functions (FILE *inf)
4503 {
4504 char *package = savestr ("main"); /* current package name */
4505 register char *cp;
4506
4507 LOOP_ON_INPUT_LINES (inf, lb, cp)
4508 {
4509 cp = skip_spaces (cp);
4510
4511 if (LOOKING_AT (cp, "package"))
4512 {
4513 free (package);
4514 get_tag (cp, &package);
4515 }
4516 else if (LOOKING_AT (cp, "sub"))
4517 {
4518 char *pos, *sp;
4519
4520 subr:
4521 sp = cp;
4522 while (!notinname (*cp))
4523 cp++;
4524 if (cp == sp)
4525 continue; /* nothing found */
4526 pos = strchr (sp, ':');
4527 if (pos && pos < cp && pos[1] == ':')
4528 /* The name is already qualified. */
4529 make_tag (sp, cp - sp, true,
4530 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4531 else
4532 /* Qualify it. */
4533 {
4534 char savechar, *name;
4535
4536 savechar = *cp;
4537 *cp = '\0';
4538 name = concat (package, "::", sp);
4539 *cp = savechar;
4540 make_tag (name, strlen (name), true,
4541 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4542 free (name);
4543 }
4544 }
4545 else if (LOOKING_AT (cp, "use constant")
4546 || LOOKING_AT (cp, "use constant::defer"))
4547 {
4548 /* For hash style multi-constant like
4549 use constant { FOO => 123,
4550 BAR => 456 };
4551 only the first FOO is picked up. Parsing across the value
4552 expressions would be difficult in general, due to possible nested
4553 hashes, here-documents, etc. */
4554 if (*cp == '{')
4555 cp = skip_spaces (cp+1);
4556 goto subr;
4557 }
4558 else if (globals) /* only if we are tagging global vars */
4559 {
4560 /* Skip a qualifier, if any. */
4561 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4562 /* After "my" or "local", but before any following paren or space. */
4563 char *varstart = cp;
4564
4565 if (qual /* should this be removed? If yes, how? */
4566 && (*cp == '$' || *cp == '@' || *cp == '%'))
4567 {
4568 varstart += 1;
4569 do
4570 cp++;
4571 while (c_isalnum (*cp) || *cp == '_');
4572 }
4573 else if (qual)
4574 {
4575 /* Should be examining a variable list at this point;
4576 could insist on seeing an open parenthesis. */
4577 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4578 cp++;
4579 }
4580 else
4581 continue;
4582
4583 make_tag (varstart, cp - varstart, false,
4584 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4585 }
4586 }
4587 free (package);
4588 }
4589
4590
4591 /*
4592 * Python support
4593 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4594 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4595 * More ideas by seb bacon <seb@jamkit.com> (2002)
4596 */
4597 static void
4598 Python_functions (FILE *inf)
4599 {
4600 register char *cp;
4601
4602 LOOP_ON_INPUT_LINES (inf, lb, cp)
4603 {
4604 cp = skip_spaces (cp);
4605 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4606 {
4607 char *name = cp;
4608 while (!notinname (*cp) && *cp != ':')
4609 cp++;
4610 make_tag (name, cp - name, true,
4611 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4612 }
4613 }
4614 }
4615
4616 /*
4617 * Ruby support
4618 * Original code by Xi Lu <lx@shellcodes.org> (2015)
4619 */
4620 static void
4621 Ruby_functions (FILE *inf)
4622 {
4623 char *cp = NULL;
4624
4625 LOOP_ON_INPUT_LINES (inf, lb, cp)
4626 {
4627 bool is_class = false;
4628 bool is_method = false;
4629 char *name;
4630
4631 cp = skip_spaces (cp);
4632 if (c_isalpha (*cp) && c_isupper (*cp)) /* constants */
4633 {
4634 char *bp, *colon = NULL;
4635
4636 name = cp;
4637
4638 for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
4639 {
4640 if (*cp == ':')
4641 colon = cp;
4642 }
4643 if (cp > name + 1)
4644 {
4645 bp = skip_spaces (cp);
4646 if (*bp == '=' && c_isspace (bp[1]))
4647 {
4648 if (colon && !c_isspace (colon[1]))
4649 name = colon + 1;
4650 make_tag (name, cp - name, false,
4651 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4652 }
4653 }
4654 }
4655 else if ((is_method = LOOKING_AT (cp, "def")) /* module/class/method */
4656 || (is_class = LOOKING_AT (cp, "class"))
4657 || LOOKING_AT (cp, "module"))
4658 {
4659 const char self_name[] = "self.";
4660 const size_t self_size1 = sizeof ("self.") - 1;
4661
4662 name = cp;
4663
4664 /* Ruby method names can end in a '='. Also, operator overloading can
4665 define operators whose names include '='. */
4666 while (!notinname (*cp) || *cp == '=')
4667 cp++;
4668
4669 /* Remove "self." from the method name. */
4670 if (cp - name > self_size1
4671 && strneq (name, self_name, self_size1))
4672 name += self_size1;
4673
4674 /* Remove the class/module qualifiers from method names. */
4675 if (is_method)
4676 {
4677 char *q;
4678
4679 for (q = name; q < cp && *q != '.'; q++)
4680 ;
4681 if (q < cp - 1) /* punt if we see just "FOO." */
4682 name = q + 1;
4683 }
4684
4685 /* Don't tag singleton classes. */
4686 if (is_class && strneq (name, "<<", 2) && cp == name + 2)
4687 continue;
4688
4689 make_tag (name, cp - name, true,
4690 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4691 }
4692 }
4693 }
4694
4695 \f
4696 /*
4697 * PHP support
4698 * Look for:
4699 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4700 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4701 * - /^[ \t]*define\(\"[^\"]+/
4702 * Only with --members:
4703 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4704 * Idea by Diez B. Roggisch (2001)
4705 */
4706 static void
4707 PHP_functions (FILE *inf)
4708 {
4709 char *cp, *name;
4710 bool search_identifier = false;
4711
4712 LOOP_ON_INPUT_LINES (inf, lb, cp)
4713 {
4714 cp = skip_spaces (cp);
4715 name = cp;
4716 if (search_identifier
4717 && *cp != '\0')
4718 {
4719 while (!notinname (*cp))
4720 cp++;
4721 make_tag (name, cp - name, true,
4722 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4723 search_identifier = false;
4724 }
4725 else if (LOOKING_AT (cp, "function"))
4726 {
4727 if (*cp == '&')
4728 cp = skip_spaces (cp+1);
4729 if (*cp != '\0')
4730 {
4731 name = cp;
4732 while (!notinname (*cp))
4733 cp++;
4734 make_tag (name, cp - name, true,
4735 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4736 }
4737 else
4738 search_identifier = true;
4739 }
4740 else if (LOOKING_AT (cp, "class"))
4741 {
4742 if (*cp != '\0')
4743 {
4744 name = cp;
4745 while (*cp != '\0' && !c_isspace (*cp))
4746 cp++;
4747 make_tag (name, cp - name, false,
4748 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4749 }
4750 else
4751 search_identifier = true;
4752 }
4753 else if (strneq (cp, "define", 6)
4754 && (cp = skip_spaces (cp+6))
4755 && *cp++ == '('
4756 && (*cp == '"' || *cp == '\''))
4757 {
4758 char quote = *cp++;
4759 name = cp;
4760 while (*cp != quote && *cp != '\0')
4761 cp++;
4762 make_tag (name, cp - name, false,
4763 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4764 }
4765 else if (members
4766 && LOOKING_AT (cp, "var")
4767 && *cp == '$')
4768 {
4769 name = cp;
4770 while (!notinname (*cp))
4771 cp++;
4772 make_tag (name, cp - name, false,
4773 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4774 }
4775 }
4776 }
4777
4778 \f
4779 /*
4780 * Cobol tag functions
4781 * We could look for anything that could be a paragraph name.
4782 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4783 * Idea by Corny de Souza (1993)
4784 */
4785 static void
4786 Cobol_paragraphs (FILE *inf)
4787 {
4788 register char *bp, *ep;
4789
4790 LOOP_ON_INPUT_LINES (inf, lb, bp)
4791 {
4792 if (lb.len < 9)
4793 continue;
4794 bp += 8;
4795
4796 /* If eoln, compiler option or comment ignore whole line. */
4797 if (bp[-1] != ' ' || !c_isalnum (bp[0]))
4798 continue;
4799
4800 for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
4801 continue;
4802 if (*ep++ == '.')
4803 make_tag (bp, ep - bp, true,
4804 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4805 }
4806 }
4807
4808 \f
4809 /*
4810 * Makefile support
4811 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4812 */
4813 static void
4814 Makefile_targets (FILE *inf)
4815 {
4816 register char *bp;
4817
4818 LOOP_ON_INPUT_LINES (inf, lb, bp)
4819 {
4820 if (*bp == '\t' || *bp == '#')
4821 continue;
4822 while (*bp != '\0' && *bp != '=' && *bp != ':')
4823 bp++;
4824 if (*bp == ':' || (globals && *bp == '='))
4825 {
4826 /* We should detect if there is more than one tag, but we do not.
4827 We just skip initial and final spaces. */
4828 char * namestart = skip_spaces (lb.buffer);
4829 while (--bp > namestart)
4830 if (!notinname (*bp))
4831 break;
4832 make_tag (namestart, bp - namestart + 1, true,
4833 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4834 }
4835 }
4836 }
4837
4838 \f
4839 /*
4840 * Pascal parsing
4841 * Original code by Mosur K. Mohan (1989)
4842 *
4843 * Locates tags for procedures & functions. Doesn't do any type- or
4844 * var-definitions. It does look for the keyword "extern" or
4845 * "forward" immediately following the procedure statement; if found,
4846 * the tag is skipped.
4847 */
4848 static void
4849 Pascal_functions (FILE *inf)
4850 {
4851 linebuffer tline; /* mostly copied from C_entries */
4852 long save_lcno;
4853 int save_lineno, namelen, taglen;
4854 char c, *name;
4855
4856 bool /* each of these flags is true if: */
4857 incomment, /* point is inside a comment */
4858 inquote, /* point is inside '..' string */
4859 get_tagname, /* point is after PROCEDURE/FUNCTION
4860 keyword, so next item = potential tag */
4861 found_tag, /* point is after a potential tag */
4862 inparms, /* point is within parameter-list */
4863 verify_tag; /* point has passed the parm-list, so the
4864 next token will determine whether this
4865 is a FORWARD/EXTERN to be ignored, or
4866 whether it is a real tag */
4867
4868 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4869 name = NULL; /* keep compiler quiet */
4870 dbp = lb.buffer;
4871 *dbp = '\0';
4872 linebuffer_init (&tline);
4873
4874 incomment = inquote = false;
4875 found_tag = false; /* have a proc name; check if extern */
4876 get_tagname = false; /* found "procedure" keyword */
4877 inparms = false; /* found '(' after "proc" */
4878 verify_tag = false; /* check if "extern" is ahead */
4879
4880
4881 while (perhaps_more_input (inf)) /* long main loop to get next char */
4882 {
4883 c = *dbp++;
4884 if (c == '\0') /* if end of line */
4885 {
4886 readline (&lb, inf);
4887 dbp = lb.buffer;
4888 if (*dbp == '\0')
4889 continue;
4890 if (!((found_tag && verify_tag)
4891 || get_tagname))
4892 c = *dbp++; /* only if don't need *dbp pointing
4893 to the beginning of the name of
4894 the procedure or function */
4895 }
4896 if (incomment)
4897 {
4898 if (c == '}') /* within { } comments */
4899 incomment = false;
4900 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4901 {
4902 dbp++;
4903 incomment = false;
4904 }
4905 continue;
4906 }
4907 else if (inquote)
4908 {
4909 if (c == '\'')
4910 inquote = false;
4911 continue;
4912 }
4913 else
4914 switch (c)
4915 {
4916 case '\'':
4917 inquote = true; /* found first quote */
4918 continue;
4919 case '{': /* found open { comment */
4920 incomment = true;
4921 continue;
4922 case '(':
4923 if (*dbp == '*') /* found open (* comment */
4924 {
4925 incomment = true;
4926 dbp++;
4927 }
4928 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4929 inparms = true;
4930 continue;
4931 case ')': /* end of parms list */
4932 if (inparms)
4933 inparms = false;
4934 continue;
4935 case ';':
4936 if (found_tag && !inparms) /* end of proc or fn stmt */
4937 {
4938 verify_tag = true;
4939 break;
4940 }
4941 continue;
4942 }
4943 if (found_tag && verify_tag && (*dbp != ' '))
4944 {
4945 /* Check if this is an "extern" declaration. */
4946 if (*dbp == '\0')
4947 continue;
4948 if (c_tolower (*dbp) == 'e')
4949 {
4950 if (nocase_tail ("extern")) /* superfluous, really! */
4951 {
4952 found_tag = false;
4953 verify_tag = false;
4954 }
4955 }
4956 else if (c_tolower (*dbp) == 'f')
4957 {
4958 if (nocase_tail ("forward")) /* check for forward reference */
4959 {
4960 found_tag = false;
4961 verify_tag = false;
4962 }
4963 }
4964 if (found_tag && verify_tag) /* not external proc, so make tag */
4965 {
4966 found_tag = false;
4967 verify_tag = false;
4968 make_tag (name, namelen, true,
4969 tline.buffer, taglen, save_lineno, save_lcno);
4970 continue;
4971 }
4972 }
4973 if (get_tagname) /* grab name of proc or fn */
4974 {
4975 char *cp;
4976
4977 if (*dbp == '\0')
4978 continue;
4979
4980 /* Find block name. */
4981 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4982 continue;
4983
4984 /* Save all values for later tagging. */
4985 linebuffer_setlen (&tline, lb.len);
4986 strcpy (tline.buffer, lb.buffer);
4987 save_lineno = lineno;
4988 save_lcno = linecharno;
4989 name = tline.buffer + (dbp - lb.buffer);
4990 namelen = cp - dbp;
4991 taglen = cp - lb.buffer + 1;
4992
4993 dbp = cp; /* set dbp to e-o-token */
4994 get_tagname = false;
4995 found_tag = true;
4996 continue;
4997
4998 /* And proceed to check for "extern". */
4999 }
5000 else if (!incomment && !inquote && !found_tag)
5001 {
5002 /* Check for proc/fn keywords. */
5003 switch (c_tolower (c))
5004 {
5005 case 'p':
5006 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
5007 get_tagname = true;
5008 continue;
5009 case 'f':
5010 if (nocase_tail ("unction"))
5011 get_tagname = true;
5012 continue;
5013 }
5014 }
5015 } /* while not eof */
5016
5017 free (tline.buffer);
5018 }
5019
5020 \f
5021 /*
5022 * Lisp tag functions
5023 * look for (def or (DEF, quote or QUOTE
5024 */
5025
5026 static void L_getit (void);
5027
5028 static void
5029 L_getit (void)
5030 {
5031 if (*dbp == '\'') /* Skip prefix quote */
5032 dbp++;
5033 else if (*dbp == '(')
5034 {
5035 dbp++;
5036 /* Try to skip "(quote " */
5037 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
5038 /* Ok, then skip "(" before name in (defstruct (foo)) */
5039 dbp = skip_spaces (dbp);
5040 }
5041 get_tag (dbp, NULL);
5042 }
5043
5044 static void
5045 Lisp_functions (FILE *inf)
5046 {
5047 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5048 {
5049 if (dbp[0] != '(')
5050 continue;
5051
5052 /* "(defvar foo)" is a declaration rather than a definition. */
5053 if (! declarations)
5054 {
5055 char *p = dbp + 1;
5056 if (LOOKING_AT (p, "defvar"))
5057 {
5058 p = skip_name (p); /* past var name */
5059 p = skip_spaces (p);
5060 if (*p == ')')
5061 continue;
5062 }
5063 }
5064
5065 if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
5066 dbp += 3;
5067
5068 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5069 {
5070 dbp = skip_non_spaces (dbp);
5071 dbp = skip_spaces (dbp);
5072 L_getit ();
5073 }
5074 else
5075 {
5076 /* Check for (foo::defmumble name-defined ... */
5077 do
5078 dbp++;
5079 while (!notinname (*dbp) && *dbp != ':');
5080 if (*dbp == ':')
5081 {
5082 do
5083 dbp++;
5084 while (*dbp == ':');
5085
5086 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5087 {
5088 dbp = skip_non_spaces (dbp);
5089 dbp = skip_spaces (dbp);
5090 L_getit ();
5091 }
5092 }
5093 }
5094 }
5095 }
5096
5097 \f
5098 /*
5099 * Lua script language parsing
5100 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5101 *
5102 * "function" and "local function" are tags if they start at column 1.
5103 */
5104 static void
5105 Lua_functions (FILE *inf)
5106 {
5107 register char *bp;
5108
5109 LOOP_ON_INPUT_LINES (inf, lb, bp)
5110 {
5111 bp = skip_spaces (bp);
5112 if (bp[0] != 'f' && bp[0] != 'l')
5113 continue;
5114
5115 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5116
5117 if (LOOKING_AT (bp, "function"))
5118 {
5119 char *tag_name, *tp_dot, *tp_colon;
5120
5121 get_tag (bp, &tag_name);
5122 /* If the tag ends with ".foo" or ":foo", make an additional tag for
5123 "foo". */
5124 tp_dot = strrchr (tag_name, '.');
5125 tp_colon = strrchr (tag_name, ':');
5126 if (tp_dot || tp_colon)
5127 {
5128 char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
5129 int len_add = p - tag_name + 1;
5130
5131 get_tag (bp + len_add, NULL);
5132 }
5133 }
5134 }
5135 }
5136
5137 \f
5138 /*
5139 * PostScript tags
5140 * Just look for lines where the first character is '/'
5141 * Also look at "defineps" for PSWrap
5142 * Ideas by:
5143 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5144 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5145 */
5146 static void
5147 PS_functions (FILE *inf)
5148 {
5149 register char *bp, *ep;
5150
5151 LOOP_ON_INPUT_LINES (inf, lb, bp)
5152 {
5153 if (bp[0] == '/')
5154 {
5155 for (ep = bp+1;
5156 *ep != '\0' && *ep != ' ' && *ep != '{';
5157 ep++)
5158 continue;
5159 make_tag (bp, ep - bp, true,
5160 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5161 }
5162 else if (LOOKING_AT (bp, "defineps"))
5163 get_tag (bp, NULL);
5164 }
5165 }
5166
5167 \f
5168 /*
5169 * Forth tags
5170 * Ignore anything after \ followed by space or in ( )
5171 * Look for words defined by :
5172 * Look for constant, code, create, defer, value, and variable
5173 * OBP extensions: Look for buffer:, field,
5174 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5175 */
5176 static void
5177 Forth_words (FILE *inf)
5178 {
5179 register char *bp;
5180
5181 LOOP_ON_INPUT_LINES (inf, lb, bp)
5182 while ((bp = skip_spaces (bp))[0] != '\0')
5183 if (bp[0] == '\\' && c_isspace (bp[1]))
5184 break; /* read next line */
5185 else if (bp[0] == '(' && c_isspace (bp[1]))
5186 do /* skip to ) or eol */
5187 bp++;
5188 while (*bp != ')' && *bp != '\0');
5189 else if ((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5190 || LOOKING_AT_NOCASE (bp, "constant")
5191 || LOOKING_AT_NOCASE (bp, "code")
5192 || LOOKING_AT_NOCASE (bp, "create")
5193 || LOOKING_AT_NOCASE (bp, "defer")
5194 || LOOKING_AT_NOCASE (bp, "value")
5195 || LOOKING_AT_NOCASE (bp, "variable")
5196 || LOOKING_AT_NOCASE (bp, "buffer:")
5197 || LOOKING_AT_NOCASE (bp, "field"))
5198 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5199 else
5200 bp = skip_non_spaces (bp);
5201 }
5202
5203 \f
5204 /*
5205 * Scheme tag functions
5206 * look for (def... xyzzy
5207 * (def... (xyzzy
5208 * (def ... ((...(xyzzy ....
5209 * (set! xyzzy
5210 * Original code by Ken Haase (1985?)
5211 */
5212 static void
5213 Scheme_functions (FILE *inf)
5214 {
5215 register char *bp;
5216
5217 LOOP_ON_INPUT_LINES (inf, lb, bp)
5218 {
5219 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5220 {
5221 bp = skip_non_spaces (bp+4);
5222 /* Skip over open parens and white space. Don't continue past
5223 '\0'. */
5224 while (*bp && notinname (*bp))
5225 bp++;
5226 get_tag (bp, NULL);
5227 }
5228 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5229 get_tag (bp, NULL);
5230 }
5231 }
5232
5233 \f
5234 /* Find tags in TeX and LaTeX input files. */
5235
5236 /* TEX_toktab is a table of TeX control sequences that define tags.
5237 * Each entry records one such control sequence.
5238 *
5239 * Original code from who knows whom.
5240 * Ideas by:
5241 * Stefan Monnier (2002)
5242 */
5243
5244 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5245
5246 /* Default set of control sequences to put into TEX_toktab.
5247 The value of environment var TEXTAGS is prepended to this. */
5248 static const char *TEX_defenv = "\
5249 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5250 :part:appendix:entry:index:def\
5251 :newcommand:renewcommand:newenvironment:renewenvironment";
5252
5253 static void TEX_decode_env (const char *, const char *);
5254
5255 /*
5256 * TeX/LaTeX scanning loop.
5257 */
5258 static void
5259 TeX_commands (FILE *inf)
5260 {
5261 char *cp;
5262 linebuffer *key;
5263
5264 char TEX_esc = '\0';
5265 char TEX_opgrp, TEX_clgrp;
5266
5267 /* Initialize token table once from environment. */
5268 if (TEX_toktab == NULL)
5269 TEX_decode_env ("TEXTAGS", TEX_defenv);
5270
5271 LOOP_ON_INPUT_LINES (inf, lb, cp)
5272 {
5273 /* Look at each TEX keyword in line. */
5274 for (;;)
5275 {
5276 /* Look for a TEX escape. */
5277 while (true)
5278 {
5279 char c = *cp++;
5280 if (c == '\0' || c == '%')
5281 goto tex_next_line;
5282
5283 /* Select either \ or ! as escape character, whichever comes
5284 first outside a comment. */
5285 if (!TEX_esc)
5286 switch (c)
5287 {
5288 case '\\':
5289 TEX_esc = c;
5290 TEX_opgrp = '{';
5291 TEX_clgrp = '}';
5292 break;
5293
5294 case '!':
5295 TEX_esc = c;
5296 TEX_opgrp = '<';
5297 TEX_clgrp = '>';
5298 break;
5299 }
5300
5301 if (c == TEX_esc)
5302 break;
5303 }
5304
5305 for (key = TEX_toktab; key->buffer != NULL; key++)
5306 if (strneq (cp, key->buffer, key->len))
5307 {
5308 char *p;
5309 int namelen, linelen;
5310 bool opgrp = false;
5311
5312 cp = skip_spaces (cp + key->len);
5313 if (*cp == TEX_opgrp)
5314 {
5315 opgrp = true;
5316 cp++;
5317 }
5318 for (p = cp;
5319 (!c_isspace (*p) && *p != '#' &&
5320 *p != TEX_opgrp && *p != TEX_clgrp);
5321 p++)
5322 continue;
5323 namelen = p - cp;
5324 linelen = lb.len;
5325 if (!opgrp || *p == TEX_clgrp)
5326 {
5327 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5328 p++;
5329 linelen = p - lb.buffer + 1;
5330 }
5331 make_tag (cp, namelen, true,
5332 lb.buffer, linelen, lineno, linecharno);
5333 goto tex_next_line; /* We only tag a line once */
5334 }
5335 }
5336 tex_next_line:
5337 ;
5338 }
5339 }
5340
5341 /* Read environment and prepend it to the default string.
5342 Build token table. */
5343 static void
5344 TEX_decode_env (const char *evarname, const char *defenv)
5345 {
5346 register const char *env, *p;
5347 int i, len;
5348
5349 /* Append default string to environment. */
5350 env = getenv (evarname);
5351 if (!env)
5352 env = defenv;
5353 else
5354 env = concat (env, defenv, "");
5355
5356 /* Allocate a token table */
5357 for (len = 1, p = env; (p = strchr (p, ':')); )
5358 if (*++p)
5359 len++;
5360 TEX_toktab = xnew (len, linebuffer);
5361
5362 /* Unpack environment string into token table. Be careful about */
5363 /* zero-length strings (leading ':', "::" and trailing ':') */
5364 for (i = 0; *env != '\0';)
5365 {
5366 p = strchr (env, ':');
5367 if (!p) /* End of environment string. */
5368 p = env + strlen (env);
5369 if (p - env > 0)
5370 { /* Only non-zero strings. */
5371 TEX_toktab[i].buffer = savenstr (env, p - env);
5372 TEX_toktab[i].len = p - env;
5373 i++;
5374 }
5375 if (*p)
5376 env = p + 1;
5377 else
5378 {
5379 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5380 TEX_toktab[i].len = 0;
5381 break;
5382 }
5383 }
5384 }
5385
5386 \f
5387 /* Texinfo support. Dave Love, Mar. 2000. */
5388 static void
5389 Texinfo_nodes (FILE *inf)
5390 {
5391 char *cp, *start;
5392 LOOP_ON_INPUT_LINES (inf, lb, cp)
5393 if (LOOKING_AT (cp, "@node"))
5394 {
5395 start = cp;
5396 while (*cp != '\0' && *cp != ',')
5397 cp++;
5398 make_tag (start, cp - start, true,
5399 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5400 }
5401 }
5402
5403 \f
5404 /*
5405 * HTML support.
5406 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5407 * Contents of <a name=xxx> are tags with name xxx.
5408 *
5409 * Francesco Potortì, 2002.
5410 */
5411 static void
5412 HTML_labels (FILE *inf)
5413 {
5414 bool getnext = false; /* next text outside of HTML tags is a tag */
5415 bool skiptag = false; /* skip to the end of the current HTML tag */
5416 bool intag = false; /* inside an html tag, looking for ID= */
5417 bool inanchor = false; /* when INTAG, is an anchor, look for NAME= */
5418 char *end;
5419
5420
5421 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5422
5423 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5424 for (;;) /* loop on the same line */
5425 {
5426 if (skiptag) /* skip HTML tag */
5427 {
5428 while (*dbp != '\0' && *dbp != '>')
5429 dbp++;
5430 if (*dbp == '>')
5431 {
5432 dbp += 1;
5433 skiptag = false;
5434 continue; /* look on the same line */
5435 }
5436 break; /* go to next line */
5437 }
5438
5439 else if (intag) /* look for "name=" or "id=" */
5440 {
5441 while (*dbp != '\0' && *dbp != '>'
5442 && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5443 dbp++;
5444 if (*dbp == '\0')
5445 break; /* go to next line */
5446 if (*dbp == '>')
5447 {
5448 dbp += 1;
5449 intag = false;
5450 continue; /* look on the same line */
5451 }
5452 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5453 || LOOKING_AT_NOCASE (dbp, "id="))
5454 {
5455 bool quoted = (dbp[0] == '"');
5456
5457 if (quoted)
5458 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5459 continue;
5460 else
5461 for (end = dbp; *end != '\0' && intoken (*end); end++)
5462 continue;
5463 linebuffer_setlen (&token_name, end - dbp);
5464 memcpy (token_name.buffer, dbp, end - dbp);
5465 token_name.buffer[end - dbp] = '\0';
5466
5467 dbp = end;
5468 intag = false; /* we found what we looked for */
5469 skiptag = true; /* skip to the end of the tag */
5470 getnext = true; /* then grab the text */
5471 continue; /* look on the same line */
5472 }
5473 dbp += 1;
5474 }
5475
5476 else if (getnext) /* grab next tokens and tag them */
5477 {
5478 dbp = skip_spaces (dbp);
5479 if (*dbp == '\0')
5480 break; /* go to next line */
5481 if (*dbp == '<')
5482 {
5483 intag = true;
5484 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5485 continue; /* look on the same line */
5486 }
5487
5488 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5489 continue;
5490 make_tag (token_name.buffer, token_name.len, true,
5491 dbp, end - dbp, lineno, linecharno);
5492 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5493 getnext = false;
5494 break; /* go to next line */
5495 }
5496
5497 else /* look for an interesting HTML tag */
5498 {
5499 while (*dbp != '\0' && *dbp != '<')
5500 dbp++;
5501 if (*dbp == '\0')
5502 break; /* go to next line */
5503 intag = true;
5504 if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5505 {
5506 inanchor = true;
5507 continue; /* look on the same line */
5508 }
5509 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5510 || LOOKING_AT_NOCASE (dbp, "<h1>")
5511 || LOOKING_AT_NOCASE (dbp, "<h2>")
5512 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5513 {
5514 intag = false;
5515 getnext = true;
5516 continue; /* look on the same line */
5517 }
5518 dbp += 1;
5519 }
5520 }
5521 }
5522
5523 \f
5524 /*
5525 * Prolog support
5526 *
5527 * Assumes that the predicate or rule starts at column 0.
5528 * Only the first clause of a predicate or rule is added.
5529 * Original code by Sunichirou Sugou (1989)
5530 * Rewritten by Anders Lindgren (1996)
5531 */
5532 static size_t prolog_pr (char *, char *);
5533 static void prolog_skip_comment (linebuffer *, FILE *);
5534 static size_t prolog_atom (char *, size_t);
5535
5536 static void
5537 Prolog_functions (FILE *inf)
5538 {
5539 char *cp, *last;
5540 size_t len;
5541 size_t allocated;
5542
5543 allocated = 0;
5544 len = 0;
5545 last = NULL;
5546
5547 LOOP_ON_INPUT_LINES (inf, lb, cp)
5548 {
5549 if (cp[0] == '\0') /* Empty line */
5550 continue;
5551 else if (c_isspace (cp[0])) /* Not a predicate */
5552 continue;
5553 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5554 prolog_skip_comment (&lb, inf);
5555 else if ((len = prolog_pr (cp, last)) > 0)
5556 {
5557 /* Predicate or rule. Store the function name so that we
5558 only generate a tag for the first clause. */
5559 if (last == NULL)
5560 last = xnew (len + 1, char);
5561 else if (len + 1 > allocated)
5562 xrnew (last, len + 1, char);
5563 allocated = len + 1;
5564 memcpy (last, cp, len);
5565 last[len] = '\0';
5566 }
5567 }
5568 free (last);
5569 }
5570
5571
5572 static void
5573 prolog_skip_comment (linebuffer *plb, FILE *inf)
5574 {
5575 char *cp;
5576
5577 do
5578 {
5579 for (cp = plb->buffer; *cp != '\0'; cp++)
5580 if (cp[0] == '*' && cp[1] == '/')
5581 return;
5582 readline (plb, inf);
5583 }
5584 while (perhaps_more_input (inf));
5585 }
5586
5587 /*
5588 * A predicate or rule definition is added if it matches:
5589 * <beginning of line><Prolog Atom><whitespace>(
5590 * or <beginning of line><Prolog Atom><whitespace>:-
5591 *
5592 * It is added to the tags database if it doesn't match the
5593 * name of the previous clause header.
5594 *
5595 * Return the size of the name of the predicate or rule, or 0 if no
5596 * header was found.
5597 */
5598 static size_t
5599 prolog_pr (char *s, char *last)
5600
5601 /* Name of last clause. */
5602 {
5603 size_t pos;
5604 size_t len;
5605
5606 pos = prolog_atom (s, 0);
5607 if (! pos)
5608 return 0;
5609
5610 len = pos;
5611 pos = skip_spaces (s + pos) - s;
5612
5613 if ((s[pos] == '.'
5614 || (s[pos] == '(' && (pos += 1))
5615 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5616 && (last == NULL /* save only the first clause */
5617 || len != strlen (last)
5618 || !strneq (s, last, len)))
5619 {
5620 make_tag (s, len, true, s, pos, lineno, linecharno);
5621 return len;
5622 }
5623 else
5624 return 0;
5625 }
5626
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   (an underscore is accepted wherever a letter is).
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 *
 * S is the string to examine and POS the offset at which the atom is
 * expected to start.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const size_t start = pos;

  if (c_islower (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted.  */
      do
        pos++;
      while (c_isalnum (s[pos]) || s[pos] == '_');
      return pos - start;
    }

  if (s[pos] != '\'')
    return 0;

  /* The atom is quoted: look for the closing quote.  */
  pos++;
  for (;;)
    switch (s[pos])
      {
      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return 0;

      case '\\':
        /* A backslash takes the next character with it, which must
           exist.  */
        if (s[pos + 1] == '\0')
          return 0;
        pos += 2;
        break;

      case '\'':
        pos++;
        if (s[pos] != '\'')
          return pos - start;   /* that was the closing quote */
        pos++;                  /* A double quote */
        break;

      default:
        pos++;
        break;
      }
}
5683
5684 \f
5685 /*
5686 * Support for Erlang
5687 *
5688 * Generates tags for functions, defines, and records.
5689 * Assumes that Erlang functions start at column 0.
5690 * Original code by Anders Lindgren (1996)
5691 */
5692 static int erlang_func (char *, char *);
5693 static void erlang_attribute (char *);
5694 static int erlang_atom (char *);
5695
5696 static void
5697 Erlang_functions (FILE *inf)
5698 {
5699 char *cp, *last;
5700 int len;
5701 int allocated;
5702
5703 allocated = 0;
5704 len = 0;
5705 last = NULL;
5706
5707 LOOP_ON_INPUT_LINES (inf, lb, cp)
5708 {
5709 if (cp[0] == '\0') /* Empty line */
5710 continue;
5711 else if (c_isspace (cp[0])) /* Not function nor attribute */
5712 continue;
5713 else if (cp[0] == '%') /* comment */
5714 continue;
5715 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5716 continue;
5717 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5718 {
5719 erlang_attribute (cp);
5720 if (last != NULL)
5721 {
5722 free (last);
5723 last = NULL;
5724 }
5725 }
5726 else if ((len = erlang_func (cp, last)) > 0)
5727 {
5728 /*
5729 * Function. Store the function name so that we only
5730 * generates a tag for the first clause.
5731 */
5732 if (last == NULL)
5733 last = xnew (len + 1, char);
5734 else if (len + 1 > allocated)
5735 xrnew (last, len + 1, char);
5736 allocated = len + 1;
5737 memcpy (last, cp, len);
5738 last[len] = '\0';
5739 }
5740 }
5741 free (last);
5742 }
5743
5744
5745 /*
5746 * A function definition is added if it matches:
5747 * <beginning of line><Erlang Atom><whitespace>(
5748 *
5749 * It is added to the tags database if it doesn't match the
5750 * name of the previous clause header.
5751 *
5752 * Return the size of the name of the function, or 0 if no function
5753 * was found.
5754 */
5755 static int
5756 erlang_func (char *s, char *last)
5757
5758 /* Name of last clause. */
5759 {
5760 int pos;
5761 int len;
5762
5763 pos = erlang_atom (s);
5764 if (pos < 1)
5765 return 0;
5766
5767 len = pos;
5768 pos = skip_spaces (s + pos) - s;
5769
5770 /* Save only the first clause. */
5771 if (s[pos++] == '('
5772 && (last == NULL
5773 || len != (int)strlen (last)
5774 || !strneq (s, last, len)))
5775 {
5776 make_tag (s, len, true, s, pos, lineno, linecharno);
5777 return len;
5778 }
5779
5780 return 0;
5781 }
5782
5783
5784 /*
5785 * Handle attributes. Currently, tags are generated for defines
5786 * and records.
5787 *
5788 * They are on the form:
5789 * -define(foo, bar).
5790 * -define(Foo(M, N), M+N).
5791 * -record(graph, {vtab = notable, cyclic = true}).
5792 */
5793 static void
5794 erlang_attribute (char *s)
5795 {
5796 char *cp = s;
5797
5798 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5799 && *cp++ == '(')
5800 {
5801 int len = erlang_atom (skip_spaces (cp));
5802 if (len > 0)
5803 make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5804 }
5805 return;
5806 }
5807
5808
/*
 * Consume an Erlang atom (or variable) at the start of S.
 *
 * Two forms are accepted: a letter or underscore followed by any
 * number of alphanumerics/underscores, or a single-quoted string in
 * which a backslash quotes the following character.
 *
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or the quoted atom is not terminated on this line.
 */
static int
erlang_atom (char *s)
{
  int n = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: look for the closing quote.  */
      n = 1;
      while (s[n] != '\'')
        {
          if (s[n] == '\0')
            return 0;		/* multiline quoted atoms are ignored */
          if (s[n] == '\\')
            {
              n++;		/* step onto the quoted character */
              if (s[n] == '\0')
                return 0;
            }
          n++;
        }
      return n + 1;		/* include the closing quote */
    }

  if (c_isalpha (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted.  */
      n = 1;
      while (c_isalnum (s[n]) || s[n] == '_')
        n++;
    }

  return n;
}
5836
5837 \f
5838 static char *scan_separators (char *);
5839 static void add_regex (char *, language *);
5840 static char *substitute (char *, char *, struct re_registers *);
5841
/*
 * Take a string like "/blah/" and turn it into "blah", working in
 * place.  The first character of NAME is the separator; scanning
 * stops at the first unquoted occurrence of that separator.
 *
 * A backslash quotes the next character: \a \b \d \e \f \n \r \t \v
 * are turned into the corresponding control characters, a quoted
 * separator becomes a plain separator, and for any other character
 * both the backslash and the character are kept.
 *
 * The extracted text is written at the start of NAME and is null
 * terminated.  Return a pointer to the terminating separator, or NULL
 * for unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *dst = name;		/* where the next output char goes */
  char *src = name + 1;		/* next input char to examine */
  bool escaped = false;		/* previous char was an unquoted backslash */
  char *result;

  while (*src != '\0')
    {
      const char c = *src;

      if (escaped)
        {
          escaped = false;
          switch (c)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell)		 */
            case 'b': *dst++ = '\b'; break;   /* BS (back space)	 */
            case 'd': *dst++ = 0177; break;   /* DEL (delete)		 */
            case 'e': *dst++ = 033; break;    /* ESC (escape)		 */
            case 'f': *dst++ = '\f'; break;   /* FF (form feed)	 */
            case 'n': *dst++ = '\n'; break;   /* NL (new line)		 */
            case 'r': *dst++ = '\r'; break;   /* CR (carriage return) */
            case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v'; break;   /* VT (vertical tab)	 */
            default:
              /* A quoted separator loses its backslash; anything
                 else keeps it.  */
              if (c != sep)
                *dst++ = '\\';
              *dst++ = c;
              break;
            }
        }
      else if (c == '\\')
        escaped = true;
      else if (c == sep)
        break;
      else
        *dst++ = c;

      src++;
    }

  /* Decide on the return value before terminating the copy.  */
  result = (*src == sep) ? src : NULL; /* NULL: unterminated regexp */

  /* Terminate copied string. */
  *dst = '\0';
  return result;
}
5900
5901 /* Look at the argument of --regex or --no-regex and do the right
5902 thing. Same for each line of a regexp file. */
5903 static void
5904 analyze_regex (char *regex_arg)
5905 {
5906 if (regex_arg == NULL)
5907 {
5908 free_regexps (); /* --no-regex: remove existing regexps */
5909 return;
5910 }
5911
5912 /* A real --regexp option or a line in a regexp file. */
5913 switch (regex_arg[0])
5914 {
5915 /* Comments in regexp file or null arg to --regex. */
5916 case '\0':
5917 case ' ':
5918 case '\t':
5919 break;
5920
5921 /* Read a regex file. This is recursive and may result in a
5922 loop, which will stop when the file descriptors are exhausted. */
5923 case '@':
5924 {
5925 FILE *regexfp;
5926 linebuffer regexbuf;
5927 char *regexfile = regex_arg + 1;
5928
5929 /* regexfile is a file containing regexps, one per line. */
5930 regexfp = fopen (regexfile, "r" FOPEN_BINARY);
5931 if (regexfp == NULL)
5932 pfatal (regexfile);
5933 linebuffer_init (&regexbuf);
5934 while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
5935 analyze_regex (regexbuf.buffer);
5936 free (regexbuf.buffer);
5937 if (fclose (regexfp) != 0)
5938 pfatal (regexfile);
5939 }
5940 break;
5941
5942 /* Regexp to be used for a specific language only. */
5943 case '{':
5944 {
5945 language *lang;
5946 char *lang_name = regex_arg + 1;
5947 char *cp;
5948
5949 for (cp = lang_name; *cp != '}'; cp++)
5950 if (*cp == '\0')
5951 {
5952 error ("unterminated language name in regex: %s", regex_arg);
5953 return;
5954 }
5955 *cp++ = '\0';
5956 lang = get_language_from_langname (lang_name);
5957 if (lang == NULL)
5958 return;
5959 add_regex (cp, lang);
5960 }
5961 break;
5962
5963 /* Regexp to be used for any language. */
5964 default:
5965 add_regex (regex_arg, NULL);
5966 break;
5967 }
5968 }
5969
5970 /* Separate the regexp pattern, compile it,
5971 and care for optional name and modifiers. */
5972 static void
5973 add_regex (char *regexp_pattern, language *lang)
5974 {
5975 static struct re_pattern_buffer zeropattern;
5976 char sep, *pat, *name, *modifiers;
5977 char empty = '\0';
5978 const char *err;
5979 struct re_pattern_buffer *patbuf;
5980 regexp *rp;
5981 bool
5982 force_explicit_name = true, /* do not use implicit tag names */
5983 ignore_case = false, /* case is significant */
5984 multi_line = false, /* matches are done one line at a time */
5985 single_line = false; /* dot does not match newline */
5986
5987
5988 if (strlen (regexp_pattern) < 3)
5989 {
5990 error ("null regexp");
5991 return;
5992 }
5993 sep = regexp_pattern[0];
5994 name = scan_separators (regexp_pattern);
5995 if (name == NULL)
5996 {
5997 error ("%s: unterminated regexp", regexp_pattern);
5998 return;
5999 }
6000 if (name[1] == sep)
6001 {
6002 error ("null name for regexp \"%s\"", regexp_pattern);
6003 return;
6004 }
6005 modifiers = scan_separators (name);
6006 if (modifiers == NULL) /* no terminating separator --> no name */
6007 {
6008 modifiers = name;
6009 name = &empty;
6010 }
6011 else
6012 modifiers += 1; /* skip separator */
6013
6014 /* Parse regex modifiers. */
6015 for (; modifiers[0] != '\0'; modifiers++)
6016 switch (modifiers[0])
6017 {
6018 case 'N':
6019 if (modifiers == name)
6020 error ("forcing explicit tag name but no name, ignoring");
6021 force_explicit_name = true;
6022 break;
6023 case 'i':
6024 ignore_case = true;
6025 break;
6026 case 's':
6027 single_line = true;
6028 /* FALLTHRU */
6029 case 'm':
6030 multi_line = true;
6031 need_filebuf = true;
6032 break;
6033 default:
6034 error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
6035 break;
6036 }
6037
6038 patbuf = xnew (1, struct re_pattern_buffer);
6039 *patbuf = zeropattern;
6040 if (ignore_case)
6041 {
6042 static char lc_trans[UCHAR_MAX + 1];
6043 int i;
6044 for (i = 0; i < UCHAR_MAX + 1; i++)
6045 lc_trans[i] = c_tolower (i);
6046 patbuf->translate = lc_trans; /* translation table to fold case */
6047 }
6048
6049 if (multi_line)
6050 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6051 else
6052 pat = regexp_pattern;
6053
6054 if (single_line)
6055 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6056 else
6057 re_set_syntax (RE_SYNTAX_EMACS);
6058
6059 err = re_compile_pattern (pat, strlen (pat), patbuf);
6060 if (multi_line)
6061 free (pat);
6062 if (err != NULL)
6063 {
6064 error ("%s while compiling pattern", err);
6065 return;
6066 }
6067
6068 rp = p_head;
6069 p_head = xnew (1, regexp);
6070 p_head->pattern = savestr (regexp_pattern);
6071 p_head->p_next = rp;
6072 p_head->lang = lang;
6073 p_head->pat = patbuf;
6074 p_head->name = savestr (name);
6075 p_head->error_signaled = false;
6076 p_head->force_explicit_name = force_explicit_name;
6077 p_head->ignore_case = ignore_case;
6078 p_head->multi_line = multi_line;
6079 }
6080
6081 /*
6082 * Do the substitutions indicated by the regular expression and
6083 * arguments.
6084 */
6085 static char *
6086 substitute (char *in, char *out, struct re_registers *regs)
6087 {
6088 char *result, *t;
6089 int size, dig, diglen;
6090
6091 result = NULL;
6092 size = strlen (out);
6093
6094 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6095 if (out[size - 1] == '\\')
6096 fatal ("pattern error in \"%s\"", out);
6097 for (t = strchr (out, '\\');
6098 t != NULL;
6099 t = strchr (t + 2, '\\'))
6100 if (c_isdigit (t[1]))
6101 {
6102 dig = t[1] - '0';
6103 diglen = regs->end[dig] - regs->start[dig];
6104 size += diglen - 2;
6105 }
6106 else
6107 size -= 1;
6108
6109 /* Allocate space and do the substitutions. */
6110 assert (size >= 0);
6111 result = xnew (size + 1, char);
6112
6113 for (t = result; *out != '\0'; out++)
6114 if (*out == '\\' && c_isdigit (*++out))
6115 {
6116 dig = *out - '0';
6117 diglen = regs->end[dig] - regs->start[dig];
6118 memcpy (t, in + regs->start[dig], diglen);
6119 t += diglen;
6120 }
6121 else
6122 *t++ = *out;
6123 *t = '\0';
6124
6125 assert (t <= result + size);
6126 assert (t - result == (int)strlen (result));
6127
6128 return result;
6129 }
6130
6131 /* Deallocate all regexps. */
6132 static void
6133 free_regexps (void)
6134 {
6135 regexp *rp;
6136 while (p_head != NULL)
6137 {
6138 rp = p_head->p_next;
6139 free (p_head->pattern);
6140 free (p_head->name);
6141 free (p_head);
6142 p_head = rp;
6143 }
6144 return;
6145 }
6146
6147 /*
6148 * Reads the whole file as a single string from `filebuf' and looks for
6149 * multi-line regular expressions, creating tags on matches.
6150 * readline already dealt with normal regexps.
6151 *
6152 * Idea by Ben Wing <ben@666.com> (2002).
6153 */
6154 static void
6155 regex_tag_multiline (void)
6156 {
6157 char *buffer = filebuf.buffer;
6158 regexp *rp;
6159 char *name;
6160
6161 for (rp = p_head; rp != NULL; rp = rp->p_next)
6162 {
6163 int match = 0;
6164
6165 if (!rp->multi_line)
6166 continue; /* skip normal regexps */
6167
6168 /* Generic initializations before parsing file from memory. */
6169 lineno = 1; /* reset global line number */
6170 charno = 0; /* reset global char number */
6171 linecharno = 0; /* reset global char number of line start */
6172
6173 /* Only use generic regexps or those for the current language. */
6174 if (rp->lang != NULL && rp->lang != curfdp->lang)
6175 continue;
6176
6177 while (match >= 0 && match < filebuf.len)
6178 {
6179 match = re_search (rp->pat, buffer, filebuf.len, charno,
6180 filebuf.len - match, &rp->regs);
6181 switch (match)
6182 {
6183 case -2:
6184 /* Some error. */
6185 if (!rp->error_signaled)
6186 {
6187 error ("regexp stack overflow while matching \"%s\"",
6188 rp->pattern);
6189 rp->error_signaled = true;
6190 }
6191 break;
6192 case -1:
6193 /* No match. */
6194 break;
6195 default:
6196 if (match == rp->regs.end[0])
6197 {
6198 if (!rp->error_signaled)
6199 {
6200 error ("regexp matches the empty string: \"%s\"",
6201 rp->pattern);
6202 rp->error_signaled = true;
6203 }
6204 match = -3; /* exit from while loop */
6205 break;
6206 }
6207
6208 /* Match occurred. Construct a tag. */
6209 while (charno < rp->regs.end[0])
6210 if (buffer[charno++] == '\n')
6211 lineno++, linecharno = charno;
6212 name = rp->name;
6213 if (name[0] == '\0')
6214 name = NULL;
6215 else /* make a named tag */
6216 name = substitute (buffer, rp->name, &rp->regs);
6217 if (rp->force_explicit_name)
6218 /* Force explicit tag name, if a name is there. */
6219 pfnote (name, true, buffer + linecharno,
6220 charno - linecharno + 1, lineno, linecharno);
6221 else
6222 make_tag (name, strlen (name), true, buffer + linecharno,
6223 charno - linecharno + 1, lineno, linecharno);
6224 break;
6225 }
6226 }
6227 }
6228 }
6229
6230 \f
6231 static bool
6232 nocase_tail (const char *cp)
6233 {
6234 int len = 0;
6235
6236 while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6237 cp++, len++;
6238 if (*cp == '\0' && !intoken (dbp[len]))
6239 {
6240 dbp += len;
6241 return true;
6242 }
6243 return false;
6244 }
6245
6246 static void
6247 get_tag (register char *bp, char **namepp)
6248 {
6249 register char *cp = bp;
6250
6251 if (*bp != '\0')
6252 {
6253 /* Go till you get to white space or a syntactic break */
6254 for (cp = bp + 1; !notinname (*cp); cp++)
6255 continue;
6256 make_tag (bp, cp - bp, true,
6257 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6258 }
6259
6260 if (namepp != NULL)
6261 *namepp = savenstr (bp, cp - bp);
6262 }
6263
6264 /*
6265 * Read a line of text from `stream' into `lbp', excluding the
6266 * newline or CR-NL, if any. Return the number of characters read from
6267 * `stream', which is the length of the line including the newline.
6268 *
6269 * On DOS or Windows we do not count the CR character, if any before the
6270 * NL, in the returned length; this mirrors the behavior of Emacs on those
6271 * platforms (for text files, it translates CR-NL to NL as it reads in the
6272 * file).
6273 *
6274 * If multi-line regular expressions are requested, each line read is
6275 * appended to `filebuf'.
6276 */
6277 static long
6278 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6279 {
6280 char *buffer = lbp->buffer;
6281 char *p = lbp->buffer;
6282 char *pend;
6283 int chars_deleted;
6284
6285 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6286
6287 for (;;)
6288 {
6289 register int c = getc (stream);
6290 if (p == pend)
6291 {
6292 /* We're at the end of linebuffer: expand it. */
6293 lbp->size *= 2;
6294 xrnew (buffer, lbp->size, char);
6295 p += buffer - lbp->buffer;
6296 pend = buffer + lbp->size;
6297 lbp->buffer = buffer;
6298 }
6299 if (c == EOF)
6300 {
6301 if (ferror (stream))
6302 perror (filename);
6303 *p = '\0';
6304 chars_deleted = 0;
6305 break;
6306 }
6307 if (c == '\n')
6308 {
6309 if (p > buffer && p[-1] == '\r')
6310 {
6311 p -= 1;
6312 chars_deleted = 2;
6313 }
6314 else
6315 {
6316 chars_deleted = 1;
6317 }
6318 *p = '\0';
6319 break;
6320 }
6321 *p++ = c;
6322 }
6323 lbp->len = p - buffer;
6324
6325 if (need_filebuf /* we need filebuf for multi-line regexps */
6326 && chars_deleted > 0) /* not at EOF */
6327 {
6328 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6329 {
6330 /* Expand filebuf. */
6331 filebuf.size *= 2;
6332 xrnew (filebuf.buffer, filebuf.size, char);
6333 }
6334 memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6335 filebuf.len += lbp->len;
6336 filebuf.buffer[filebuf.len++] = '\n';
6337 filebuf.buffer[filebuf.len] = '\0';
6338 }
6339
6340 return lbp->len + chars_deleted;
6341 }
6342
6343 /*
6344 * Like readline_internal, above, but in addition try to match the
6345 * input line against relevant regular expressions and manage #line
6346 * directives.
6347 */
6348 static void
6349 readline (linebuffer *lbp, FILE *stream)
6350 {
6351 long result;
6352
6353 linecharno = charno; /* update global char number of line start */
6354 result = readline_internal (lbp, stream, infilename); /* read line */
6355 lineno += 1; /* increment global line number */
6356 charno += result; /* increment global char number */
6357
6358 /* Honor #line directives. */
6359 if (!no_line_directive)
6360 {
6361 static bool discard_until_line_directive;
6362
6363 /* Check whether this is a #line directive. */
6364 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6365 {
6366 unsigned int lno;
6367 int start = 0;
6368
6369 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6370 && start > 0) /* double quote character found */
6371 {
6372 char *endp = lbp->buffer + start;
6373
6374 while ((endp = strchr (endp, '"')) != NULL
6375 && endp[-1] == '\\')
6376 endp++;
6377 if (endp != NULL)
6378 /* Ok, this is a real #line directive. Let's deal with it. */
6379 {
6380 char *taggedabsname; /* absolute name of original file */
6381 char *taggedfname; /* name of original file as given */
6382 char *name; /* temp var */
6383
6384 discard_until_line_directive = false; /* found it */
6385 name = lbp->buffer + start;
6386 *endp = '\0';
6387 canonicalize_filename (name);
6388 taggedabsname = absolute_filename (name, tagfiledir);
6389 if (filename_is_absolute (name)
6390 || filename_is_absolute (curfdp->infname))
6391 taggedfname = savestr (taggedabsname);
6392 else
6393 taggedfname = relative_filename (taggedabsname,tagfiledir);
6394
6395 if (streq (curfdp->taggedfname, taggedfname))
6396 /* The #line directive is only a line number change. We
6397 deal with this afterwards. */
6398 free (taggedfname);
6399 else
6400 /* The tags following this #line directive should be
6401 attributed to taggedfname. In order to do this, set
6402 curfdp accordingly. */
6403 {
6404 fdesc *fdp; /* file description pointer */
6405
6406 /* Go look for a file description already set up for the
6407 file indicated in the #line directive. If there is
6408 one, use it from now until the next #line
6409 directive. */
6410 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6411 if (streq (fdp->infname, curfdp->infname)
6412 && streq (fdp->taggedfname, taggedfname))
6413 /* If we remove the second test above (after the &&)
6414 then all entries pertaining to the same file are
6415 coalesced in the tags file. If we use it, then
6416 entries pertaining to the same file but generated
6417 from different files (via #line directives) will
6418 go into separate sections in the tags file. These
6419 alternatives look equivalent. The first one
6420 destroys some apparently useless information. */
6421 {
6422 curfdp = fdp;
6423 free (taggedfname);
6424 break;
6425 }
6426 /* Else, if we already tagged the real file, skip all
6427 input lines until the next #line directive. */
6428 if (fdp == NULL) /* not found */
6429 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6430 if (streq (fdp->infabsname, taggedabsname))
6431 {
6432 discard_until_line_directive = true;
6433 free (taggedfname);
6434 break;
6435 }
6436 /* Else create a new file description and use that from
6437 now on, until the next #line directive. */
6438 if (fdp == NULL) /* not found */
6439 {
6440 fdp = fdhead;
6441 fdhead = xnew (1, fdesc);
6442 *fdhead = *curfdp; /* copy curr. file description */
6443 fdhead->next = fdp;
6444 fdhead->infname = savestr (curfdp->infname);
6445 fdhead->infabsname = savestr (curfdp->infabsname);
6446 fdhead->infabsdir = savestr (curfdp->infabsdir);
6447 fdhead->taggedfname = taggedfname;
6448 fdhead->usecharno = false;
6449 fdhead->prop = NULL;
6450 fdhead->written = false;
6451 curfdp = fdhead;
6452 }
6453 }
6454 free (taggedabsname);
6455 lineno = lno - 1;
6456 readline (lbp, stream);
6457 return;
6458 } /* if a real #line directive */
6459 } /* if #line is followed by a number */
6460 } /* if line begins with "#line " */
6461
6462 /* If we are here, no #line directive was found. */
6463 if (discard_until_line_directive)
6464 {
6465 if (result > 0)
6466 {
6467 /* Do a tail recursion on ourselves, thus discarding the contents
6468 of the line buffer. */
6469 readline (lbp, stream);
6470 return;
6471 }
6472 /* End of file. */
6473 discard_until_line_directive = false;
6474 return;
6475 }
6476 } /* if #line directives should be considered */
6477
6478 {
6479 int match;
6480 regexp *rp;
6481 char *name;
6482
6483 /* Match against relevant regexps. */
6484 if (lbp->len > 0)
6485 for (rp = p_head; rp != NULL; rp = rp->p_next)
6486 {
6487 /* Only use generic regexps or those for the current language.
6488 Also do not use multiline regexps, which is the job of
6489 regex_tag_multiline. */
6490 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6491 || rp->multi_line)
6492 continue;
6493
6494 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6495 switch (match)
6496 {
6497 case -2:
6498 /* Some error. */
6499 if (!rp->error_signaled)
6500 {
6501 error ("regexp stack overflow while matching \"%s\"",
6502 rp->pattern);
6503 rp->error_signaled = true;
6504 }
6505 break;
6506 case -1:
6507 /* No match. */
6508 break;
6509 case 0:
6510 /* Empty string matched. */
6511 if (!rp->error_signaled)
6512 {
6513 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6514 rp->error_signaled = true;
6515 }
6516 break;
6517 default:
6518 /* Match occurred. Construct a tag. */
6519 name = rp->name;
6520 if (name[0] == '\0')
6521 name = NULL;
6522 else /* make a named tag */
6523 name = substitute (lbp->buffer, rp->name, &rp->regs);
6524 if (rp->force_explicit_name)
6525 /* Force explicit tag name, if a name is there. */
6526 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6527 else
6528 make_tag (name, strlen (name), true,
6529 lbp->buffer, match, lineno, linecharno);
6530 break;
6531 }
6532 }
6533 }
6534 }
6535
6536 \f
/*
 * Return a newly allocated copy of the null-terminated string CP.
 * The copy is made by savenstr and occupies strlen(cp)+1 bytes.
 */
static char *
savestr (const char *cp)
{
  size_t length = strlen (cp);

  return savenstr (cp, length);
}
6546
6547 /*
6548 * Return a pointer to a space of size LEN+1 allocated with xnew where
6549 * the string CP has been copied for at most the first LEN characters.
6550 */
6551 static char *
6552 savenstr (const char *cp, int len)
6553 {
6554 char *dp = xnew (len + 1, char);
6555 dp[len] = '\0';
6556 return memcpy (dp, cp, len);
6557 }
6558
/* Return a pointer to the first character at or after CP that is not
   white space according to c_isspace (the end of the string is not
   space, so scanning stops there at the latest).  */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
6567
/* Return a pointer to the first white space character at or after
   CP, or to the terminating null if there is none.  */
static char *
skip_non_spaces (char *cp)
{
  for (; *cp != '\0'; cp++)
    if (c_isspace (*cp))
      break;
  return cp;
}
6576
/* Skip any chars in the "name" class: return a pointer to the first
   character at or after CP for which notinname holds.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so loop stops there too */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
6586
/* Print a diagnostic built from the printf-style FORMAT and its
   arguments (via verror), then exit with EXIT_FAILURE.  */
static void
fatal (char const *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);

  exit (EXIT_FAILURE);
}
6597
/* Report the current errno with perror, using S1 as the message
   prefix, then exit with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6604
6605 static void
6606 suggest_asking_for_help (void)
6607 {
6608 fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
6609 progname);
6610 exit (EXIT_FAILURE);
6611 }
6612
/* Output a diagnostic with printf-style FORMAT and args.  The message
   is formatted by verror; unlike fatal, this function returns.  */
static void
error (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);
}
6622
6623 static void
6624 verror (char const *format, va_list ap)
6625 {
6626 fprintf (stderr, "%s: ", progname);
6627 vfprintf (stderr, format, ap);
6628 fprintf (stderr, "\n");
6629 }
6630
6631 /* Return a newly-allocated string whose contents
6632 concatenate those of s1, s2, s3. */
6633 static char *
6634 concat (const char *s1, const char *s2, const char *s3)
6635 {
6636 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6637 char *result = xnew (len1 + len2 + len3 + 1, char);
6638
6639 strcpy (result, s1);
6640 strcpy (result + len1, s2);
6641 strcpy (result + len1 + len2, s3);
6642
6643 return result;
6644 }
6645
6646 \f
6647 /* Does the same work as the system V getcwd, but does not need to
6648 guess the buffer size in advance. */
6649 static char *
6650 etags_getcwd (void)
6651 {
6652 int bufsize = 200;
6653 char *path = xnew (bufsize, char);
6654
6655 while (getcwd (path, bufsize) == NULL)
6656 {
6657 if (errno != ERANGE)
6658 pfatal ("getcwd");
6659 bufsize *= 2;
6660 free (path);
6661 path = xnew (bufsize, char);
6662 }
6663
6664 canonicalize_filename (path);
6665 return path;
6666 }
6667
6668 /* Return a newly allocated string containing a name of a temporary file. */
6669 static char *
6670 etags_mktmp (void)
6671 {
6672 const char *tmpdir = getenv ("TMPDIR");
6673 const char *slash = "/";
6674
6675 #if MSDOS || defined (DOS_NT)
6676 if (!tmpdir)
6677 tmpdir = getenv ("TEMP");
6678 if (!tmpdir)
6679 tmpdir = getenv ("TMP");
6680 if (!tmpdir)
6681 tmpdir = ".";
6682 if (tmpdir[strlen (tmpdir) - 1] == '/'
6683 || tmpdir[strlen (tmpdir) - 1] == '\\')
6684 slash = "";
6685 #else
6686 if (!tmpdir)
6687 tmpdir = "/tmp";
6688 if (tmpdir[strlen (tmpdir) - 1] == '/')
6689 slash = "";
6690 #endif
6691
6692 char *templt = concat (tmpdir, slash, "etXXXXXX");
6693 int fd = mkostemp (templt, O_CLOEXEC);
6694 if (fd < 0 || close (fd) != 0)
6695 {
6696 int temp_errno = errno;
6697 free (templt);
6698 errno = temp_errno;
6699 templt = NULL;
6700 }
6701
6702 #if defined (DOS_NT)
6703 /* The file name will be used in shell redirection, so it needs to have
6704 DOS-style backslashes, or else the Windows shell will barf. */
6705 char *p;
6706 for (p = templt; *p; p++)
6707 if (*p == '/')
6708 *p = '\\';
6709 #endif
6710
6711 return templt;
6712 }
6713
6714 /* Return a newly allocated string containing the file name of FILE
6715 relative to the absolute directory DIR (which should end with a slash). */
6716 static char *
6717 relative_filename (char *file, char *dir)
6718 {
6719 char *fp, *dp, *afn, *res;
6720 int i;
6721
6722 /* Find the common root of file and dir (with a trailing slash). */
6723 afn = absolute_filename (file, cwd);
6724 fp = afn;
6725 dp = dir;
6726 while (*fp++ == *dp++)
6727 continue;
6728 fp--, dp--; /* back to the first differing char */
6729 #ifdef DOS_NT
6730 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6731 return afn;
6732 #endif
6733 do /* look at the equal chars until '/' */
6734 fp--, dp--;
6735 while (*fp != '/');
6736
6737 /* Build a sequence of "../" strings for the resulting relative file name. */
6738 i = 0;
6739 while ((dp = strchr (dp + 1, '/')) != NULL)
6740 i += 1;
6741 res = xnew (3*i + strlen (fp + 1) + 1, char);
6742 char *z = res;
6743 while (i-- > 0)
6744 z = stpcpy (z, "../");
6745
6746 /* Add the file name relative to the common root of file and dir. */
6747 strcpy (z, fp + 1);
6748 free (afn);
6749
6750 return res;
6751 }
6752
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is not already absolute it is appended to DIR.  The
   "/dirname/.." and "/." substrings are then removed in place.  If
   nothing is left, "/" is returned.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *res;
  char *slash;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slash = strchr (res, '/');
  while (slash != NULL && slash[0] != '\0')
    {
      const bool dot = (slash[1] == '.');

      if (dot && slash[2] == '.'
          && (slash[3] == '/' || slash[3] == '\0'))
        {
          /* "/..": walk back to where the preceding component starts
             and copy what follows the ".." over it.  */
          char *prev = slash;

          do
            prev--;
          while (prev >= res && !filename_is_absolute (prev));
          if (prev < res)
            prev = slash;	/* the absolute name begins with "/.." */
#ifdef DOS_NT
          /* Under MSDOS and NT we get `d:/NAME' as absolute
             file name, so the luser could say `d:/../NAME'.
             We silently treat this as `d:/NAME'.  */
          else if (prev[0] != '/')
            prev = slash;
#endif
          memmove (prev, slash + 3, strlen (slash + 2));
          slash = prev;		/* look again at the same place */
        }
      else if (dot && (slash[2] == '/' || slash[2] == '\0'))
        {
          /* "/.": simply drop it and look again at the same place.  */
          memmove (slash, slash + 2, strlen (slash + 1));
        }
      else
        slash = strchr (slash + 1, '/');
    }

  if (res[0] == '\0')		/* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }

  return res;
}
6815
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).

   If FILE contains no slash, the result is a copy of DIR.
   Otherwise FILE is temporarily cut just after its last slash while
   absolute_filename is applied to it, and is restored afterwards.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');

  if (last_slash == NULL)
    return savestr (dir);

  /* Truncate FILE after its directory part, then put back the
     character that was overwritten.  */
  char saved = last_slash[1];
  last_slash[1] = '\0';
  char *res = absolute_filename (file, dir);
  last_slash[1] = saved;

  return res;
}
6835
/* Whether the argument string is an absolute file name, i.e. starts
   with a slash or, when DOS_NT is defined, with a drive letter
   followed by ":/".  The argument string must have been canonicalized
   with canonicalize_filename.  */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
6847
/* Collapse every run of separators in FN into a single forward slash.
   Only '/' is a separator, except when DOS_NT is defined: then '\\'
   is a separator as well, and an upper case drive letter at the start
   of FN is converted to lower case first.  Works in place.  */
static void
canonicalize_filename (register char *fn)
{
  char *src = fn;		/* next character to read */
  char *dst = fn;		/* where the next character is written */

#ifdef DOS_NT
  const bool backslash_is_sep = true;

  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);
#else
  const bool backslash_is_sep = false;
#endif

  while (*src != '\0')
    {
      bool sep = (*src == '/' || (backslash_is_sep && *src == '\\'));

      if (!sep)
        {
          *dst++ = *src++;
          continue;
        }

      /* Emit one slash for the whole run of separators.  */
      *dst++ = '/';
      do
        src++;
      while (*src == '/' || (backslash_is_sep && *src == '\\'));
    }

  *dst = '\0';
}
6889
6890 \f
6891 /* Initialize a linebuffer for use. */
6892 static void
6893 linebuffer_init (linebuffer *lbp)
6894 {
6895 lbp->size = (DEBUG) ? 3 : 200;
6896 lbp->buffer = xnew (lbp->size, char);
6897 lbp->buffer[0] = '\0';
6898 lbp->len = 0;
6899 }
6900
6901 /* Set the minimum size of a string contained in a linebuffer. */
6902 static void
6903 linebuffer_setlen (linebuffer *lbp, int toksize)
6904 {
6905 while (lbp->size <= toksize)
6906 {
6907 lbp->size *= 2;
6908 xrnew (lbp->buffer, lbp->size, char);
6909 }
6910 lbp->len = toksize;
6911 }
6912
/* Like malloc but get fatal error if memory is exhausted: a null
   result from malloc is reported through fatal.  */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (!block)
    fatal ("virtual memory exhausted");

  return block;
}
6922
/* Like realloc but get fatal error if memory is exhausted: a null
   result from realloc is reported through fatal.  */
static void *
xrealloc (void *ptr, size_t size)
{
  void *block = realloc (ptr, size);

  if (!block)
    fatal ("virtual memory exhausted");

  return block;
}
6931
6932 /*
6933 * Local Variables:
6934 * indent-tabs-mode: t
6935 * tab-width: 8
6936 * fill-column: 79
6937 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6938 * c-file-style: "gnu"
6939 * End:
6940 */
6941
6942 /* etags.c ends here */