]> code.delx.au - gnu-emacs/blob - lib-src/etags.c
Merge from gnus--devo--0
[gnu-emacs] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2
3 Copyright (C) 1984 The Regents of the University of California
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
33 Free Software Foundation, Inc.
34
35 This file is not considered part of GNU Emacs.
36
37 This program is free software; you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation; either version 3, or (at your option)
40 any later version.
41
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
46
47 You should have received a copy of the GNU General Public License
48 along with this program; see the file COPYING. If not, write to the
49 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
50 Boston, MA 02110-1301, USA. */
51
52
53 /* NB To comply with the above BSD license, copyright information is
54 reproduced in etc/ETAGS.README. That file should be updated when the
55 above notices are.
56
57 To the best of our knowledge, this code was originally based on the
58 ctags.c distributed with BSD4.2, which was copyrighted by the
59 University of California, as described above. */
60
61
62 /*
63 * Authors:
64 * 1983 Ctags originally by Ken Arnold.
65 * 1984 Fortran added by Jim Kleckner.
66 * 1984 Ed Pelegri-Llopart added C typedefs.
67 * 1985 Emacs TAGS format by Richard Stallman.
68 * 1989 Sam Kendall added C++.
69 * 1992 Joseph B. Wells improved C and C++ parsing.
70 * 1993 Francesco Potortì reorganised C and C++.
71 * 1994 Line-by-line regexp tags by Tom Tromey.
72 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
73 * 2002 #line directives by Francesco Potortì.
74 *
75 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
76 */
77
78 /*
79 * If you want to add support for a new language, start by looking at the LUA
80 * language, which is the simplest. Alternatively, consider shipping a
81 * configuration file containing regexp definitions for etags.
82 */
83
/* Revision banner embedded in the executable.
   NOTE(review): the "@(#)" prefix looks like the marker that what(1)
   searches for -- confirm before changing the format.  */
char pot_etags_version[] =
  "@(#) pot revision number is 17.38";
85
86 #define TRUE 1
87 #define FALSE 0
88
89 #ifdef DEBUG
90 # undef DEBUG
91 # define DEBUG TRUE
92 #else
93 # define DEBUG FALSE
94 # define NDEBUG /* disable assert */
95 #endif
96
97 #ifdef HAVE_CONFIG_H
98 # include <config.h>
99 /* On some systems, Emacs defines static as nothing for the sake
100 of unexec. We don't want that here since we don't use unexec. */
101 # undef static
102 # ifndef PTR /* for XEmacs */
103 # define PTR void *
104 # endif
105 # ifndef __P /* for XEmacs */
106 # define __P(args) args
107 # endif
108 #else /* no config.h */
109 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
110 # define __P(args) args /* use prototypes */
111 # define PTR void * /* for generic pointers */
112 # else /* not standard C */
113 # define __P(args) () /* no prototypes */
114 # define const /* remove const for old compilers' sake */
115 # define PTR long * /* don't use void* */
116 # endif
117 #endif /* !HAVE_CONFIG_H */
118
119 #ifndef _GNU_SOURCE
120 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
121 #endif
122
123 /* WIN32_NATIVE is for XEmacs.
124 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
125 #ifdef WIN32_NATIVE
126 # undef MSDOS
127 # undef WINDOWSNT
128 # define WINDOWSNT
129 #endif /* WIN32_NATIVE */
130
131 #ifdef MSDOS
132 # undef MSDOS
133 # define MSDOS TRUE
134 # include <fcntl.h>
135 # include <sys/param.h>
136 # include <io.h>
137 # ifndef HAVE_CONFIG_H
138 # define DOS_NT
139 # include <sys/config.h>
140 # endif
141 #else
142 # define MSDOS FALSE
143 #endif /* MSDOS */
144
145 #ifdef WINDOWSNT
146 # include <stdlib.h>
147 # include <fcntl.h>
148 # include <string.h>
149 # include <direct.h>
150 # include <io.h>
151 # define MAXPATHLEN _MAX_PATH
152 # undef HAVE_NTGUI
153 # undef DOS_NT
154 # define DOS_NT
155 # ifndef HAVE_GETCWD
156 # define HAVE_GETCWD
157 # endif /* undef HAVE_GETCWD */
158 #else /* not WINDOWSNT */
159 # ifdef STDC_HEADERS
160 # include <stdlib.h>
161 # include <string.h>
162 # else /* no standard C headers */
163 extern char *getenv __P((const char *));
164 extern char *strcpy __P((char *, const char *));
165 extern char *strncpy __P((char *, const char *, unsigned long));
166 extern char *strcat __P((char *, const char *));
167 extern char *strncat __P((char *, const char *, unsigned long));
168 extern int strcmp __P((const char *, const char *));
169 extern int strncmp __P((const char *, const char *, unsigned long));
170 extern int system __P((const char *));
171 extern unsigned long strlen __P((const char *));
172 extern void *malloc __P((unsigned long));
173 extern void *realloc __P((void *, unsigned long));
174 extern void exit __P((int));
175 extern void free __P((void *));
176 extern void *memmove __P((void *, const void *, unsigned long));
177 # ifdef VMS
178 # define EXIT_SUCCESS 1
179 # define EXIT_FAILURE 0
180 # else /* no VMS */
181 # define EXIT_SUCCESS 0
182 # define EXIT_FAILURE 1
183 # endif
184 # endif
185 #endif /* !WINDOWSNT */
186
187 #ifdef HAVE_UNISTD_H
188 # include <unistd.h>
189 #else
190 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
191 extern char *getcwd (char *buf, size_t size);
192 # endif
193 #endif /* HAVE_UNISTD_H */
194
195 #include <stdio.h>
196 #include <ctype.h>
197 #include <errno.h>
198 #ifndef errno
199 extern int errno;
200 #endif
201 #include <sys/types.h>
202 #include <sys/stat.h>
203
204 #include <assert.h>
205 #ifdef NDEBUG
206 # undef assert /* some systems have a buggy assert.h */
207 # define assert(x) ((void) 0)
208 #endif
209
210 #if !defined (S_ISREG) && defined (S_IFREG)
211 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
212 #endif
213
214 #ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
215 # define NO_LONG_OPTIONS TRUE
216 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
217 extern char *optarg;
218 extern int optind, opterr;
219 #else
220 # define NO_LONG_OPTIONS FALSE
221 # include <getopt.h>
222 #endif /* NO_LONG_OPTIONS */
223
224 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
225 # ifdef __CYGWIN__ /* compiling on Cygwin */
226 !!! NOTICE !!!
227 the regex.h distributed with Cygwin is not compatible with etags, alas!
228 If you want regular expression support, you should delete this notice and
229 arrange to use the GNU regex.h and regex.c.
230 # endif
231 #endif
232 #include <regex.h>
233
234 /* Define CTAGS to make the program "ctags" compatible with the usual one.
235 Leave it undefined to make the program "etags", which makes emacs-style
236 tag tables and tags typedefs, #defines and struct/union/enum by default. */
237 #ifdef CTAGS
238 # undef CTAGS
239 # define CTAGS TRUE
240 #else
241 # define CTAGS FALSE
242 #endif
243
/*
 * String equality helpers.  Each one evaluates to non-zero when the two
 * strings compare equal:
 *   streq (s, t)          whole strings, case-sensitive (strcmp)
 *   strcaseeq (s, t)      whole strings, via etags_strcasecmp
 *   strneq (s, t, n)      first N characters, case-sensitive (strncmp)
 *   strncaseeq (s, t, n)  first N characters, via etags_strncasecmp
 *
 * When assertions are enabled, every helper checks that BOTH arguments
 * are non-NULL before comparing, since the comparison functions must not
 * be handed a null pointer.  The arguments may be evaluated more than
 * once, so they must not have side effects.
 */
#define streq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
248
/*
 * Character classification helpers.
 *
 * CHARS is the number of distinct values of an 8-bit char (2^8, not
 * 2^sizeof(char)).  CHAR(x) masks X down to the range 0..CHARS-1, so a
 * plain char holding a negative value can still be used as a table
 * index or handed to the <ctype.h> functions.
 */
#define CHARS 256		/* 2^8: number of values of an 8-bit char */
#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))

/* Lookups in the boolean tables _wht, _nin, _btk, _itk and _etk.  */
#define	iswhite(c)	(_wht[CHAR(c)]) /* c is white (see white) */
#define notinname(c)	(_nin[CHAR(c)]) /* c is not in a name (see nonam) */
#define	begtoken(c)	(_btk[CHAR(c)]) /* c can start token (see begtk) */
#define	intoken(c)	(_itk[CHAR(c)]) /* c can be in token (see midtk) */
#define	endtoken(c)	(_etk[CHAR(c)]) /* c ends tokens (see endtk) */

/* <ctype.h> predicates applied to the masked character value.  */
#define ISALNUM(c)	isalnum (CHAR(c))
#define ISALPHA(c)	isalpha (CHAR(c))
#define ISDIGIT(c)	isdigit (CHAR(c))
#define ISLOWER(c)	islower (CHAR(c))

/* Case conversion applied to the masked character value.  */
#define lowcase(c)	tolower (CHAR(c))
#define upcase(c)	toupper (CHAR(c))
264
265
/*
 *	xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew yields room for N objects of type Type, cast to `Type *'.
 * xrnew resizes the storage OldPointer refers to so that it holds N
 * objects of type Type, and assigns the result back to OldPointer.
 * When DEBUG is set, both go through the tracing allocator declared in
 * chkmalloc.h and record the calling file and line.
 */
#if DEBUG
# include "chkmalloc.h"
# define xnew(n,Type) \
    ((Type *) trace_malloc (__FILE__, __LINE__, sizeof (Type) * (n)))
# define xrnew(op,n,Type) \
    ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
				    (char *) (op), sizeof (Type) * (n)))
#else
# define xnew(n,Type) \
    ((Type *) xmalloc (sizeof (Type) * (n)))
# define xrnew(op,n,Type) \
    ((op) = (Type *) xrealloc ((char *) (op), sizeof (Type) * (n)))
#endif
283
/* Truth values are stored in plain ints throughout this file.  */
#define bool int

/* Signature shared by the per-language parse functions: each one
   receives the stream to read.  */
typedef void Lang_function __P((FILE *));
287
/* One known compression format.  Objects of this type are initialized
   positionally, so the member order matters.  */
typedef struct
{
  /* File name suffix for this compressor.  */
  char *suffix;
  /* Command that takes one arg and decompresses to stdout.  */
  char *command;
} compressor;
293
294 typedef struct
295 {
296 char *name; /* language name */
297 char *help; /* detailed help for the language */
298 Lang_function *function; /* parse function */
299 char **suffixes; /* name suffixes of this language's files */
300 char **filenames; /* names of this language's files */
301 char **interpreters; /* interpreters for this language */
302 bool metasource; /* source used to generate other sources */
303 } language;
304
305 typedef struct fdesc
306 {
307 struct fdesc *next; /* for the linked list */
308 char *infname; /* uncompressed input file name */
309 char *infabsname; /* absolute uncompressed input file name */
310 char *infabsdir; /* absolute dir of input file */
311 char *taggedfname; /* file name to write in tagfile */
312 language *lang; /* language of file */
313 char *prop; /* file properties to write in tagfile */
314 bool usecharno; /* etags tags shall contain char number */
315 bool written; /* entry written in the tags file */
316 } fdesc;
317
318 typedef struct node_st
319 { /* sorting structure */
320 struct node_st *left, *right; /* left and right sons */
321 fdesc *fdp; /* description of file to whom tag belongs */
322 char *name; /* tag name */
323 char *regex; /* search regexp */
324 bool valid; /* write this tag on the tag file */
325 bool is_func; /* function tag: use regexp in CTAGS mode */
326 bool been_warned; /* warning already given for duplicated tag */
327 int lno; /* line number tag is on */
328 long cno; /* character number line starts on */
329 } node;
330
/*
 * A `linebuffer' holds one line of text.  `readline_internal' fills a
 * linebuffer from a stream and copes with lines of any length.
 * SIZE is the size of BUFFER; LEN is the length of the string that
 * BUFFER holds after readline has read it.
 */
typedef struct
{
  long size;			/* size of BUFFER */
  int len;			/* length of the string in BUFFER */
  char *buffer;			/* the text itself */
} linebuffer;
344
345 /* Used to support mixing of --lang and file names. */
346 typedef struct
347 {
348 enum {
349 at_language, /* a language specification */
350 at_regexp, /* a regular expression */
351 at_filename, /* a file name */
352 at_stdin, /* read from stdin here */
353 at_end /* stop parsing the list */
354 } arg_type; /* argument type */
355 language *lang; /* language associated with the argument */
356 char *what; /* the argument itself */
357 } argument;
358
359 /* Structure defining a regular expression. */
360 typedef struct regexp
361 {
362 struct regexp *p_next; /* pointer to next in list */
363 language *lang; /* if set, use only for this language */
364 char *pattern; /* the regexp pattern */
365 char *name; /* tag name */
366 struct re_pattern_buffer *pat; /* the compiled pattern */
367 struct re_registers regs; /* re registers */
368 bool error_signaled; /* already signaled for this regexp */
369 bool force_explicit_name; /* do not allow implict tag name */
370 bool ignore_case; /* ignore case when matching */
371 bool multi_line; /* do a multi-line match on the whole file */
372 } regexp;
373
374
375 /* Many compilers barf on this:
376 Lang_function Ada_funcs;
377 so let's write it this way */
378 static void Ada_funcs __P((FILE *));
379 static void Asm_labels __P((FILE *));
380 static void C_entries __P((int c_ext, FILE *));
381 static void default_C_entries __P((FILE *));
382 static void plain_C_entries __P((FILE *));
383 static void Cjava_entries __P((FILE *));
384 static void Cobol_paragraphs __P((FILE *));
385 static void Cplusplus_entries __P((FILE *));
386 static void Cstar_entries __P((FILE *));
387 static void Erlang_functions __P((FILE *));
388 static void Forth_words __P((FILE *));
389 static void Fortran_functions __P((FILE *));
390 static void HTML_labels __P((FILE *));
391 static void Lisp_functions __P((FILE *));
392 static void Lua_functions __P((FILE *));
393 static void Makefile_targets __P((FILE *));
394 static void Pascal_functions __P((FILE *));
395 static void Perl_functions __P((FILE *));
396 static void PHP_functions __P((FILE *));
397 static void PS_functions __P((FILE *));
398 static void Prolog_functions __P((FILE *));
399 static void Python_functions __P((FILE *));
400 static void Scheme_functions __P((FILE *));
401 static void TeX_commands __P((FILE *));
402 static void Texinfo_nodes __P((FILE *));
403 static void Yacc_entries __P((FILE *));
404 static void just_read_file __P((FILE *));
405
406 static void print_language_names __P((void));
407 static void print_version __P((void));
408 static void print_help __P((argument *));
409 int main __P((int, char **));
410
411 static compressor *get_compressor_from_suffix __P((char *, char **));
412 static language *get_language_from_langname __P((const char *));
413 static language *get_language_from_interpreter __P((char *));
414 static language *get_language_from_filename __P((char *, bool));
415 static void readline __P((linebuffer *, FILE *));
416 static long readline_internal __P((linebuffer *, FILE *));
417 static bool nocase_tail __P((char *));
418 static void get_tag __P((char *, char **));
419
420 static void analyse_regex __P((char *));
421 static void free_regexps __P((void));
422 static void regex_tag_multiline __P((void));
423 static void error __P((const char *, const char *));
424 static void suggest_asking_for_help __P((void));
425 void fatal __P((char *, char *));
426 static void pfatal __P((char *));
427 static void add_node __P((node *, node **));
428
429 static void init __P((void));
430 static void process_file_name __P((char *, language *));
431 static void process_file __P((FILE *, char *, language *));
432 static void find_entries __P((FILE *));
433 static void free_tree __P((node *));
434 static void free_fdesc __P((fdesc *));
435 static void pfnote __P((char *, bool, char *, int, int, long));
436 static void make_tag __P((char *, int, bool, char *, int, int, long));
437 static void invalidate_nodes __P((fdesc *, node **));
438 static void put_entries __P((node *));
439
440 static char *concat __P((char *, char *, char *));
441 static char *skip_spaces __P((char *));
442 static char *skip_non_spaces __P((char *));
443 static char *savenstr __P((char *, int));
444 static char *savestr __P((char *));
445 static char *etags_strchr __P((const char *, int));
446 static char *etags_strrchr __P((const char *, int));
447 static int etags_strcasecmp __P((const char *, const char *));
448 static int etags_strncasecmp __P((const char *, const char *, int));
449 static char *etags_getcwd __P((void));
450 static char *relative_filename __P((char *, char *));
451 static char *absolute_filename __P((char *, char *));
452 static char *absolute_dirname __P((char *, char *));
453 static bool filename_is_absolute __P((char *f));
454 static void canonicalize_filename __P((char *));
455 static void linebuffer_init __P((linebuffer *));
456 static void linebuffer_setlen __P((linebuffer *, int));
457 static PTR xmalloc __P((unsigned int));
458 static PTR xrealloc __P((char *, unsigned int));
459
460 \f
461 static char searchar = '/'; /* use /.../ searches */
462
463 static char *tagfile; /* output file */
464 static char *progname; /* name this program was invoked with */
465 static char *cwd; /* current working directory */
466 static char *tagfiledir; /* directory of tagfile */
467 static FILE *tagf; /* ioptr for tags file */
468
469 static fdesc *fdhead; /* head of file description list */
470 static fdesc *curfdp; /* current file description */
471 static int lineno; /* line number of current line */
472 static long charno; /* current character number */
473 static long linecharno; /* charno of start of current line */
474 static char *dbp; /* pointer to start of current tag */
475
476 static const int invalidcharno = -1;
477
478 static node *nodehead; /* the head of the binary tree of tags */
479 static node *last_node; /* the last node created */
480
481 static linebuffer lb; /* the current line */
482 static linebuffer filebuf; /* a buffer containing the whole file */
483 static linebuffer token_name; /* a buffer containing a tag name */
484
485 /* boolean "functions" (see init) */
486 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
/* White chars.  */
static char *white = " \f\t\n\r\v";
/* Chars that are not in a name.  Look at make_tag before modifying!  */
static char *nonam = " \f\t\n\r()=,;";
/* Token ending chars.  */
static char *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?";
/* Token starting chars.  */
static char *begtk =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@";
/* Valid in-token chars.  */
static char *midtk =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
498
499 static bool append_to_tagfile; /* -a: append to tags */
500 /* The next five default to TRUE in C and derived languages. */
501 static bool typedefs; /* -t: create tags for C and Ada typedefs */
502 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
503 /* 0 struct/enum/union decls, and C++ */
504 /* member functions. */
505 static bool constantypedefs; /* -d: create tags for C #define, enum */
506 /* constants and variables. */
507 /* -D: opposite of -d. Default under ctags. */
508 static bool globals; /* create tags for global variables */
509 static bool members; /* create tags for C member variables */
510 static bool declarations; /* --declarations: tag them and extern in C&Co*/
511 static bool no_line_directive; /* ignore #line directives (undocumented) */
512 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
513 static bool update; /* -u: update tags */
514 static bool vgrind_style; /* -v: create vgrind style index output */
515 static bool no_warnings; /* -w: suppress warnings (undocumented) */
516 static bool cxref_style; /* -x: create cxref style output */
517 static bool cplusplus; /* .[hc] means C++, not C */
518 static bool ignoreindent; /* -I: ignore indentation in C */
519 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
520
521 /* STDIN is defined in LynxOS system headers */
522 #ifdef STDIN
523 # undef STDIN
524 #endif
525
526 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
527 static bool parsing_stdin; /* --parse-stdin used */
528
529 static regexp *p_head; /* list of all regexps */
530 static bool need_filebuf; /* some regexes are multi-line */
531
532 static struct option longopts[] =
533 {
534 { "append", no_argument, NULL, 'a' },
535 { "packages-only", no_argument, &packages_only, TRUE },
536 { "c++", no_argument, NULL, 'C' },
537 { "declarations", no_argument, &declarations, TRUE },
538 { "no-line-directive", no_argument, &no_line_directive, TRUE },
539 { "no-duplicates", no_argument, &no_duplicates, TRUE },
540 { "help", no_argument, NULL, 'h' },
541 { "help", no_argument, NULL, 'H' },
542 { "ignore-indentation", no_argument, NULL, 'I' },
543 { "language", required_argument, NULL, 'l' },
544 { "members", no_argument, &members, TRUE },
545 { "no-members", no_argument, &members, FALSE },
546 { "output", required_argument, NULL, 'o' },
547 { "regex", required_argument, NULL, 'r' },
548 { "no-regex", no_argument, NULL, 'R' },
549 { "ignore-case-regex", required_argument, NULL, 'c' },
550 { "parse-stdin", required_argument, NULL, STDIN },
551 { "version", no_argument, NULL, 'V' },
552
553 #if CTAGS /* Ctags options */
554 { "backward-search", no_argument, NULL, 'B' },
555 { "cxref", no_argument, NULL, 'x' },
556 { "defines", no_argument, NULL, 'd' },
557 { "globals", no_argument, &globals, TRUE },
558 { "typedefs", no_argument, NULL, 't' },
559 { "typedefs-and-c++", no_argument, NULL, 'T' },
560 { "update", no_argument, NULL, 'u' },
561 { "vgrind", no_argument, NULL, 'v' },
562 { "no-warn", no_argument, NULL, 'w' },
563
564 #else /* Etags options */
565 { "no-defines", no_argument, NULL, 'D' },
566 { "no-globals", no_argument, &globals, FALSE },
567 { "include", required_argument, NULL, 'i' },
568 #endif
569 { NULL }
570 };
571
572 static compressor compressors[] =
573 {
574 { "z", "gzip -d -c"},
575 { "Z", "gzip -d -c"},
576 { "gz", "gzip -d -c"},
577 { "GZ", "gzip -d -c"},
578 { "bz2", "bzip2 -d -c" },
579 { NULL }
580 };
581
582 /*
583 * Language stuff.
584 */
585
/* Ada code: file name suffixes and help text.  */
static char *Ada_suffixes [] = { "ads", "adb", "ada", NULL };
static char Ada_help [] =
  "In Ada code, functions, procedures, packages, tasks and types are\n"
  "tags. Use the `--packages-only' option to create tags for\n"
  "packages only.\n"
  "Ada tag names have suffixes indicating the type of entity:\n"
  "Entity type: Qualifier:\n"
  "------------ ----------\n"
  "function /f\n"
  "procedure /p\n"
  "package spec /s\n"
  "package body /b\n"
  "type /t\n"
  "task /k\n"
  "Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n"
  "body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n"
  "will just search for any tag `bidule'.";
605
/* Assembly code: file name suffixes and help text.  */
static char *Asm_suffixes [] =
{
  /* Unix assembler */
  "a",
  /* Microcontroller assembly */
  "asm",
  /* BSO/Tasking definition includes */
  "def",
  /* Microcontroller include files */
  "inc",
  "ins",
  /* Unix assembler */
  "s", "sa",
  /* cpp-processed Unix assembler */
  "S",
  /* BSO/Tasking C compiler output */
  "src",
  NULL
};
static char Asm_help [] =
  "In assembler code, labels appearing at the beginning of a line,\n"
  "followed by a colon, are tags.";
621
622
/* Files ending in .c and .h may turn out to be C++: either the --c++
   flag was given, or the `class' or `template' keywords are met inside
   the file.  That is why default_C_entries is called for these.  */
static char *default_C_suffixes [] = { "c", "h", NULL };
static char default_C_help [] =
  "In C code, any C function or typedef is a tag, and so are\n"
  "definitions of `struct', `union' and `enum'. `#define' macro\n"
  "definitions and `enum' constants are tags unless you specify\n"
  "`--no-defines'. Global variables are tags unless you specify\n"
  "`--no-globals' and so are struct members unless you specify\n"
  "`--no-members'. Use of `--no-globals', `--no-defines' and\n"
  "`--no-members' can make the tags table file much smaller.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations'.";
638
/* C++ code: file name suffixes and help text.  */
static char *Cplusplus_suffixes [] =
{
  "C", "c++", "cc", "cpp", "cxx",
  "H", "h++", "hh", "hpp", "hxx",
  /* Objective C++ */
  "M",
  /* Postscript with C syntax */
  "pdb",
  NULL
};
static char Cplusplus_help [] =
  "In C++ code, all the tag constructs of C code are tagged. (Use\n"
  "--help --lang=c --lang=c++ for full help.)\n"
  "In addition to C tags, member functions are also recognized. Member\n"
  "variables are recognized unless you use the `--no-members' option.\n"
  "Tags for variables and functions in classes are named `CLASS::VARIABLE'\n"
  "and `CLASS::FUNCTION'. `operator' definitions have tag names like\n"
  "`operator+'.";

/* Java code: file name suffixes and help text.  */
static char *Cjava_suffixes [] = { "java", NULL };
static char Cjava_help [] =
  "In Java code, all the tags constructs of C and C++ code are\n"
  "tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
658
659
/* Cobol code: file name suffixes and help text.  */
static char *Cobol_suffixes [] = { "COB", "cob", NULL };
static char Cobol_help [] =
  "In Cobol code, tags are paragraph names; that is, any word\n"
  "starting in column 8 and followed by a period.";

/* C* code: file name suffixes only.  */
static char *Cstar_suffixes [] = { "cs", "hs", NULL };

/* Erlang code: file name suffixes and help text.  */
static char *Erlang_suffixes [] = { "erl", "hrl", NULL };
static char Erlang_help [] =
  "In Erlang code, the tags are the functions, records and macros\n"
  "defined in the file.";
674
/* Forth code: file name suffixes and help text.  Like every other
   suffix table in this file, Forth_suffixes has internal linkage.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static char Forth_help [] =
"In Forth code, tags are words defined by `:',\n\
constant, code, create, defer, value, variable, buffer:, field.";
680
/* Fortran code: file name suffixes and help text.  */
static char *Fortran_suffixes [] = { "F", "f", "f90", "for", NULL };
static char Fortran_help [] =
  "In Fortran code, functions, subroutines and block data are tags.";

/* HTML input files: file name suffixes and help text.  */
static char *HTML_suffixes [] = { "htm", "html", "shtml", NULL };
static char HTML_help [] =
  "In HTML input files, the tags are the `title' and the `h1', `h2',\n"
  "`h3' headers. Also, tags are `name=' in anchors and all\n"
  "occurrences of `id='.";

/* Lisp code: file name suffixes and help text.  */
static char *Lisp_suffixes [] =
{
  "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL
};
static char Lisp_help [] =
  "In Lisp code, any function defined with `defun', any variable\n"
  "defined with `defvar' or `defconst', and in general the first\n"
  "argument of any expression that starts with `(def' in column zero\n"
  "is a tag.";

/* Lua scripts: file name suffixes and help text.  */
static char *Lua_suffixes [] = { "lua", "LUA", NULL };
static char Lua_help [] =
  "In Lua scripts, all functions are tags.";
705
/* Makefiles: recognized by file name rather than by suffix.  */
static char *Makefile_filenames [] =
{
  "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL
};
static char Makefile_help [] =
  "In makefiles, targets are tags; additionally, variables are tags\n"
  "unless you specify `--no-globals'.";

/* Objective C code: file name suffixes and help text.  */
static char *Objc_suffixes [] =
{
  /* Objective lex file */
  "lm",
  /* Objective C file */
  "m",
  NULL
};
static char Objc_help [] =
  "In Objective C code, tags include Objective C definitions for classes,\n"
  "class categories, methods and protocols. Tags for variables and\n"
  "functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n"
  "(Use --help --lang=c --lang=objc --lang=java for full help.)";

/* Pascal code: file name suffixes and help text.  */
static char *Pascal_suffixes [] = { "p", "pas", NULL };
static char Pascal_help [] =
  "In Pascal code, the tags are the functions and procedures defined\n"
  "in the file.";
/* " // this is for working around an Emacs highlighting bug... */
728
/* Perl code: file name suffixes, interpreter names and help text.  */
static char *Perl_suffixes [] = { "pl", "pm", NULL };
static char *Perl_interpreters [] = { "perl", "@PERL@", NULL };
static char Perl_help [] =
  "In Perl code, the tags are the packages, subroutines and variables\n"
  "defined by the `package', `sub', `my' and `local' keywords. Use\n"
  "`--globals' if you want to tag global variables. Tags for\n"
  "subroutines are named `PACKAGE::SUB'. The name for subroutines\n"
  "defined in the default package is `main::SUB'.";

/* PHP code: file name suffixes and help text.  */
static char *PHP_suffixes [] = { "php", "php3", "php4", NULL };
static char PHP_help [] =
  "In PHP code, tags are functions, classes and defines. Unless you use\n"
  "the `--no-members' option, vars are tags too.";

/* Plain C: file name suffixes only.  */
static char *plain_C_suffixes [] =
{
  /* Pro*C file */
  "pc",
  NULL
};

/* PostScript code; .psw is for PSWrap.  */
static char *PS_suffixes [] = { "ps", "psw", NULL };
static char PS_help [] =
  "In PostScript code, the tags are the functions.";
754
/* Prolog code: file name suffixes and help text.  */
static char *Prolog_suffixes [] = { "prolog", NULL };
static char Prolog_help [] =
  "In Prolog code, tags are predicates and rules at the beginning of\n"
  "line.";

/* Python code: file name suffixes and help text.  */
static char *Python_suffixes [] = { "py", NULL };
static char Python_help [] =
  "In Python code, `def' or `class' at the beginning of a line\n"
  "generate a tag.";

/* Scheme code.
   Can't do the `SCM' or `scm' prefix with a version number.  */
static char *Scheme_suffixes [] =
{
  "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL
};
static char Scheme_help [] =
  "In Scheme code, tags include anything defined with `def' or with a\n"
  "construct whose name starts with `def'. They also include\n"
  "variables set with `set!' at top level in the file.";
774
/* TeX and LaTeX text: file name suffixes and help text.  */
static char *TeX_suffixes [] =
{
  "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL
};
static char TeX_help [] =
  "In LaTeX text, the argument of any of the commands `\\chapter',\n"
  "`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n"
  "`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n"
  "`\\index', `\\def', `\\newcommand', `\\renewcommand',\n"
  "`\\newenvironment' or `\\renewenvironment' is a tag.\n"
  "\n"
  "Other commands can be specified by setting the environment variable\n"
  "`TEXTAGS' to a colon-separated list like, for example,\n"
  "TEXTAGS=\"mycommand:myothercommand\".";


/* Texinfo files: file name suffixes and help text.  */
static char *Texinfo_suffixes [] = { "texi", "texinfo", "txi", NULL };
static char Texinfo_help [] =
  "for texinfo files, lines starting with @node are tagged.";

/* Bison or Yacc input files; .ym is Objective yacc file.  */
static char *Yacc_suffixes [] = { "y", "y++", "ym", "yxx", "yy", NULL };
static char Yacc_help [] =
  "In Bison or Yacc input files, each rule defines as a tag the\n"
  "nonterminal it constructs. The portions of the file that contain\n"
  "C code are parsed as C code (use --help --lang=c --lang=yacc\n"
  "for full help).";
801
/* Help text for the pseudo-language `auto'.  */
static char auto_help [] =
  "`auto' is not a real language, it indicates to use\n"
  "a default language for files base on file name suffix and file contents.";

/* Help text for the pseudo-language `none'.  */
static char none_help [] =
  "`none' is not a real language, it indicates to only do\n"
  "regexp processing on files.";

/* Help text for languages that have no detailed help of their own.  */
static char no_lang_help [] =
  "No detailed help available for this language.";
812
813
814 /*
815 * Table of languages.
816 *
817 * It is ok for a given function to be listed under more than one
818 * name. I just didn't.
819 */
820
821 static language lang_names [] =
822 {
823 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
824 { "asm", Asm_help, Asm_labels, Asm_suffixes },
825 { "c", default_C_help, default_C_entries, default_C_suffixes },
826 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
827 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
828 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
829 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
830 { "forth", Forth_help, Forth_words, Forth_suffixes },
831 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
832 { "html", HTML_help, HTML_labels, HTML_suffixes },
833 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
834 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
835 { "lua", Lua_help, Lua_functions, Lua_suffixes },
836 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
837 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
838 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
839 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
840 { "php", PHP_help, PHP_functions, PHP_suffixes },
841 { "postscript",PS_help, PS_functions, PS_suffixes },
842 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
843 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
844 { "python", Python_help, Python_functions, Python_suffixes },
845 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
846 { "tex", TeX_help, TeX_commands, TeX_suffixes },
847 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
848 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
849 { "auto", auto_help }, /* default guessing scheme */
850 { "none", none_help, just_read_file }, /* regexp matching only */
851 { NULL } /* end of list */
852 };
853
854 \f
855 static void
856 print_language_names ()
857 {
858 language *lang;
859 char **name, **ext;
860
861 puts ("\nThese are the currently supported languages, along with the\n\
862 default file names and dot suffixes:");
863 for (lang = lang_names; lang->name != NULL; lang++)
864 {
865 printf (" %-*s", 10, lang->name);
866 if (lang->filenames != NULL)
867 for (name = lang->filenames; *name != NULL; name++)
868 printf (" %s", *name);
869 if (lang->suffixes != NULL)
870 for (ext = lang->suffixes; *ext != NULL; ext++)
871 printf (" .%s", *ext);
872 puts ("");
873 }
874 puts ("where `auto' means use default language for files based on file\n\
875 name suffix, and `none' means only do regexp processing on files.\n\
876 If no language is specified and no matching suffix is found,\n\
877 the first line of the file is read for a sharp-bang (#!) sequence\n\
878 followed by the name of an interpreter. If no such sequence is found,\n\
879 Fortran is tried first; if no tags are found, C is tried next.\n\
880 When parsing any C file, a \"class\" or \"template\" keyword\n\
881 switches to C++.");
882 puts ("Compressed files are supported using gzip and bzip2.\n\
883 \n\
884 For detailed help on a given language use, for example,\n\
885 etags --help --lang=ada.");
886 }
887
888 #ifndef EMACS_NAME
889 # define EMACS_NAME "standalone"
890 #endif
891 #ifndef VERSION
892 # define VERSION "17.38"
893 #endif
894 static void
895 print_version ()
896 {
897 /* Makes it easier to update automatically. */
898 char emacs_copyright[] = "Copyright (C) 2007 Free Software Foundation, Inc.";
899
900 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
901 puts (emacs_copyright);
902 puts ("This program is distributed under the terms in ETAGS.README");
903
904 exit (EXIT_SUCCESS);
905 }
906
907 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
908 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
909 #endif
910
911 static void
912 print_help (argbuffer)
913 argument *argbuffer;
914 {
915 bool help_for_lang = FALSE;
916
917 for (; argbuffer->arg_type != at_end; argbuffer++)
918 if (argbuffer->arg_type == at_language)
919 {
920 if (help_for_lang)
921 puts ("");
922 puts (argbuffer->lang->help);
923 help_for_lang = TRUE;
924 }
925
926 if (help_for_lang)
927 exit (EXIT_SUCCESS);
928
929 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
930 \n\
931 These are the options accepted by %s.\n", progname, progname);
932 if (NO_LONG_OPTIONS)
933 puts ("WARNING: long option names do not work with this executable,\n\
934 as it is not linked with GNU getopt.");
935 else
936 puts ("You may use unambiguous abbreviations for the long option names.");
937 puts (" A - as file name means read names from stdin (one per line).\n\
938 Absolute names are stored in the output file as they are.\n\
939 Relative ones are stored relative to the output file's directory.\n");
940
941 puts ("-a, --append\n\
942 Append tag entries to existing tags file.");
943
944 puts ("--packages-only\n\
945 For Ada files, only generate tags for packages.");
946
947 if (CTAGS)
948 puts ("-B, --backward-search\n\
949 Write the search commands for the tag entries using '?', the\n\
950 backward-search command instead of '/', the forward-search command.");
951
952 /* This option is mostly obsolete, because etags can now automatically
953 detect C++. Retained for backward compatibility and for debugging and
954 experimentation. In principle, we could want to tag as C++ even
955 before any "class" or "template" keyword.
956 puts ("-C, --c++\n\
957 Treat files whose name suffix defaults to C language as C++ files.");
958 */
959
960 puts ("--declarations\n\
961 In C and derived languages, create tags for function declarations,");
962 if (CTAGS)
963 puts ("\tand create tags for extern variables if --globals is used.");
964 else
965 puts
966 ("\tand create tags for extern variables unless --no-globals is used.");
967
968 if (CTAGS)
969 puts ("-d, --defines\n\
970 Create tag entries for C #define constants and enum constants, too.");
971 else
972 puts ("-D, --no-defines\n\
973 Don't create tag entries for C #define constants and enum constants.\n\
974 This makes the tags file smaller.");
975
976 if (!CTAGS)
977 puts ("-i FILE, --include=FILE\n\
978 Include a note in tag file indicating that, when searching for\n\
979 a tag, one should also consult the tags file FILE after\n\
980 checking the current file.");
981
982 puts ("-l LANG, --language=LANG\n\
983 Force the following files to be considered as written in the\n\
984 named language up to the next --language=LANG option.");
985
986 if (CTAGS)
987 puts ("--globals\n\
988 Create tag entries for global variables in some languages.");
989 else
990 puts ("--no-globals\n\
991 Do not create tag entries for global variables in some\n\
992 languages. This makes the tags file smaller.");
993
994 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
995 puts ("--no-line-directive\n\
996 Ignore #line preprocessor directives in C and derived languages.");
997
998 if (CTAGS)
999 puts ("--members\n\
1000 Create tag entries for members of structures in some languages.");
1001 else
1002 puts ("--no-members\n\
1003 Do not create tag entries for members of structures\n\
1004 in some languages.");
1005
1006 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1007 Make a tag for each line matching a regular expression pattern\n\
1008 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1009 files only. REGEXFILE is a file containing one REGEXP per line.\n\
1010 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1011 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
1012 puts (" If TAGNAME/ is present, the tags created are named.\n\
1013 For example Tcl named tags can be created with:\n\
1014 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1015 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1016 `m' means to allow multi-line matches, `s' implies `m' and\n\
1017 causes dot to match any character, including newline.");
1018
1019 puts ("-R, --no-regex\n\
1020 Don't create tags from regexps for the following files.");
1021
1022 puts ("-I, --ignore-indentation\n\
1023 In C and C++ do not assume that a closing brace in the first\n\
1024 column is the final brace of a function or structure definition.");
1025
1026 puts ("-o FILE, --output=FILE\n\
1027 Write the tags to FILE.");
1028
1029 puts ("--parse-stdin=NAME\n\
1030 Read from standard input and record tags as belonging to file NAME.");
1031
1032 if (CTAGS)
1033 {
1034 puts ("-t, --typedefs\n\
1035 Generate tag entries for C and Ada typedefs.");
1036 puts ("-T, --typedefs-and-c++\n\
1037 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1038 and C++ member functions.");
1039 }
1040
1041 if (CTAGS)
1042 puts ("-u, --update\n\
1043 Update the tag entries for the given files, leaving tag\n\
1044 entries for other files in place. Currently, this is\n\
1045 implemented by deleting the existing entries for the given\n\
1046 files and then rewriting the new entries at the end of the\n\
1047 tags file. It is often faster to simply rebuild the entire\n\
1048 tag file than to use this.");
1049
1050 if (CTAGS)
1051 {
1052 puts ("-v, --vgrind\n\
1053 Print on the standard output an index of items intended for\n\
1054 human consumption, similar to the output of vgrind. The index\n\
1055 is sorted, and gives the page number of each item.");
1056
1057 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1058 puts ("-w, --no-duplicates\n\
1059 Do not create duplicate tag entries, for compatibility with\n\
1060 traditional ctags.");
1061
1062 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1063 puts ("-w, --no-warn\n\
1064 Suppress warning messages about duplicate tag entries.");
1065
1066 puts ("-x, --cxref\n\
1067 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1068 The output uses line numbers instead of page numbers, but\n\
1069 beyond that the differences are cosmetic; try both to see\n\
1070 which you like.");
1071 }
1072
1073 puts ("-V, --version\n\
1074 Print the version of the program.\n\
1075 -h, --help\n\
1076 Print this help message.\n\
1077 Followed by one or more `--language' options prints detailed\n\
1078 help about tag generation for the specified languages.");
1079
1080 print_language_names ();
1081
1082 puts ("");
1083 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1084
1085 exit (EXIT_SUCCESS);
1086 }
1087
1088 \f
1089 #ifdef VMS /* VMS specific functions */
1090
1091 #define EOS '\0'
1092
1093 /* This is a BUG! ANY arbitrary limit is a BUG!
1094 Won't someone please fix this? */
1095 #define MAX_FILE_SPEC_LEN 255
1096 typedef struct {
1097 short curlen;
1098 char body[MAX_FILE_SPEC_LEN + 1];
1099 } vspec;
1100
1101 /*
1102 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1103 returning in each successive call the next file name matching the input
1104 spec. The function expects that each in_spec passed
1105 to it will be processed to completion; in particular, up to and
1106 including the call following that in which the last matching name
1107 is returned, the function ignores the value of in_spec, and will
1108 only start processing a new spec with the following call.
1109 If an error occurs, on return out_spec contains the value
1110 of in_spec when the error occurred.
1111
1112 With each successive file name returned in out_spec, the
1113 function's return value is one. When there are no more matching
1114 names the function returns zero. If on the first call no file
1115 matches in_spec, or there is any other error, -1 is returned.
1116 */
1117
1118 #include <rmsdef.h>
1119 #include <descrip.h>
1120 #define OUTSIZE MAX_FILE_SPEC_LEN
1121 static short
1122 fn_exp (out, in)
1123 vspec *out;
1124 char *in;
1125 {
1126 static long context = 0;
1127 static struct dsc$descriptor_s o;
1128 static struct dsc$descriptor_s i;
1129 static bool pass1 = TRUE;
1130 long status;
1131 short retval;
1132
1133 if (pass1)
1134 {
1135 pass1 = FALSE;
1136 o.dsc$a_pointer = (char *) out;
1137 o.dsc$w_length = (short)OUTSIZE;
1138 i.dsc$a_pointer = in;
1139 i.dsc$w_length = (short)strlen(in);
1140 i.dsc$b_dtype = DSC$K_DTYPE_T;
1141 i.dsc$b_class = DSC$K_CLASS_S;
1142 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1143 o.dsc$b_class = DSC$K_CLASS_VS;
1144 }
1145 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1146 {
1147 out->body[out->curlen] = EOS;
1148 return 1;
1149 }
1150 else if (status == RMS$_NMF)
1151 retval = 0;
1152 else
1153 {
1154 strcpy(out->body, in);
1155 retval = -1;
1156 }
1157 lib$find_file_end(&context);
1158 pass1 = TRUE;
1159 return retval;
1160 }
1161
1162 /*
1163 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1164 name of each file specified by the provided arg expanding wildcards.
1165 */
1166 static char *
1167 gfnames (arg, p_error)
1168 char *arg;
1169 bool *p_error;
1170 {
1171 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1172
1173 switch (fn_exp (&filename, arg))
1174 {
1175 case 1:
1176 *p_error = FALSE;
1177 return filename.body;
1178 case 0:
1179 *p_error = FALSE;
1180 return NULL;
1181 default:
1182 *p_error = TRUE;
1183 return filename.body;
1184 }
1185 }
1186
#ifndef OLD	/* Newer versions of VMS do provide `system'. */
/*
 * Stand-in for system(): reports that running a shell command is not
 * implemented and returns -1.  The return type is now explicit and a
 * value is always returned, because main compares the result of
 * system() with EXIT_SUCCESS; previously control fell off the end of
 * an implicit-int function and the caller read an indeterminate value.
 */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1194
1195 #define VERSION_DELIM ';'
1196 char *massage_name (s)
1197 char *s;
1198 {
1199 char *start = s;
1200
1201 for ( ; *s; s++)
1202 if (*s == VERSION_DELIM)
1203 {
1204 *s = EOS;
1205 break;
1206 }
1207 else
1208 *s = lowcase (*s);
1209 return start;
1210 }
1211 #endif /* VMS */
1212
1213 \f
1214 int
1215 main (argc, argv)
1216 int argc;
1217 char *argv[];
1218 {
1219 int i;
1220 unsigned int nincluded_files;
1221 char **included_files;
1222 argument *argbuffer;
1223 int current_arg, file_count;
1224 linebuffer filename_lb;
1225 bool help_asked = FALSE;
1226 #ifdef VMS
1227 bool got_err;
1228 #endif
1229 char *optstring;
1230 int opt;
1231
1232
1233 #ifdef DOS_NT
1234 _fmode = O_BINARY; /* all of files are treated as binary files */
1235 #endif /* DOS_NT */
1236
1237 progname = argv[0];
1238 nincluded_files = 0;
1239 included_files = xnew (argc, char *);
1240 current_arg = 0;
1241 file_count = 0;
1242
1243 /* Allocate enough no matter what happens. Overkill, but each one
1244 is small. */
1245 argbuffer = xnew (argc, argument);
1246
1247 /*
1248 * Always find typedefs and structure tags.
1249 * Also default to find macro constants, enum constants, struct
1250 * members and global variables. Do it for both etags and ctags.
1251 */
1252 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1253 globals = members = TRUE;
1254
1255 /* When the optstring begins with a '-' getopt_long does not rearrange the
1256 non-options arguments to be at the end, but leaves them alone. */
1257 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1258 "ac:Cf:Il:o:r:RSVhH",
1259 (CTAGS) ? "BxdtTuvw" : "Di:");
1260
1261 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1262 switch (opt)
1263 {
1264 case 0:
1265 /* If getopt returns 0, then it has already processed a
1266 long-named option. We should do nothing. */
1267 break;
1268
1269 case 1:
1270 /* This means that a file name has been seen. Record it. */
1271 argbuffer[current_arg].arg_type = at_filename;
1272 argbuffer[current_arg].what = optarg;
1273 ++current_arg;
1274 ++file_count;
1275 break;
1276
1277 case STDIN:
1278 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1279 argbuffer[current_arg].arg_type = at_stdin;
1280 argbuffer[current_arg].what = optarg;
1281 ++current_arg;
1282 ++file_count;
1283 if (parsing_stdin)
1284 fatal ("cannot parse standard input more than once", (char *)NULL);
1285 parsing_stdin = TRUE;
1286 break;
1287
1288 /* Common options. */
1289 case 'a': append_to_tagfile = TRUE; break;
1290 case 'C': cplusplus = TRUE; break;
1291 case 'f': /* for compatibility with old makefiles */
1292 case 'o':
1293 if (tagfile)
1294 {
1295 error ("-o option may only be given once.", (char *)NULL);
1296 suggest_asking_for_help ();
1297 /* NOTREACHED */
1298 }
1299 tagfile = optarg;
1300 break;
1301 case 'I':
1302 case 'S': /* for backward compatibility */
1303 ignoreindent = TRUE;
1304 break;
1305 case 'l':
1306 {
1307 language *lang = get_language_from_langname (optarg);
1308 if (lang != NULL)
1309 {
1310 argbuffer[current_arg].lang = lang;
1311 argbuffer[current_arg].arg_type = at_language;
1312 ++current_arg;
1313 }
1314 }
1315 break;
1316 case 'c':
1317 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1318 optarg = concat (optarg, "i", ""); /* memory leak here */
1319 /* FALLTHRU */
1320 case 'r':
1321 argbuffer[current_arg].arg_type = at_regexp;
1322 argbuffer[current_arg].what = optarg;
1323 ++current_arg;
1324 break;
1325 case 'R':
1326 argbuffer[current_arg].arg_type = at_regexp;
1327 argbuffer[current_arg].what = NULL;
1328 ++current_arg;
1329 break;
1330 case 'V':
1331 print_version ();
1332 break;
1333 case 'h':
1334 case 'H':
1335 help_asked = TRUE;
1336 break;
1337
1338 /* Etags options */
1339 case 'D': constantypedefs = FALSE; break;
1340 case 'i': included_files[nincluded_files++] = optarg; break;
1341
1342 /* Ctags options. */
1343 case 'B': searchar = '?'; break;
1344 case 'd': constantypedefs = TRUE; break;
1345 case 't': typedefs = TRUE; break;
1346 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1347 case 'u': update = TRUE; break;
1348 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1349 case 'x': cxref_style = TRUE; break;
1350 case 'w': no_warnings = TRUE; break;
1351 default:
1352 suggest_asking_for_help ();
1353 /* NOTREACHED */
1354 }
1355
1356 /* No more options. Store the rest of arguments. */
1357 for (; optind < argc; optind++)
1358 {
1359 argbuffer[current_arg].arg_type = at_filename;
1360 argbuffer[current_arg].what = argv[optind];
1361 ++current_arg;
1362 ++file_count;
1363 }
1364
1365 argbuffer[current_arg].arg_type = at_end;
1366
1367 if (help_asked)
1368 print_help (argbuffer);
1369 /* NOTREACHED */
1370
1371 if (nincluded_files == 0 && file_count == 0)
1372 {
1373 error ("no input files specified.", (char *)NULL);
1374 suggest_asking_for_help ();
1375 /* NOTREACHED */
1376 }
1377
1378 if (tagfile == NULL)
1379 tagfile = CTAGS ? "tags" : "TAGS";
1380 cwd = etags_getcwd (); /* the current working directory */
1381 if (cwd[strlen (cwd) - 1] != '/')
1382 {
1383 char *oldcwd = cwd;
1384 cwd = concat (oldcwd, "/", "");
1385 free (oldcwd);
1386 }
1387 /* Relative file names are made relative to the current directory. */
1388 if (streq (tagfile, "-")
1389 || strneq (tagfile, "/dev/", 5))
1390 tagfiledir = cwd;
1391 else
1392 tagfiledir = absolute_dirname (tagfile, cwd);
1393
1394 init (); /* set up boolean "functions" */
1395
1396 linebuffer_init (&lb);
1397 linebuffer_init (&filename_lb);
1398 linebuffer_init (&filebuf);
1399 linebuffer_init (&token_name);
1400
1401 if (!CTAGS)
1402 {
1403 if (streq (tagfile, "-"))
1404 {
1405 tagf = stdout;
1406 #ifdef DOS_NT
1407 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1408 doesn't take effect until after `stdout' is already open). */
1409 if (!isatty (fileno (stdout)))
1410 setmode (fileno (stdout), O_BINARY);
1411 #endif /* DOS_NT */
1412 }
1413 else
1414 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1415 if (tagf == NULL)
1416 pfatal (tagfile);
1417 }
1418
1419 /*
1420 * Loop through files finding functions.
1421 */
1422 for (i = 0; i < current_arg; i++)
1423 {
1424 static language *lang; /* non-NULL if language is forced */
1425 char *this_file;
1426
1427 switch (argbuffer[i].arg_type)
1428 {
1429 case at_language:
1430 lang = argbuffer[i].lang;
1431 break;
1432 case at_regexp:
1433 analyse_regex (argbuffer[i].what);
1434 break;
1435 case at_filename:
1436 #ifdef VMS
1437 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1438 {
1439 if (got_err)
1440 {
1441 error ("can't find file %s\n", this_file);
1442 argc--, argv++;
1443 }
1444 else
1445 {
1446 this_file = massage_name (this_file);
1447 }
1448 #else
1449 this_file = argbuffer[i].what;
1450 #endif
1451 /* Input file named "-" means read file names from stdin
1452 (one per line) and use them. */
1453 if (streq (this_file, "-"))
1454 {
1455 if (parsing_stdin)
1456 fatal ("cannot parse standard input AND read file names from it",
1457 (char *)NULL);
1458 while (readline_internal (&filename_lb, stdin) > 0)
1459 process_file_name (filename_lb.buffer, lang);
1460 }
1461 else
1462 process_file_name (this_file, lang);
1463 #ifdef VMS
1464 }
1465 #endif
1466 break;
1467 case at_stdin:
1468 this_file = argbuffer[i].what;
1469 process_file (stdin, this_file, lang);
1470 break;
1471 }
1472 }
1473
1474 free_regexps ();
1475 free (lb.buffer);
1476 free (filebuf.buffer);
1477 free (token_name.buffer);
1478
1479 if (!CTAGS || cxref_style)
1480 {
1481 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1482 put_entries (nodehead);
1483 free_tree (nodehead);
1484 nodehead = NULL;
1485 if (!CTAGS)
1486 {
1487 fdesc *fdp;
1488
1489 /* Output file entries that have no tags. */
1490 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1491 if (!fdp->written)
1492 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1493
1494 while (nincluded_files-- > 0)
1495 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1496
1497 if (fclose (tagf) == EOF)
1498 pfatal (tagfile);
1499 }
1500
1501 exit (EXIT_SUCCESS);
1502 }
1503
1504 /* From here on, we are in (CTAGS && !cxref_style) */
1505 if (update)
1506 {
1507 char cmd[BUFSIZ];
1508 for (i = 0; i < current_arg; ++i)
1509 {
1510 switch (argbuffer[i].arg_type)
1511 {
1512 case at_filename:
1513 case at_stdin:
1514 break;
1515 default:
1516 continue; /* the for loop */
1517 }
1518 sprintf (cmd,
1519 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1520 tagfile, argbuffer[i].what, tagfile);
1521 if (system (cmd) != EXIT_SUCCESS)
1522 fatal ("failed to execute shell command", (char *)NULL);
1523 }
1524 append_to_tagfile = TRUE;
1525 }
1526
1527 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1528 if (tagf == NULL)
1529 pfatal (tagfile);
1530 put_entries (nodehead); /* write all the tags (CTAGS) */
1531 free_tree (nodehead);
1532 nodehead = NULL;
1533 if (fclose (tagf) == EOF)
1534 pfatal (tagfile);
1535
1536 if (CTAGS)
1537 if (append_to_tagfile || update)
1538 {
1539 char cmd[2*BUFSIZ+20];
1540 /* Maybe these should be used:
1541 setenv ("LC_COLLATE", "C", 1);
1542 setenv ("LC_ALL", "C", 1); */
1543 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1544 exit (system (cmd));
1545 }
1546 return EXIT_SUCCESS;
1547 }
1548
1549
1550 /*
1551 * Return a compressor given the file name. If EXTPTR is non-zero,
1552 * return a pointer into FILE where the compressor-specific
1553 * extension begins. If no compressor is found, NULL is returned
1554 * and EXTPTR is not significant.
1555 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1556 */
1557 static compressor *
1558 get_compressor_from_suffix (file, extptr)
1559 char *file;
1560 char **extptr;
1561 {
1562 compressor *compr;
1563 char *slash, *suffix;
1564
1565 /* This relies on FN to be after canonicalize_filename,
1566 so we don't need to consider backslashes on DOS_NT. */
1567 slash = etags_strrchr (file, '/');
1568 suffix = etags_strrchr (file, '.');
1569 if (suffix == NULL || suffix < slash)
1570 return NULL;
1571 if (extptr != NULL)
1572 *extptr = suffix;
1573 suffix += 1;
1574 /* Let those poor souls who live with DOS 8+3 file name limits get
1575 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1576 Only the first do loop is run if not MSDOS */
1577 do
1578 {
1579 for (compr = compressors; compr->suffix != NULL; compr++)
1580 if (streq (compr->suffix, suffix))
1581 return compr;
1582 if (!MSDOS)
1583 break; /* do it only once: not really a loop */
1584 if (extptr != NULL)
1585 *extptr = ++suffix;
1586 } while (*suffix != '\0');
1587 return NULL;
1588 }
1589
1590
1591
1592 /*
1593 * Return a language given the name.
1594 */
1595 static language *
1596 get_language_from_langname (name)
1597 const char *name;
1598 {
1599 language *lang;
1600
1601 if (name == NULL)
1602 error ("empty language name", (char *)NULL);
1603 else
1604 {
1605 for (lang = lang_names; lang->name != NULL; lang++)
1606 if (streq (name, lang->name))
1607 return lang;
1608 error ("unknown language \"%s\"", name);
1609 }
1610
1611 return NULL;
1612 }
1613
1614
1615 /*
1616 * Return a language given the interpreter name.
1617 */
1618 static language *
1619 get_language_from_interpreter (interpreter)
1620 char *interpreter;
1621 {
1622 language *lang;
1623 char **iname;
1624
1625 if (interpreter == NULL)
1626 return NULL;
1627 for (lang = lang_names; lang->name != NULL; lang++)
1628 if (lang->interpreters != NULL)
1629 for (iname = lang->interpreters; *iname != NULL; iname++)
1630 if (streq (*iname, interpreter))
1631 return lang;
1632
1633 return NULL;
1634 }
1635
1636
1637
1638 /*
1639 * Return a language given the file name.
1640 */
1641 static language *
1642 get_language_from_filename (file, case_sensitive)
1643 char *file;
1644 bool case_sensitive;
1645 {
1646 language *lang;
1647 char **name, **ext, *suffix;
1648
1649 /* Try whole file name first. */
1650 for (lang = lang_names; lang->name != NULL; lang++)
1651 if (lang->filenames != NULL)
1652 for (name = lang->filenames; *name != NULL; name++)
1653 if ((case_sensitive)
1654 ? streq (*name, file)
1655 : strcaseeq (*name, file))
1656 return lang;
1657
1658 /* If not found, try suffix after last dot. */
1659 suffix = etags_strrchr (file, '.');
1660 if (suffix == NULL)
1661 return NULL;
1662 suffix += 1;
1663 for (lang = lang_names; lang->name != NULL; lang++)
1664 if (lang->suffixes != NULL)
1665 for (ext = lang->suffixes; *ext != NULL; ext++)
1666 if ((case_sensitive)
1667 ? streq (*ext, suffix)
1668 : strcaseeq (*ext, suffix))
1669 return lang;
1670 return NULL;
1671 }
1672
1673 \f
1674 /*
1675 * This routine is called on each file argument.
1676 */
1677 static void
1678 process_file_name (file, lang)
1679 char *file;
1680 language *lang;
1681 {
1682 struct stat stat_buf;
1683 FILE *inf;
1684 fdesc *fdp;
1685 compressor *compr;
1686 char *compressed_name, *uncompressed_name;
1687 char *ext, *real_name;
1688 int retval;
1689
1690 canonicalize_filename (file);
1691 if (streq (file, tagfile) && !streq (tagfile, "-"))
1692 {
1693 error ("skipping inclusion of %s in self.", file);
1694 return;
1695 }
1696 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1697 {
1698 compressed_name = NULL;
1699 real_name = uncompressed_name = savestr (file);
1700 }
1701 else
1702 {
1703 real_name = compressed_name = savestr (file);
1704 uncompressed_name = savenstr (file, ext - file);
1705 }
1706
1707 /* If the canonicalized uncompressed name
1708 has already been dealt with, skip it silently. */
1709 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1710 {
1711 assert (fdp->infname != NULL);
1712 if (streq (uncompressed_name, fdp->infname))
1713 goto cleanup;
1714 }
1715
1716 if (stat (real_name, &stat_buf) != 0)
1717 {
1718 /* Reset real_name and try with a different name. */
1719 real_name = NULL;
1720 if (compressed_name != NULL) /* try with the given suffix */
1721 {
1722 if (stat (uncompressed_name, &stat_buf) == 0)
1723 real_name = uncompressed_name;
1724 }
1725 else /* try all possible suffixes */
1726 {
1727 for (compr = compressors; compr->suffix != NULL; compr++)
1728 {
1729 compressed_name = concat (file, ".", compr->suffix);
1730 if (stat (compressed_name, &stat_buf) != 0)
1731 {
1732 if (MSDOS)
1733 {
1734 char *suf = compressed_name + strlen (file);
1735 size_t suflen = strlen (compr->suffix) + 1;
1736 for ( ; suf[1]; suf++, suflen--)
1737 {
1738 memmove (suf, suf + 1, suflen);
1739 if (stat (compressed_name, &stat_buf) == 0)
1740 {
1741 real_name = compressed_name;
1742 break;
1743 }
1744 }
1745 if (real_name != NULL)
1746 break;
1747 } /* MSDOS */
1748 free (compressed_name);
1749 compressed_name = NULL;
1750 }
1751 else
1752 {
1753 real_name = compressed_name;
1754 break;
1755 }
1756 }
1757 }
1758 if (real_name == NULL)
1759 {
1760 perror (file);
1761 goto cleanup;
1762 }
1763 } /* try with a different name */
1764
1765 if (!S_ISREG (stat_buf.st_mode))
1766 {
1767 error ("skipping %s: it is not a regular file.", real_name);
1768 goto cleanup;
1769 }
1770 if (real_name == compressed_name)
1771 {
1772 char *cmd = concat (compr->command, " ", real_name);
1773 inf = (FILE *) popen (cmd, "r");
1774 free (cmd);
1775 }
1776 else
1777 inf = fopen (real_name, "r");
1778 if (inf == NULL)
1779 {
1780 perror (real_name);
1781 goto cleanup;
1782 }
1783
1784 process_file (inf, uncompressed_name, lang);
1785
1786 if (real_name == compressed_name)
1787 retval = pclose (inf);
1788 else
1789 retval = fclose (inf);
1790 if (retval < 0)
1791 pfatal (file);
1792
1793 cleanup:
1794 if (compressed_name) free (compressed_name);
1795 if (uncompressed_name) free (uncompressed_name);
1796 last_node = NULL;
1797 curfdp = NULL;
1798 return;
1799 }
1800
1801 static void
1802 process_file (fh, fn, lang)
1803 FILE *fh;
1804 char *fn;
1805 language *lang;
1806 {
1807 static const fdesc emptyfdesc;
1808 fdesc *fdp;
1809
1810 /* Create a new input file description entry. */
1811 fdp = xnew (1, fdesc);
1812 *fdp = emptyfdesc;
1813 fdp->next = fdhead;
1814 fdp->infname = savestr (fn);
1815 fdp->lang = lang;
1816 fdp->infabsname = absolute_filename (fn, cwd);
1817 fdp->infabsdir = absolute_dirname (fn, cwd);
1818 if (filename_is_absolute (fn))
1819 {
1820 /* An absolute file name. Canonicalize it. */
1821 fdp->taggedfname = absolute_filename (fn, NULL);
1822 }
1823 else
1824 {
1825 /* A file name relative to cwd. Make it relative
1826 to the directory of the tags file. */
1827 fdp->taggedfname = relative_filename (fn, tagfiledir);
1828 }
1829 fdp->usecharno = TRUE; /* use char position when making tags */
1830 fdp->prop = NULL;
1831 fdp->written = FALSE; /* not written on tags file yet */
1832
1833 fdhead = fdp;
1834 curfdp = fdhead; /* the current file description */
1835
1836 find_entries (fh);
1837
1838 /* If not Ctags, and if this is not metasource and if it contained no #line
1839 directives, we can write the tags and free all nodes pointing to
1840 curfdp. */
1841 if (!CTAGS
1842 && curfdp->usecharno /* no #line directives in this file */
1843 && !curfdp->lang->metasource)
1844 {
1845 node *np, *prev;
1846
1847 /* Look for the head of the sublist relative to this file. See add_node
1848 for the structure of the node tree. */
1849 prev = NULL;
1850 for (np = nodehead; np != NULL; prev = np, np = np->left)
1851 if (np->fdp == curfdp)
1852 break;
1853
1854 /* If we generated tags for this file, write and delete them. */
1855 if (np != NULL)
1856 {
1857 /* This is the head of the last sublist, if any. The following
1858 instructions depend on this being true. */
1859 assert (np->left == NULL);
1860
1861 assert (fdhead == curfdp);
1862 assert (last_node->fdp == curfdp);
1863 put_entries (np); /* write tags for file curfdp->taggedfname */
1864 free_tree (np); /* remove the written nodes */
1865 if (prev == NULL)
1866 nodehead = NULL; /* no nodes left */
1867 else
1868 prev->left = NULL; /* delete the pointer to the sublist */
1869 }
1870 }
1871 }
1872
1873 /*
1874 * This routine sets up the boolean pseudo-functions which work
1875 * by setting boolean flags dependent upon the corresponding character.
1876 * Every char which is NOT in that string is not a white char. Therefore,
1877 * all of the array "_wht" is set to FALSE, and then the elements
1878 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1879 * of a char is TRUE if it is the string "white", else FALSE.
1880 */
1881 static void
1882 init ()
1883 {
1884 register char *sp;
1885 register int i;
1886
1887 for (i = 0; i < CHARS; i++)
1888 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1889 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1890 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1891 notinname('\0') = notinname('\n');
1892 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1893 begtoken('\0') = begtoken('\n');
1894 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1895 intoken('\0') = intoken('\n');
1896 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1897 endtoken('\0') = endtoken('\n');
1898 }
1899
1900 /*
1901 * This routine opens the specified file and calls the function
1902 * which finds the function and type definitions.
1903 */
1904 static void
1905 find_entries (inf)
1906 FILE *inf;
1907 {
1908 char *cp;
1909 language *lang = curfdp->lang;
1910 Lang_function *parser = NULL;
1911
1912 /* If user specified a language, use it. */
1913 if (lang != NULL && lang->function != NULL)
1914 {
1915 parser = lang->function;
1916 }
1917
1918 /* Else try to guess the language given the file name. */
1919 if (parser == NULL)
1920 {
1921 lang = get_language_from_filename (curfdp->infname, TRUE);
1922 if (lang != NULL && lang->function != NULL)
1923 {
1924 curfdp->lang = lang;
1925 parser = lang->function;
1926 }
1927 }
1928
1929 /* Else look for sharp-bang as the first two characters. */
1930 if (parser == NULL
1931 && readline_internal (&lb, inf) > 0
1932 && lb.len >= 2
1933 && lb.buffer[0] == '#'
1934 && lb.buffer[1] == '!')
1935 {
1936 char *lp;
1937
1938 /* Set lp to point at the first char after the last slash in the
1939 line or, if no slashes, at the first nonblank. Then set cp to
1940 the first successive blank and terminate the string. */
1941 lp = etags_strrchr (lb.buffer+2, '/');
1942 if (lp != NULL)
1943 lp += 1;
1944 else
1945 lp = skip_spaces (lb.buffer + 2);
1946 cp = skip_non_spaces (lp);
1947 *cp = '\0';
1948
1949 if (strlen (lp) > 0)
1950 {
1951 lang = get_language_from_interpreter (lp);
1952 if (lang != NULL && lang->function != NULL)
1953 {
1954 curfdp->lang = lang;
1955 parser = lang->function;
1956 }
1957 }
1958 }
1959
1960 /* We rewind here, even if inf may be a pipe. We fail if the
1961 length of the first line is longer than the pipe block size,
1962 which is unlikely. */
1963 rewind (inf);
1964
1965 /* Else try to guess the language given the case insensitive file name. */
1966 if (parser == NULL)
1967 {
1968 lang = get_language_from_filename (curfdp->infname, FALSE);
1969 if (lang != NULL && lang->function != NULL)
1970 {
1971 curfdp->lang = lang;
1972 parser = lang->function;
1973 }
1974 }
1975
1976 /* Else try Fortran or C. */
1977 if (parser == NULL)
1978 {
1979 node *old_last_node = last_node;
1980
1981 curfdp->lang = get_language_from_langname ("fortran");
1982 find_entries (inf);
1983
1984 if (old_last_node == last_node)
1985 /* No Fortran entries found. Try C. */
1986 {
1987 /* We do not tag if rewind fails.
1988 Only the file name will be recorded in the tags file. */
1989 rewind (inf);
1990 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1991 find_entries (inf);
1992 }
1993 return;
1994 }
1995
1996 if (!no_line_directive
1997 && curfdp->lang != NULL && curfdp->lang->metasource)
1998 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1999 file, or anyway we parsed a file that is automatically generated from
2000 this one. If this is the case, the bingo.c file contained #line
2001 directives that generated tags pointing to this file. Let's delete
2002 them all before parsing this file, which is the real source. */
2003 {
2004 fdesc **fdpp = &fdhead;
2005 while (*fdpp != NULL)
2006 if (*fdpp != curfdp
2007 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
2008 /* We found one of those! We must delete both the file description
2009 and all tags referring to it. */
2010 {
2011 fdesc *badfdp = *fdpp;
2012
2013 /* Delete the tags referring to badfdp->taggedfname
2014 that were obtained from badfdp->infname. */
2015 invalidate_nodes (badfdp, &nodehead);
2016
2017 *fdpp = badfdp->next; /* remove the bad description from the list */
2018 free_fdesc (badfdp);
2019 }
2020 else
2021 fdpp = &(*fdpp)->next; /* advance the list pointer */
2022 }
2023
2024 assert (parser != NULL);
2025
2026 /* Generic initialisations before reading from file. */
2027 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
2028
2029 /* Generic initialisations before parsing file with readline. */
2030 lineno = 0; /* reset global line number */
2031 charno = 0; /* reset global char number */
2032 linecharno = 0; /* reset global char number of line start */
2033
2034 parser (inf);
2035
2036 regex_tag_multiline ();
2037 }
2038
2039 \f
2040 /*
2041 * Check whether an implicitly named tag should be created,
2042 * then call `pfnote'.
2043 * NAME is a string that is internally copied by this function.
2044 *
2045 * TAGS format specification
2046 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2047 * The following is explained in some more detail in etc/ETAGS.EBNF.
2048 *
2049 * make_tag creates tags with "implicit tag names" (unnamed tags)
2050 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2051 * 1. NAME does not contain any of the characters in NONAM;
2052 * 2. LINESTART contains name as either a rightmost, or rightmost but
2053 * one character, substring;
2054 * 3. the character, if any, immediately before NAME in LINESTART must
2055 * be a character in NONAM;
2056 * 4. the character, if any, immediately after NAME in LINESTART must
2057 * also be a character in NONAM.
2058 *
2059 * The implementation uses the notinname() macro, which recognises the
2060 * characters stored in the string `nonam'.
2061 * etags.el needs to use the same characters that are in NONAM.
2062 */
2063 static void
2064 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2065 char *name; /* tag name, or NULL if unnamed */
2066 int namelen; /* tag length */
2067 bool is_func; /* tag is a function */
2068 char *linestart; /* start of the line where tag is */
2069 int linelen; /* length of the line where tag is */
2070 int lno; /* line number */
2071 long cno; /* character number */
2072 {
2073 bool named = (name != NULL && namelen > 0);
2074
2075 if (!CTAGS && named) /* maybe set named to false */
2076 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2077 such that etags.el can guess a name from it. */
2078 {
2079 int i;
2080 register char *cp = name;
2081
2082 for (i = 0; i < namelen; i++)
2083 if (notinname (*cp++))
2084 break;
2085 if (i == namelen) /* rule #1 */
2086 {
2087 cp = linestart + linelen - namelen;
2088 if (notinname (linestart[linelen-1]))
2089 cp -= 1; /* rule #4 */
2090 if (cp >= linestart /* rule #2 */
2091 && (cp == linestart
2092 || notinname (cp[-1])) /* rule #3 */
2093 && strneq (name, cp, namelen)) /* rule #2 */
2094 named = FALSE; /* use implicit tag name */
2095 }
2096 }
2097
2098 if (named)
2099 name = savenstr (name, namelen);
2100 else
2101 name = NULL;
2102 pfnote (name, is_func, linestart, linelen, lno, cno);
2103 }
2104
2105 /* Record a tag. */
2106 static void
2107 pfnote (name, is_func, linestart, linelen, lno, cno)
2108 char *name; /* tag name, or NULL if unnamed */
2109 bool is_func; /* tag is a function */
2110 char *linestart; /* start of the line where tag is */
2111 int linelen; /* length of the line where tag is */
2112 int lno; /* line number */
2113 long cno; /* character number */
2114 {
2115 register node *np;
2116
2117 assert (name == NULL || name[0] != '\0');
2118 if (CTAGS && name == NULL)
2119 return;
2120
2121 np = xnew (1, node);
2122
2123 /* If ctags mode, change name "main" to M<thisfilename>. */
2124 if (CTAGS && !cxref_style && streq (name, "main"))
2125 {
2126 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2127 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2128 fp = etags_strrchr (np->name, '.');
2129 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2130 fp[0] = '\0';
2131 }
2132 else
2133 np->name = name;
2134 np->valid = TRUE;
2135 np->been_warned = FALSE;
2136 np->fdp = curfdp;
2137 np->is_func = is_func;
2138 np->lno = lno;
2139 if (np->fdp->usecharno)
2140 /* Our char numbers are 0-base, because of C language tradition?
2141 ctags compatibility? old versions compatibility? I don't know.
2142 Anyway, since emacs's are 1-base we expect etags.el to take care
2143 of the difference. If we wanted to have 1-based numbers, we would
2144 uncomment the +1 below. */
2145 np->cno = cno /* + 1 */ ;
2146 else
2147 np->cno = invalidcharno;
2148 np->left = np->right = NULL;
2149 if (CTAGS && !cxref_style)
2150 {
2151 if (strlen (linestart) < 50)
2152 np->regex = concat (linestart, "$", "");
2153 else
2154 np->regex = savenstr (linestart, 50);
2155 }
2156 else
2157 np->regex = savenstr (linestart, linelen);
2158
2159 add_node (np, &nodehead);
2160 }
2161
2162 /*
2163 * free_tree ()
2164 * recurse on left children, iterate on right children.
2165 */
2166 static void
2167 free_tree (np)
2168 register node *np;
2169 {
2170 while (np)
2171 {
2172 register node *node_right = np->right;
2173 free_tree (np->left);
2174 if (np->name != NULL)
2175 free (np->name);
2176 free (np->regex);
2177 free (np);
2178 np = node_right;
2179 }
2180 }
2181
2182 /*
2183 * free_fdesc ()
2184 * delete a file description
2185 */
2186 static void
2187 free_fdesc (fdp)
2188 register fdesc *fdp;
2189 {
2190 if (fdp->infname != NULL) free (fdp->infname);
2191 if (fdp->infabsname != NULL) free (fdp->infabsname);
2192 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2193 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2194 if (fdp->prop != NULL) free (fdp->prop);
2195 free (fdp);
2196 }
2197
2198 /*
2199 * add_node ()
2200 * Adds a node to the tree of nodes. In etags mode, sort by file
2201 * name. In ctags mode, sort by tag name. Make no attempt at
2202 * balancing.
2203 *
2204 * add_node is the only function allowed to add nodes, so it can
2205 * maintain state.
2206 */
2207 static void
2208 add_node (np, cur_node_p)
2209 node *np, **cur_node_p;
2210 {
2211 register int dif;
2212 register node *cur_node = *cur_node_p;
2213
2214 if (cur_node == NULL)
2215 {
2216 *cur_node_p = np;
2217 last_node = np;
2218 return;
2219 }
2220
2221 if (!CTAGS)
2222 /* Etags Mode */
2223 {
2224 /* For each file name, tags are in a linked sublist on the right
2225 pointer. The first tags of different files are a linked list
2226 on the left pointer. last_node points to the end of the last
2227 used sublist. */
2228 if (last_node != NULL && last_node->fdp == np->fdp)
2229 {
2230 /* Let's use the same sublist as the last added node. */
2231 assert (last_node->right == NULL);
2232 last_node->right = np;
2233 last_node = np;
2234 }
2235 else if (cur_node->fdp == np->fdp)
2236 {
2237 /* Scanning the list we found the head of a sublist which is
2238 good for us. Let's scan this sublist. */
2239 add_node (np, &cur_node->right);
2240 }
2241 else
2242 /* The head of this sublist is not good for us. Let's try the
2243 next one. */
2244 add_node (np, &cur_node->left);
2245 } /* if ETAGS mode */
2246
2247 else
2248 {
2249 /* Ctags Mode */
2250 dif = strcmp (np->name, cur_node->name);
2251
2252 /*
2253 * If this tag name matches an existing one, then
2254 * do not add the node, but maybe print a warning.
2255 */
2256 if (no_duplicates && !dif)
2257 {
2258 if (np->fdp == cur_node->fdp)
2259 {
2260 if (!no_warnings)
2261 {
2262 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2263 np->fdp->infname, lineno, np->name);
2264 fprintf (stderr, "Second entry ignored\n");
2265 }
2266 }
2267 else if (!cur_node->been_warned && !no_warnings)
2268 {
2269 fprintf
2270 (stderr,
2271 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2272 np->fdp->infname, cur_node->fdp->infname, np->name);
2273 cur_node->been_warned = TRUE;
2274 }
2275 return;
2276 }
2277
2278 /* Actually add the node */
2279 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2280 } /* if CTAGS mode */
2281 }
2282
2283 /*
2284 * invalidate_nodes ()
2285 * Scan the node tree and invalidate all nodes pointing to the
2286 * given file description (CTAGS case) or free them (ETAGS case).
2287 */
2288 static void
2289 invalidate_nodes (badfdp, npp)
2290 fdesc *badfdp;
2291 node **npp;
2292 {
2293 node *np = *npp;
2294
2295 if (np == NULL)
2296 return;
2297
2298 if (CTAGS)
2299 {
2300 if (np->left != NULL)
2301 invalidate_nodes (badfdp, &np->left);
2302 if (np->fdp == badfdp)
2303 np->valid = FALSE;
2304 if (np->right != NULL)
2305 invalidate_nodes (badfdp, &np->right);
2306 }
2307 else
2308 {
2309 assert (np->fdp != NULL);
2310 if (np->fdp == badfdp)
2311 {
2312 *npp = np->left; /* detach the sublist from the list */
2313 np->left = NULL; /* isolate it */
2314 free_tree (np); /* free it */
2315 invalidate_nodes (badfdp, npp);
2316 }
2317 else
2318 invalidate_nodes (badfdp, &np->left);
2319 }
2320 }
2321
2322 \f
2323 static int total_size_of_entries __P((node *));
2324 static int number_len __P((long));
2325
/* Return the number of digits in the decimal representation of NUM,
   which is expected to be non-negative.  The result is at least 1.  */
static int
number_len (num)
     long num;
{
  int digits = 0;

  do
    digits++;
  while ((num /= 10) > 0);

  return digits;
}
2336
2337 /*
2338 * Return total number of characters that put_entries will output for
2339 * the nodes in the linked list at the right of the specified node.
2340 * This count is irrelevant with etags.el since emacs 19.34 at least,
2341 * but is still supplied for backward compatibility.
2342 */
2343 static int
2344 total_size_of_entries (np)
2345 register node *np;
2346 {
2347 register int total = 0;
2348
2349 for (; np != NULL; np = np->right)
2350 if (np->valid)
2351 {
2352 total += strlen (np->regex) + 1; /* pat\177 */
2353 if (np->name != NULL)
2354 total += strlen (np->name) + 1; /* name\001 */
2355 total += number_len ((long) np->lno) + 1; /* lno, */
2356 if (np->cno != invalidcharno) /* cno */
2357 total += number_len (np->cno);
2358 total += 1; /* newline */
2359 }
2360
2361 return total;
2362 }
2363
2364 static void
2365 put_entries (np)
2366 register node *np;
2367 {
2368 register char *sp;
2369 static fdesc *fdp = NULL;
2370
2371 if (np == NULL)
2372 return;
2373
2374 /* Output subentries that precede this one */
2375 if (CTAGS)
2376 put_entries (np->left);
2377
2378 /* Output this entry */
2379 if (np->valid)
2380 {
2381 if (!CTAGS)
2382 {
2383 /* Etags mode */
2384 if (fdp != np->fdp)
2385 {
2386 fdp = np->fdp;
2387 fprintf (tagf, "\f\n%s,%d\n",
2388 fdp->taggedfname, total_size_of_entries (np));
2389 fdp->written = TRUE;
2390 }
2391 fputs (np->regex, tagf);
2392 fputc ('\177', tagf);
2393 if (np->name != NULL)
2394 {
2395 fputs (np->name, tagf);
2396 fputc ('\001', tagf);
2397 }
2398 fprintf (tagf, "%d,", np->lno);
2399 if (np->cno != invalidcharno)
2400 fprintf (tagf, "%ld", np->cno);
2401 fputs ("\n", tagf);
2402 }
2403 else
2404 {
2405 /* Ctags mode */
2406 if (np->name == NULL)
2407 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2408
2409 if (cxref_style)
2410 {
2411 if (vgrind_style)
2412 fprintf (stdout, "%s %s %d\n",
2413 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2414 else
2415 fprintf (stdout, "%-16s %3d %-16s %s\n",
2416 np->name, np->lno, np->fdp->taggedfname, np->regex);
2417 }
2418 else
2419 {
2420 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2421
2422 if (np->is_func)
2423 { /* function or #define macro with args */
2424 putc (searchar, tagf);
2425 putc ('^', tagf);
2426
2427 for (sp = np->regex; *sp; sp++)
2428 {
2429 if (*sp == '\\' || *sp == searchar)
2430 putc ('\\', tagf);
2431 putc (*sp, tagf);
2432 }
2433 putc (searchar, tagf);
2434 }
2435 else
2436 { /* anything else; text pattern inadequate */
2437 fprintf (tagf, "%d", np->lno);
2438 }
2439 putc ('\n', tagf);
2440 }
2441 }
2442 } /* if this node contains a valid tag */
2443
2444 /* Output subentries that follow this one */
2445 put_entries (np->right);
2446 if (!CTAGS)
2447 put_entries (np->left);
2448 }
2449
2450 \f
/*
 * C extensions.
 * These values are combined and tested bitwise.  Note that C_STAR and
 * C_JAVA both contain the C_PLPL bit.
 */

/* C extensions */
#define C_EXT	0x00fff
/* C */
#define C_PLAIN 0x00000
/* C++ */
#define C_PLPL	0x00001
/* C* */
#define C_STAR	0x00003
/* JAVA */
#define C_JAVA	0x00005
/* C, but switch to C++ if `class' is met */
#define C_AUTO  0x01000
/* yacc file */
#define YACC	0x10000
2459
/*
 * The C symbol tables.
 * Kinds of keyword stored in the table that C_symtype looks up;
 * st_none is what C_symtype returns for a word that is not in it.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2474
2475 static unsigned int hash __P((const char *, unsigned int));
2476 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2477 static enum sym_type C_symtype __P((char *, int, int));
2478
2479 /* Feed stuff between (but not including) %[ and %] lines to:
2480 gperf -m 5
2481 %[
2482 %compare-strncmp
2483 %enum
2484 %struct-type
2485 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2486 %%
2487 if, 0, st_C_ignore
2488 for, 0, st_C_ignore
2489 while, 0, st_C_ignore
2490 switch, 0, st_C_ignore
2491 return, 0, st_C_ignore
2492 __attribute__, 0, st_C_attribute
2493 @interface, 0, st_C_objprot
2494 @protocol, 0, st_C_objprot
2495 @implementation,0, st_C_objimpl
2496 @end, 0, st_C_objend
2497 import, (C_JAVA & ~C_PLPL), st_C_ignore
2498 package, (C_JAVA & ~C_PLPL), st_C_ignore
2499 friend, C_PLPL, st_C_ignore
2500 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2501 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2502 interface, (C_JAVA & ~C_PLPL), st_C_struct
2503 class, 0, st_C_class
2504 namespace, C_PLPL, st_C_struct
2505 domain, C_STAR, st_C_struct
2506 union, 0, st_C_struct
2507 struct, 0, st_C_struct
2508 extern, 0, st_C_extern
2509 enum, 0, st_C_enum
2510 typedef, 0, st_C_typedef
2511 define, 0, st_C_define
2512 undef, 0, st_C_define
2513 operator, C_PLPL, st_C_operator
2514 template, 0, st_C_template
2515 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2516 DEFUN, 0, st_C_gnumacro
2517 SYSCALL, 0, st_C_gnumacro
2518 ENTRY, 0, st_C_gnumacro
2519 PSEUDO, 0, st_C_gnumacro
2520 # These are defined inside C functions, so currently they are not met.
2521 # EXFUN used in glibc, DEFVAR_* in emacs.
2522 #EXFUN, 0, st_C_gnumacro
2523 #DEFVAR_, 0, st_C_gnumacro
2524 %]
2525 and replace lines between %< and %> with its output, then:
2526 - remove the #if characterset check
2527 - make in_word_set static and not inline. */
2528 /*%<*/
2529 /* C code produced by gperf version 3.0.1 */
2530 /* Command-line: gperf -m 5 */
2531 /* Computed positions: -k'2-3' */
2532
2533 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2534 /* maximum key range = 33, duplicates = 0 */
2535
/* hash: gperf-generated hash of the word STR, which is LEN chars long.
   The value is LEN plus the asso_values entry of the second character
   and, unless LEN is 2, plus the entry of the third character as well.
   STR[2] is read for every LEN other than 2, so the caller must not
   pass a LEN below 2 (in_word_set checks this before calling).
   Do not edit by hand: this is regenerated from the keyword list in
   the comment that precedes the gperf output.  */
2536 #ifdef __GNUC__
2537 __inline
2538 #else
2539 #ifdef __cplusplus
2540 inline
2541 #endif
2542 #endif
2543 static unsigned int
2544 hash (str, len)
2545 register const char *str;
2546 register unsigned int len;
2547 {
/* One entry per unsigned char value; the array is indexed with the
   char cast to unsigned char.  */
2548 static unsigned char asso_values[] =
2549 {
2550 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2551 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2552 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2553 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2554 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2555 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2556 35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
2557 14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
2558 35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
2559 35, 35, 35, 35, 35, 1, 35, 16, 35, 6,
2560 23, 0, 0, 35, 22, 0, 35, 35, 5, 0,
2561 0, 15, 1, 35, 6, 35, 8, 19, 35, 16,
2562 4, 5, 35, 35, 35, 35, 35, 35, 35, 35,
2563 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2564 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2565 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2566 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2567 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2568 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2569 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2570 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2571 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2572 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2573 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2574 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2575 35, 35, 35, 35, 35, 35
2576 };
2577 register int hval = len;
2578
2579 switch (hval)
2580 {
2581 default:
2582 hval += asso_values[(unsigned char)str[2]];
2583 /*FALLTHROUGH*/
2584 case 2:
2585 hval += asso_values[(unsigned char)str[1]];
2586 break;
2587 }
2588 return hval;
2589 }
2590
/* in_word_set: look up the word made of the first LEN chars of STR in
   the gperf-generated keyword table.  Return a pointer to the matching
   wordlist entry, or 0 when LEN is outside the range of keyword
   lengths, when the hash value is outside the table, or when the entry
   found at the hashed position is not exactly that word.
   wordlist is indexed by the value of hash (); the {""} entries fill
   the positions that no keyword hashes to.  */
2591 static struct C_stab_entry *
2592 in_word_set (str, len)
2593 register const char *str;
2594 register unsigned int len;
2595 {
2596 enum
2597 {
2598 TOTAL_KEYWORDS = 32,
2599 MIN_WORD_LENGTH = 2,
2600 MAX_WORD_LENGTH = 15,
2601 MIN_HASH_VALUE = 2,
2602 MAX_HASH_VALUE = 34
2603 };
2604
2605 static struct C_stab_entry wordlist[] =
2606 {
2607 {""}, {""},
2608 {"if", 0, st_C_ignore},
2609 {""},
2610 {"@end", 0, st_C_objend},
2611 {"union", 0, st_C_struct},
2612 {"define", 0, st_C_define},
2613 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2614 {"template", 0, st_C_template},
2615 {"operator", C_PLPL, st_C_operator},
2616 {"@interface", 0, st_C_objprot},
2617 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2618 {"friend", C_PLPL, st_C_ignore},
2619 {"typedef", 0, st_C_typedef},
2620 {"return", 0, st_C_ignore},
2621 {"@implementation",0, st_C_objimpl},
2622 {"@protocol", 0, st_C_objprot},
2623 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2624 {"extern", 0, st_C_extern},
2625 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2626 {"struct", 0, st_C_struct},
2627 {"domain", C_STAR, st_C_struct},
2628 {"switch", 0, st_C_ignore},
2629 {"enum", 0, st_C_enum},
2630 {"for", 0, st_C_ignore},
2631 {"namespace", C_PLPL, st_C_struct},
2632 {"class", 0, st_C_class},
2633 {"while", 0, st_C_ignore},
2634 {"undef", 0, st_C_define},
2635 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2636 {"__attribute__", 0, st_C_attribute},
2637 {"SYSCALL", 0, st_C_gnumacro},
2638 {"ENTRY", 0, st_C_gnumacro},
2639 {"PSEUDO", 0, st_C_gnumacro},
2640 {"DEFUN", 0, st_C_gnumacro}
2641 };
2642
2643 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2644 {
2645 register int key = hash (str, len);
2646
2647 if (key <= MAX_HASH_VALUE && key >= 0)
2648 {
2649 register const char *s = wordlist[key].name;
2650
/* Compare the first char directly, then the other LEN - 1 chars, and
   check that the keyword ends exactly after LEN chars.  */
2651 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2652 return &wordlist[key];
2653 }
2654 }
2655 return 0;
2656 }
2657 /*%>*/
2658
2659 static enum sym_type
2660 C_symtype (str, len, c_ext)
2661 char *str;
2662 int len;
2663 int c_ext;
2664 {
2665 register struct C_stab_entry *se = in_word_set (str, len);
2666
2667 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2668 return st_none;
2669 return se->type;
2670 }
2671
2672 \f
/*
 * Ignoring __attribute__ ((list))
 */
/* TRUE while looking at an __attribute__ construct. */
static bool inattribute;

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  /* nothing seen */
  fvnone,
  /* Emacs DEFUN keyword seen */
  fdefunkey,
  /* Emacs DEFUN name seen */
  fdefunname,
  /* func: operator keyword seen (cplpl) */
  foperator,
  /* function or variable name seen */
  fvnameseen,
  /* func: just after open parenthesis */
  fstartlist,
  /* func: in parameter list */
  finlist,
  /* func: after parameter list */
  flistseen,
  /* func: before open brace */
  fignore,
  /* var-like: ignore until ';' */
  vignore
} fvdef;

/* func or var: extern keyword seen. */
static bool fvextern;

/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  /* nothing seen */
  tnone,
  /* typedef keyword seen */
  tkeyseen,
  /* defined type seen */
  ttypeseen,
  /* inside typedef body */
  tinbody,
  /* just before typedef tag */
  tend,
  /* junk after typedef tag */
  tignore
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  /* nothing seen yet, or in struct body if bracelev > 0 */
  snone,
  /* struct-like keyword seen */
  skeyseen,
  /* struct-like tag seen */
  stagseen,
  /* colon seen after struct-like tag */
  scolonseen
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  /* nothing seen */
  dnone,
  /* '#' seen as first char on line */
  dsharpseen,
  /* '#' and 'define' seen */
  ddefineseen,
  /* ignore rest of line */
  dignorerest
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  /* nothing seen */
  onone,
  /* @interface or @protocol seen */
  oprotocol,
  /* @implementation seen */
  oimplementation,
  /* class name seen */
  otagseen,
  /* parenthesis before category seen */
  oparenseen,
  /* category name seen */
  ocatseen,
  /* in @implementation body */
  oinbody,
  /* in @implementation body, after +/- */
  omethodsign,
  /* after method name */
  omethodtag,
  /* after method colon */
  omethodcolon,
  /* after method parameter */
  omethodparm,
  /* wait for @end */
  oignore
} objdef;
2761
2762
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 *
 * The members line, offset and length can be used to pass strings
 * around for generic purposes.  The remaining members specifically
 * refer to creating tags: in that case the token contained here is the
 * pattern that will be used to create a tag.
 */
static struct tok
{
  /* string containing the token */
  char *line;
  /* where the token starts in LINE */
  int offset;
  /* token length */
  int length;

  /* The token should be invalidated whenever a state machine is reset
     prematurely.
     NOTE(review): the original comment began "do not create a tag",
     which presumably describes the case where this flag is FALSE --
     confirm against make_C_tag. */
  bool valid;
  /* create a named tag */
  bool named;
  /* source line number of tag */
  int lineno;
  /* source char number of tag */
  long linepos;
} token;			/* latest token read */
2785
2786 /*
2787 * Variables and functions for dealing with nested structures.
2788 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2789 */
2790 static void pushclass_above __P((int, char *, int));
2791 static void popclass_above __P((int));
2792 static void write_classname __P((linebuffer *, char *qualifier));
2793
/* Stack for nested declaration tags.  The arrays cname and bracelev
   run in parallel: entry I of each describes nesting level I.  */
static struct
{
  /* nested class names */
  char **cname;
  /* nested class brace level */
  int *bracelev;
  /* class nesting level (elements used) */
  int nl;
  /* length of the array */
  int size;
} cstack;

/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev		(cstack.nl)

/* After struct keyword or in struct body, not inside a nested function. */
#define instruct	(structdef == snone && nestlev > 0			\
			 && bracelev == cstack.bracelev[nestlev-1] + 1)
2805
2806 static void
2807 pushclass_above (bracelev, str, len)
2808 int bracelev;
2809 char *str;
2810 int len;
2811 {
2812 int nl;
2813
2814 popclass_above (bracelev);
2815 nl = cstack.nl;
2816 if (nl >= cstack.size)
2817 {
2818 int size = cstack.size *= 2;
2819 xrnew (cstack.cname, size, char *);
2820 xrnew (cstack.bracelev, size, int);
2821 }
2822 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2823 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2824 cstack.bracelev[nl] = bracelev;
2825 cstack.nl = nl + 1;
2826 }
2827
2828 static void
2829 popclass_above (bracelev)
2830 int bracelev;
2831 {
2832 int nl;
2833
2834 for (nl = cstack.nl - 1;
2835 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2836 nl--)
2837 {
2838 if (cstack.cname[nl] != NULL)
2839 free (cstack.cname[nl]);
2840 cstack.nl = nl;
2841 }
2842 }
2843
2844 static void
2845 write_classname (cn, qualifier)
2846 linebuffer *cn;
2847 char *qualifier;
2848 {
2849 int i, len;
2850 int qlen = strlen (qualifier);
2851
2852 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2853 {
2854 len = 0;
2855 cn->len = 0;
2856 cn->buffer[0] = '\0';
2857 }
2858 else
2859 {
2860 len = strlen (cstack.cname[0]);
2861 linebuffer_setlen (cn, len);
2862 strcpy (cn->buffer, cstack.cname[0]);
2863 }
2864 for (i = 1; i < cstack.nl; i++)
2865 {
2866 char *s;
2867 int slen;
2868
2869 s = cstack.cname[i];
2870 if (s == NULL)
2871 continue;
2872 slen = strlen (s);
2873 len += slen + qlen;
2874 linebuffer_setlen (cn, len);
2875 strncat (cn->buffer, qualifier, qlen);
2876 strncat (cn->buffer, s, slen);
2877 }
2878 }
2879
2880 \f
2881 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2882 static void make_C_tag __P((bool));
2883
2884 /*
2885 * consider_token ()
2886 * checks to see if the current token is at the start of a
2887 * function or variable, or corresponds to a typedef, or
2888 * is a struct/union/enum tag, or #define, or an enum constant.
2889 *
2890 * *IS_FUNC gets TRUE if the token is a function or #define macro
2891 * with args. C_EXTP points to which language we are looking at.
2892 *
2893 * Globals
2894 * fvdef IN OUT
2895 * structdef IN OUT
2896 * definedef IN OUT
2897 * typdef IN OUT
2898 * objdef IN OUT
2899 */
2900
2901 static bool
2902 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2903 register char *str; /* IN: token pointer */
2904 register int len; /* IN: token length */
2905 register int c; /* IN: first char after the token */
2906 int *c_extp; /* IN, OUT: C extensions mask */
2907 int bracelev; /* IN: brace level */
2908 int parlev; /* IN: parenthesis level */
2909 bool *is_func_or_var; /* OUT: function or variable found */
2910 {
2911 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2912 structtype is the type of the preceding struct-like keyword, and
2913 structbracelev is the brace level where it has been seen. */
2914 static enum sym_type structtype;
2915 static int structbracelev;
2916 static enum sym_type toktype;
2917
2918
2919 toktype = C_symtype (str, len, *c_extp);
2920
2921 /*
2922 * Skip __attribute__
2923 */
2924 if (toktype == st_C_attribute)
2925 {
2926 inattribute = TRUE;
2927 return FALSE;
2928 }
2929
2930 /*
2931 * Advance the definedef state machine.
2932 */
2933 switch (definedef)
2934 {
2935 case dnone:
2936 /* We're not on a preprocessor line. */
2937 if (toktype == st_C_gnumacro)
2938 {
2939 fvdef = fdefunkey;
2940 return FALSE;
2941 }
2942 break;
2943 case dsharpseen:
2944 if (toktype == st_C_define)
2945 {
2946 definedef = ddefineseen;
2947 }
2948 else
2949 {
2950 definedef = dignorerest;
2951 }
2952 return FALSE;
2953 case ddefineseen:
2954 /*
2955 * Make a tag for any macro, unless it is a constant
2956 * and constantypedefs is FALSE.
2957 */
2958 definedef = dignorerest;
2959 *is_func_or_var = (c == '(');
2960 if (!*is_func_or_var && !constantypedefs)
2961 return FALSE;
2962 else
2963 return TRUE;
2964 case dignorerest:
2965 return FALSE;
2966 default:
2967 error ("internal error: definedef value.", (char *)NULL);
2968 }
2969
2970 /*
2971 * Now typedefs
2972 */
2973 switch (typdef)
2974 {
2975 case tnone:
2976 if (toktype == st_C_typedef)
2977 {
2978 if (typedefs)
2979 typdef = tkeyseen;
2980 fvextern = FALSE;
2981 fvdef = fvnone;
2982 return FALSE;
2983 }
2984 break;
2985 case tkeyseen:
2986 switch (toktype)
2987 {
2988 case st_none:
2989 case st_C_class:
2990 case st_C_struct:
2991 case st_C_enum:
2992 typdef = ttypeseen;
2993 }
2994 break;
2995 case ttypeseen:
2996 if (structdef == snone && fvdef == fvnone)
2997 {
2998 fvdef = fvnameseen;
2999 return TRUE;
3000 }
3001 break;
3002 case tend:
3003 switch (toktype)
3004 {
3005 case st_C_class:
3006 case st_C_struct:
3007 case st_C_enum:
3008 return FALSE;
3009 }
3010 return TRUE;
3011 }
3012
3013 switch (toktype)
3014 {
3015 case st_C_javastruct:
3016 if (structdef == stagseen)
3017 structdef = scolonseen;
3018 return FALSE;
3019 case st_C_template:
3020 case st_C_class:
3021 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
3022 && bracelev == 0
3023 && definedef == dnone && structdef == snone
3024 && typdef == tnone && fvdef == fvnone)
3025 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3026 if (toktype == st_C_template)
3027 break;
3028 /* FALLTHRU */
3029 case st_C_struct:
3030 case st_C_enum:
3031 if (parlev == 0
3032 && fvdef != vignore
3033 && (typdef == tkeyseen
3034 || (typedefs_or_cplusplus && structdef == snone)))
3035 {
3036 structdef = skeyseen;
3037 structtype = toktype;
3038 structbracelev = bracelev;
3039 if (fvdef == fvnameseen)
3040 fvdef = fvnone;
3041 }
3042 return FALSE;
3043 }
3044
3045 if (structdef == skeyseen)
3046 {
3047 structdef = stagseen;
3048 return TRUE;
3049 }
3050
3051 if (typdef != tnone)
3052 definedef = dnone;
3053
3054 /* Detect Objective C constructs. */
3055 switch (objdef)
3056 {
3057 case onone:
3058 switch (toktype)
3059 {
3060 case st_C_objprot:
3061 objdef = oprotocol;
3062 return FALSE;
3063 case st_C_objimpl:
3064 objdef = oimplementation;
3065 return FALSE;
3066 }
3067 break;
3068 case oimplementation:
3069 /* Save the class tag for functions or variables defined inside. */
3070 objtag = savenstr (str, len);
3071 objdef = oinbody;
3072 return FALSE;
3073 case oprotocol:
3074 /* Save the class tag for categories. */
3075 objtag = savenstr (str, len);
3076 objdef = otagseen;
3077 *is_func_or_var = TRUE;
3078 return TRUE;
3079 case oparenseen:
3080 objdef = ocatseen;
3081 *is_func_or_var = TRUE;
3082 return TRUE;
3083 case oinbody:
3084 break;
3085 case omethodsign:
3086 if (parlev == 0)
3087 {
3088 fvdef = fvnone;
3089 objdef = omethodtag;
3090 linebuffer_setlen (&token_name, len);
3091 strncpy (token_name.buffer, str, len);
3092 token_name.buffer[len] = '\0';
3093 return TRUE;
3094 }
3095 return FALSE;
3096 case omethodcolon:
3097 if (parlev == 0)
3098 objdef = omethodparm;
3099 return FALSE;
3100 case omethodparm:
3101 if (parlev == 0)
3102 {
3103 fvdef = fvnone;
3104 objdef = omethodtag;
3105 linebuffer_setlen (&token_name, token_name.len + len);
3106 strncat (token_name.buffer, str, len);
3107 return TRUE;
3108 }
3109 return FALSE;
3110 case oignore:
3111 if (toktype == st_C_objend)
3112 {
3113 /* Memory leakage here: the string pointed by objtag is
3114 never released, because many tests would be needed to
3115 avoid breaking on incorrect input code. The amount of
3116 memory leaked here is the sum of the lengths of the
3117 class tags.
3118 free (objtag); */
3119 objdef = onone;
3120 }
3121 return FALSE;
3122 }
3123
3124 /* A function, variable or enum constant? */
3125 switch (toktype)
3126 {
3127 case st_C_extern:
3128 fvextern = TRUE;
3129 switch (fvdef)
3130 {
3131 case finlist:
3132 case flistseen:
3133 case fignore:
3134 case vignore:
3135 break;
3136 default:
3137 fvdef = fvnone;
3138 }
3139 return FALSE;
3140 case st_C_ignore:
3141 fvextern = FALSE;
3142 fvdef = vignore;
3143 return FALSE;
3144 case st_C_operator:
3145 fvdef = foperator;
3146 *is_func_or_var = TRUE;
3147 return TRUE;
3148 case st_none:
3149 if (constantypedefs
3150 && structdef == snone
3151 && structtype == st_C_enum && bracelev > structbracelev)
3152 return TRUE; /* enum constant */
3153 switch (fvdef)
3154 {
3155 case fdefunkey:
3156 if (bracelev > 0)
3157 break;
3158 fvdef = fdefunname; /* GNU macro */
3159 *is_func_or_var = TRUE;
3160 return TRUE;
3161 case fvnone:
3162 switch (typdef)
3163 {
3164 case ttypeseen:
3165 return FALSE;
3166 case tnone:
3167 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3168 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3169 {
3170 fvdef = vignore;
3171 return FALSE;
3172 }
3173 break;
3174 }
3175 /* FALLTHRU */
3176 case fvnameseen:
3177 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3178 {
3179 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3180 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3181 fvdef = foperator;
3182 *is_func_or_var = TRUE;
3183 return TRUE;
3184 }
3185 if (bracelev > 0 && !instruct)
3186 break;
3187 fvdef = fvnameseen; /* function or variable */
3188 *is_func_or_var = TRUE;
3189 return TRUE;
3190 }
3191 break;
3192 }
3193
3194 return FALSE;
3195 }
3196
3197 \f
3198 /*
3199 * C_entries often keeps pointers to tokens or lines which are older than
3200 * the line currently read. By keeping two line buffers, and switching
3201 * them at end of line, it is possible to use those pointers.
     *
     * Each entry pairs a line buffer with `linepos', which the CNL macros
     * below set from charno just before the line is read into that buffer.
3202 */
3203 static struct
3204 {
3205 long linepos;
3206 linebuffer lb;
3207 } lbs[2];
3208
     /* All the macros below expand to uses of variables that must be in
        scope where they are used (curndx, newndx, c_ext, lp, quotednl, inf,
        savetoken, ...); C_entries declares or receives them.  */

     /* True when the "new" and "current" indices designate the same buffer. */
3209 #define current_lb_is_new (newndx == curndx)
     /* Make the other element of lbs the current one (curndx flips 0 <-> 1). */
3210 #define switch_line_buffers() (curndx = 1 - curndx)
3211
     /* Accessors for the buffer and line position selected by curndx/newndx. */
3212 #define curlb (lbs[curndx].lb)
3213 #define newlb (lbs[newndx].lb)
3214 #define curlinepos (lbs[curndx].linepos)
3215 #define newlinepos (lbs[newndx].linepos)
3216
     /* Dialect tests on the c_ext bit mask. */
3217 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3218 #define cplpl (c_ext & C_PLPL)
3219 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3220
     /* Read the next input line into the current buffer: remember charno as
        its position, point lp at its first character, clear quotednl and
        mark the current buffer as the new one.  definedef is left alone, so
        a preprocessor directive in progress survives the line break.  */
3221 #define CNL_SAVE_DEFINEDEF() \
3222 do { \
3223 curlinepos = charno; \
3224 readline (&curlb, inf); \
3225 lp = curlb.buffer; \
3226 quotednl = FALSE; \
3227 newndx = curndx; \
3228 } while (0)
3229
     /* Like CNL_SAVE_DEFINEDEF, but also end any preprocessor directive:
        restore the token saved in savetoken (if valid) and reset definedef
        to dnone.  */
3230 #define CNL() \
3231 do { \
3232 CNL_SAVE_DEFINEDEF(); \
3233 if (savetoken.valid) \
3234 { \
3235 token = savetoken; \
3236 savetoken.valid = FALSE; \
3237 } \
3238 definedef = dnone; \
3239 } while (0)
3240
3241
3242 static void
3243 make_C_tag (isfun)
3244 bool isfun;
3245 {
3246 /* This function is never called when token.valid is FALSE, but
3247 we must protect against invalid input or internal errors. */
3248 if (token.valid)
3249 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3250 token.offset+token.length+1, token.lineno, token.linepos);
3251 else if (DEBUG)
3252 { /* this branch is optimised away if !DEBUG */
3253 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3254 token_name.len + 17, isfun, token.line,
3255 token.offset+token.length+1, token.lineno, token.linepos);
3256 error ("INVALID TOKEN", NULL);
3257 }
3258
3259 token.valid = FALSE;
3260 }
3261
3262
3263 /*
3264 * C_entries ()
3265 * This routine finds functions, variables, typedefs,
3266 * #define's, enum constants and struct/union/enum definitions in
3267 * C syntax and adds them to the list.
     *
     * C_EXT is a bit mask selecting the dialect. It is tested through the
     * plainc, cplpl and cjava macros and for the YACC bit; when C_AUTO is
     * set it is replaced by C_PLPL as soon as a `::' is met inside an
     * identifier, and consider_token may update it too (it gets &c_ext).
     *
     * INF is read one line at a time by the CNL and CNL_SAVE_DEFINEDEF
     * macros; scanning stops when feof (inf) becomes true.
     *
     * The function is a character-driven state machine over the file-level
     * state variables fvdef, typdef, structdef, definedef and objdef, which
     * it resets on entry. Tags are emitted through make_C_tag. The two
     * line buffers in lbs are initialised here and freed before returning.
3268 */
3269 static void
3270 C_entries (c_ext, inf)
3271 int c_ext; /* extension of C */
3272 FILE *inf; /* input file */
3273 {
3274 register char c; /* latest char read; '\0' for end of line */
3275 register char *lp; /* pointer one beyond the character `c' */
3276 int curndx, newndx; /* indices for current and new lb */
3277 register int tokoff; /* offset in line of start of current token */
3278 register int toklen; /* length of current token */
3279 char *qualifier; /* string used to qualify names */
3280 int qlen; /* length of qualifier */
3281 int bracelev; /* current brace level */
3282 int bracketlev; /* current bracket level */
3283 int parlev; /* current parenthesis level */
3284 int attrparlev; /* __attribute__ parenthesis level */
3285 int templatelev; /* current template level */
3286 int typdefbracelev; /* bracelev where a typedef struct body begun */
3287 bool incomm, inquote, inchar, quotednl, midtoken;
3288 bool yacc_rules; /* in the rules part of a yacc file */
3289 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3290
3291
3292 linebuffer_init (&lbs[0].lb);
3293 linebuffer_init (&lbs[1].lb);
     /* Allocate the class stack the first time through, i.e. while
        cstack.size is still 0. */
3294 if (cstack.size == 0)
3295 {
3296 cstack.size = (DEBUG) ? 1 : 4;
3297 cstack.nl = 0;
3298 cstack.cname = xnew (cstack.size, char *);
3299 cstack.bracelev = xnew (cstack.size, int);
3300 }
3301
3302 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3303 curndx = newndx = 0;
3304 lp = curlb.buffer;
     /* Start with an empty line, so that the first character seen is '\0'. */
3305 *lp = 0;
3306
3307 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3308 structdef = snone; definedef = dnone; objdef = onone;
3309 yacc_rules = FALSE;
3310 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3311 token.valid = savetoken.valid = FALSE;
3312 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
     /* Separator written between a class name and a member name. */
3313 if (cjava)
3314 { qualifier = "."; qlen = 1; }
3315 else
3316 { qualifier = "::"; qlen = 2; }
3317
3318
     /* Main loop: each iteration handles the next character C of the
        current line. A '\0' stands for end of line and makes one of the
        CNL macros read the following line. */
3319 while (!feof (inf))
3320 {
3321 c = *lp++;
3322 if (c == '\\')
3323 {
3324 /* If we are at the end of the line, the next character is a
3325 '\0'; do not skip it, because it is what tells us
3326 to read the next line. */
3327 if (*lp == '\0')
3328 {
3329 quotednl = TRUE;
3330 continue;
3331 }
3332 lp++;
3333 c = ' ';
3334 }
3335 else if (incomm)
3336 {
3337 switch (c)
3338 {
3339 case '*':
3340 if (*lp == '/')
3341 {
3342 c = *lp++;
3343 incomm = FALSE;
3344 }
3345 break;
3346 case '\0':
3347 /* Newlines inside comments do not end macro definitions in
3348 traditional cpp. */
3349 CNL_SAVE_DEFINEDEF ();
3350 break;
3351 }
3352 continue;
3353 }
3354 else if (inquote)
3355 {
3356 switch (c)
3357 {
3358 case '"':
3359 inquote = FALSE;
3360 break;
3361 case '\0':
3362 /* Newlines inside strings do not end macro definitions
3363 in traditional cpp, even though compilers don't
3364 usually accept them. */
3365 CNL_SAVE_DEFINEDEF ();
3366 break;
3367 }
3368 continue;
3369 }
3370 else if (inchar)
3371 {
3372 switch (c)
3373 {
3374 case '\0':
3375 /* Hmmm, something went wrong. */
3376 CNL ();
3377 /* FALLTHRU */
3378 case '\'':
3379 inchar = FALSE;
3380 break;
3381 }
3382 continue;
3383 }
3384 else if (bracketlev > 0)
3385 {
     /* Inside [...]: skip everything up to the matching bracket. */
3386 switch (c)
3387 {
3388 case ']':
3389 if (--bracketlev > 0)
3390 continue;
3391 break;
3392 case '\0':
3393 CNL_SAVE_DEFINEDEF ();
3394 break;
3395 }
3396 continue;
3397 }
3398 else switch (c)
3399 {
3400 case '"':
3401 inquote = TRUE;
3402 if (inattribute)
3403 break;
3404 switch (fvdef)
3405 {
3406 case fdefunkey:
3407 case fstartlist:
3408 case finlist:
3409 case fignore:
3410 case vignore:
3411 break;
3412 default:
3413 fvextern = FALSE;
3414 fvdef = fvnone;
3415 }
3416 continue;
3417 case '\'':
3418 inchar = TRUE;
3419 if (inattribute)
3420 break;
3421 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3422 {
3423 fvextern = FALSE;
3424 fvdef = fvnone;
3425 }
3426 continue;
3427 case '/':
3428 if (*lp == '*')
3429 {
3430 incomm = TRUE;
3431 lp++;
3432 c = ' ';
3433 }
3434 else if (/* cplpl && */ *lp == '/')
3435 {
     /* A `//' comment: treat the rest of the line as absent. */
3436 c = '\0';
3437 }
3438 break;
3439 case '%':
3440 if ((c_ext & YACC) && *lp == '%')
3441 {
3442 /* Entering or exiting rules section in yacc file. */
3443 lp++;
3444 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3445 typdef = tnone; structdef = snone;
3446 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3447 bracelev = 0;
3448 yacc_rules = !yacc_rules;
3449 continue;
3450 }
3451 else
3452 break;
3453 case '#':
3454 if (definedef == dnone)
3455 {
3456 char *cp;
3457 bool cpptoken = TRUE;
3458
3459 /* Look back on this line. If all blanks, or nonblanks
3460 followed by an end of comment, this is a preprocessor
3461 token. */
3462 for (cp = newlb.buffer; cp < lp-1; cp++)
3463 if (!iswhite (*cp))
3464 {
3465 if (*cp == '*' && *(cp+1) == '/')
3466 {
3467 cp++;
3468 cpptoken = TRUE;
3469 }
3470 else
3471 cpptoken = FALSE;
3472 }
3473 if (cpptoken)
3474 definedef = dsharpseen;
3475 } /* if (definedef == dnone) */
3476 continue;
3477 case '[':
3478 bracketlev++;
3479 continue;
3480 } /* switch (c) */
3481
3482
3483 /* Consider token only if some involved conditions are satisfied. */
3484 if (typdef != tignore
3485 && definedef != dignorerest
3486 && fvdef != finlist
3487 && templatelev == 0
3488 && (definedef != dnone
3489 || structdef != scolonseen)
3490 && !inattribute)
3491 {
3492 if (midtoken)
3493 {
3494 if (endtoken (c))
3495 {
3496 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3497 /* This handles :: in the middle,
3498 but not at the beginning of an identifier.
3499 Also, space-separated :: is not recognised. */
3500 {
3501 if (c_ext & C_AUTO) /* automatic detection of C++ */
3502 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3503 lp += 2;
3504 toklen += 2;
3505 c = lp[-1];
3506 goto still_in_token;
3507 }
3508 else
3509 {
3510 bool funorvar = FALSE;
3511
3512 if (yacc_rules
3513 || consider_token (newlb.buffer + tokoff, toklen, c,
3514 &c_ext, bracelev, parlev,
3515 &funorvar))
3516 {
3517 if (fvdef == foperator)
3518 {
     /* Extend the token over the operator symbol that follows
        the `operator' keyword. */
3519 char *oldlp = lp;
3520 lp = skip_spaces (lp-1);
3521 if (*lp != '\0')
3522 lp += 1;
3523 while (*lp != '\0'
3524 && !iswhite (*lp) && *lp != '(')
3525 lp += 1;
3526 c = *lp++;
3527 toklen += lp - oldlp;
3528 }
3529 token.named = FALSE;
3530 if (!plainc
3531 && nestlev > 0 && definedef == dnone)
3532 /* in struct body */
3533 {
3534 write_classname (&token_name, qualifier);
3535 linebuffer_setlen (&token_name,
3536 token_name.len+qlen+toklen);
3537 strcat (token_name.buffer, qualifier);
3538 strncat (token_name.buffer,
3539 newlb.buffer + tokoff, toklen);
3540 token.named = TRUE;
3541 }
3542 else if (objdef == ocatseen)
3543 /* Objective C category */
3544 {
3545 int len = strlen (objtag) + 2 + toklen;
3546 linebuffer_setlen (&token_name, len);
3547 strcpy (token_name.buffer, objtag);
3548 strcat (token_name.buffer, "(");
3549 strncat (token_name.buffer,
3550 newlb.buffer + tokoff, toklen);
3551 strcat (token_name.buffer, ")");
3552 token.named = TRUE;
3553 }
3554 else if (objdef == omethodtag
3555 || objdef == omethodparm)
3556 /* Objective C method */
3557 {
3558 token.named = TRUE;
3559 }
3560 else if (fvdef == fdefunname)
3561 /* GNU DEFUN and similar macros */
3562 {
3563 bool defun = (newlb.buffer[tokoff] == 'F');
3564 int off = tokoff;
3565 int len = toklen;
3566
3567 /* Rewrite the tag so that emacs lisp DEFUNs
3568 can be found by their elisp name */
3569 if (defun)
3570 {
3571 off += 1;
3572 len -= 1;
3573 }
3574 linebuffer_setlen (&token_name, len);
3575 strncpy (token_name.buffer,
3576 newlb.buffer + off, len);
3577 token_name.buffer[len] = '\0';
3578 if (defun)
3579 while (--len >= 0)
3580 if (token_name.buffer[len] == '_')
3581 token_name.buffer[len] = '-';
3582 token.named = defun;
3583 }
3584 else
3585 {
3586 linebuffer_setlen (&token_name, toklen);
3587 strncpy (token_name.buffer,
3588 newlb.buffer + tokoff, toklen);
3589 token_name.buffer[toklen] = '\0';
3590 /* Name macros and members. */
3591 token.named = (structdef == stagseen
3592 || typdef == ttypeseen
3593 || typdef == tend
3594 || (funorvar
3595 && definedef == dignorerest)
3596 || (funorvar
3597 && definedef == dnone
3598 && structdef == snone
3599 && bracelev > 0));
3600 }
3601 token.lineno = lineno;
3602 token.offset = tokoff;
3603 token.length = toklen;
3604 token.line = newlb.buffer;
3605 token.linepos = newlinepos;
3606 token.valid = TRUE;
3607
     /* Either keep the token pending (switching buffers so that the
        line it points into is not overwritten by the next read), or
        tag it right away. */
3608 if (definedef == dnone
3609 && (fvdef == fvnameseen
3610 || fvdef == foperator
3611 || structdef == stagseen
3612 || typdef == tend
3613 || typdef == ttypeseen
3614 || objdef != onone))
3615 {
3616 if (current_lb_is_new)
3617 switch_line_buffers ();
3618 }
3619 else if (definedef != dnone
3620 || fvdef == fdefunname
3621 || instruct)
3622 make_C_tag (funorvar);
3623 }
3624 else /* not yacc and consider_token failed */
3625 {
3626 if (inattribute && fvdef == fignore)
3627 {
3628 /* We have just met __attribute__ after a
3629 function parameter list: do not tag the
3630 function again. */
3631 fvdef = fvnone;
3632 }
3633 }
3634 midtoken = FALSE;
3635 }
3636 } /* if (endtoken (c)) */
3637 else if (intoken (c))
3638 still_in_token:
3639 {
3640 toklen++;
3641 continue;
3642 }
3643 } /* if (midtoken) */
3644 else if (begtoken (c))
3645 {
3646 switch (definedef)
3647 {
3648 case dnone:
3649 switch (fvdef)
3650 {
3651 case fstartlist:
3652 /* This prevents tagging fb in
3653 void (__attribute__((noreturn)) *fb) (void);
3654 Fixing this is not easy and not very important. */
3655 fvdef = finlist;
3656 continue;
3657 case flistseen:
3658 if (plainc || declarations)
3659 {
3660 make_C_tag (TRUE); /* a function */
3661 fvdef = fignore;
3662 }
3663 break;
3664 }
3665 if (structdef == stagseen && !cjava)
3666 {
3667 popclass_above (bracelev);
3668 structdef = snone;
3669 }
3670 break;
3671 case dsharpseen:
3672 savetoken = token;
3673 break;
3674 }
3675 if (!yacc_rules || lp == newlb.buffer + 1)
3676 {
3677 tokoff = lp - 1 - newlb.buffer;
3678 toklen = 1;
3679 midtoken = TRUE;
3680 }
3681 continue;
3682 } /* if (begtoken) */
3683 } /* if must look at token */
3684
3685
3686 /* Detect end of line, colon, comma, semicolon and various braces
3687 after having handled a token.*/
3688 switch (c)
3689 {
3690 case ':':
3691 if (inattribute)
3692 break;
3693 if (yacc_rules && token.offset == 0 && token.valid)
3694 {
3695 make_C_tag (FALSE); /* a yacc function */
3696 break;
3697 }
3698 if (definedef != dnone)
3699 break;
3700 switch (objdef)
3701 {
3702 case otagseen:
3703 objdef = oignore;
3704 make_C_tag (TRUE); /* an Objective C class */
3705 break;
3706 case omethodtag:
3707 case omethodparm:
3708 objdef = omethodcolon;
3709 linebuffer_setlen (&token_name, token_name.len + 1);
3710 strcat (token_name.buffer, ":");
3711 break;
3712 }
3713 if (structdef == stagseen)
3714 {
3715 structdef = scolonseen;
3716 break;
3717 }
3718 /* Should be useless, but may work as a safety net. */
3719 if (cplpl && fvdef == flistseen)
3720 {
3721 make_C_tag (TRUE); /* a function */
3722 fvdef = fignore;
3723 break;
3724 }
3725 break;
3726 case ';':
3727 if (definedef != dnone || inattribute)
3728 break;
3729 switch (typdef)
3730 {
3731 case tend:
3732 case ttypeseen:
3733 make_C_tag (FALSE); /* a typedef */
3734 typdef = tnone;
3735 fvdef = fvnone;
3736 break;
3737 case tnone:
3738 case tinbody:
3739 case tignore:
3740 switch (fvdef)
3741 {
3742 case fignore:
3743 if (typdef == tignore || cplpl)
3744 fvdef = fvnone;
3745 break;
3746 case fvnameseen:
3747 if ((globals && bracelev == 0 && (!fvextern || declarations))
3748 || (members && instruct))
3749 make_C_tag (FALSE); /* a variable */
3750 fvextern = FALSE;
3751 fvdef = fvnone;
3752 token.valid = FALSE;
3753 break;
3754 case flistseen:
3755 if ((declarations
3756 && (cplpl || !instruct)
3757 && (typdef == tnone || (typdef != tignore && instruct)))
3758 || (members
3759 && plainc && instruct))
3760 make_C_tag (TRUE); /* a function */
3761 /* FALLTHRU */
3762 default:
3763 fvextern = FALSE;
3764 fvdef = fvnone;
3765 if (declarations
3766 && cplpl && structdef == stagseen)
3767 make_C_tag (FALSE); /* forward declaration */
3768 else
3769 token.valid = FALSE;
3770 } /* switch (fvdef) */
3771 /* FALLTHRU */
3772 default:
3773 if (!instruct)
3774 typdef = tnone;
3775 }
3776 if (structdef == stagseen)
3777 structdef = snone;
3778 break;
3779 case ',':
3780 if (definedef != dnone || inattribute)
3781 break;
3782 switch (objdef)
3783 {
3784 case omethodtag:
3785 case omethodparm:
3786 make_C_tag (TRUE); /* an Objective C method */
3787 objdef = oinbody;
3788 break;
3789 }
3790 switch (fvdef)
3791 {
3792 case fdefunkey:
3793 case foperator:
3794 case fstartlist:
3795 case finlist:
3796 case fignore:
3797 case vignore:
3798 break;
3799 case fdefunname:
3800 fvdef = fignore;
3801 break;
3802 case fvnameseen:
3803 if (parlev == 0
3804 && ((globals
3805 && bracelev == 0
3806 && templatelev == 0
3807 && (!fvextern || declarations))
3808 || (members && instruct)))
3809 make_C_tag (FALSE); /* a variable */
3810 break;
3811 case flistseen:
3812 if ((declarations && typdef == tnone && !instruct)
3813 || (members && typdef != tignore && instruct))
3814 {
3815 make_C_tag (TRUE); /* a function */
3816 fvdef = fvnameseen;
3817 }
3818 else if (!declarations)
3819 fvdef = fvnone;
3820 token.valid = FALSE;
3821 break;
3822 default:
3823 fvdef = fvnone;
3824 }
3825 if (structdef == stagseen)
3826 structdef = snone;
3827 break;
3828 case ']':
3829 if (definedef != dnone || inattribute)
3830 break;
3831 if (structdef == stagseen)
3832 structdef = snone;
3833 switch (typdef)
3834 {
3835 case ttypeseen:
3836 case tend:
3837 typdef = tignore;
3838 make_C_tag (FALSE); /* a typedef */
3839 break;
3840 case tnone:
3841 case tinbody:
3842 switch (fvdef)
3843 {
3844 case foperator:
3845 case finlist:
3846 case fignore:
3847 case vignore:
3848 break;
3849 case fvnameseen:
3850 if ((members && bracelev == 1)
3851 || (globals && bracelev == 0
3852 && (!fvextern || declarations)))
3853 make_C_tag (FALSE); /* a variable */
3854 /* FALLTHRU */
3855 default:
3856 fvdef = fvnone;
3857 }
3858 break;
3859 }
3860 break;
3861 case '(':
3862 if (inattribute)
3863 {
3864 attrparlev++;
3865 break;
3866 }
3867 if (definedef != dnone)
3868 break;
3869 if (objdef == otagseen && parlev == 0)
3870 objdef = oparenseen;
3871 switch (fvdef)
3872 {
3873 case fvnameseen:
3874 if (typdef == ttypeseen
3875 && *lp != '*'
3876 && !instruct)
3877 {
3878 /* This handles constructs like:
3879 typedef void OperatorFun (int fun); */
3880 make_C_tag (FALSE);
3881 typdef = tignore;
3882 fvdef = fignore;
3883 break;
3884 }
3885 /* FALLTHRU */
3886 case foperator:
3887 fvdef = fstartlist;
3888 break;
3889 case flistseen:
3890 fvdef = finlist;
3891 break;
3892 }
3893 parlev++;
3894 break;
3895 case ')':
3896 if (inattribute)
3897 {
     /* The attribute ends when its outermost parenthesis closes. */
3898 if (--attrparlev == 0)
3899 inattribute = FALSE;
3900 break;
3901 }
3902 if (definedef != dnone)
3903 break;
3904 if (objdef == ocatseen && parlev == 1)
3905 {
3906 make_C_tag (TRUE); /* an Objective C category */
3907 objdef = oignore;
3908 }
3909 if (--parlev == 0)
3910 {
3911 switch (fvdef)
3912 {
3913 case fstartlist:
3914 case finlist:
3915 fvdef = flistseen;
3916 break;
3917 }
3918 if (!instruct
3919 && (typdef == tend
3920 || typdef == ttypeseen))
3921 {
3922 typdef = tignore;
3923 make_C_tag (FALSE); /* a typedef */
3924 }
3925 }
3926 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3927 parlev = 0;
3928 break;
3929 case '{':
3930 if (definedef != dnone)
3931 break;
3932 if (typdef == ttypeseen)
3933 {
3934 /* Whenever typdef is set to tinbody (currently only
3935 here), typdefbracelev should be set to bracelev. */
3936 typdef = tinbody;
3937 typdefbracelev = bracelev;
3938 }
3939 switch (fvdef)
3940 {
3941 case flistseen:
3942 make_C_tag (TRUE); /* a function */
3943 /* FALLTHRU */
3944 case fignore:
3945 fvdef = fvnone;
3946 break;
3947 case fvnone:
3948 switch (objdef)
3949 {
3950 case otagseen:
3951 make_C_tag (TRUE); /* an Objective C class */
3952 objdef = oignore;
3953 break;
3954 case omethodtag:
3955 case omethodparm:
3956 make_C_tag (TRUE); /* an Objective C method */
3957 objdef = oinbody;
3958 break;
3959 default:
3960 /* Neutralize `extern "C" {' grot. */
3961 if (bracelev == 0 && structdef == snone && nestlev == 0
3962 && typdef == tnone)
3963 bracelev = -1;
3964 }
3965 break;
3966 }
3967 switch (structdef)
3968 {
3969 case skeyseen: /* unnamed struct */
3970 pushclass_above (bracelev, NULL, 0);
3971 structdef = snone;
3972 break;
3973 case stagseen: /* named struct or enum */
3974 case scolonseen: /* a class */
3975 pushclass_above (bracelev,token.line+token.offset, token.length);
3976 structdef = snone;
3977 make_C_tag (FALSE); /* a struct or enum */
3978 break;
3979 }
3980 bracelev += 1;
3981 break;
3982 case '*':
3983 if (definedef != dnone)
3984 break;
3985 if (fvdef == fstartlist)
3986 {
3987 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3988 token.valid = FALSE;
3989 }
3990 break;
3991 case '}':
3992 if (definedef != dnone)
3993 break;
3994 bracelev -= 1;
3995 if (!ignoreindent && lp == newlb.buffer + 1)
3996 {
3997 if (bracelev != 0)
3998 token.valid = FALSE; /* unexpected value, token unreliable */
3999 bracelev = 0; /* reset brace level if first column */
4000 parlev = 0; /* also reset paren level, just in case... */
4001 }
4002 else if (bracelev < 0)
4003 {
4004 token.valid = FALSE; /* something gone amiss, token unreliable */
4005 bracelev = 0;
4006 }
4007 if (bracelev == 0 && fvdef == vignore)
4008 fvdef = fvnone; /* end of function */
4009 popclass_above (bracelev);
4010 structdef = snone;
4011 /* Only if typdef == tinbody is typdefbracelev significant. */
4012 if (typdef == tinbody && bracelev <= typdefbracelev)
4013 {
4014 assert (bracelev == typdefbracelev);
4015 typdef = tend;
4016 }
4017 break;
4018 case '=':
4019 if (definedef != dnone)
4020 break;
4021 switch (fvdef)
4022 {
4023 case foperator:
4024 case finlist:
4025 case fignore:
4026 case vignore:
4027 break;
4028 case fvnameseen:
4029 if ((members && bracelev == 1)
4030 || (globals && bracelev == 0 && (!fvextern || declarations)))
4031 make_C_tag (FALSE); /* a variable */
4032 /* FALLTHRU */
4033 default:
4034 fvdef = vignore;
4035 }
4036 break;
4037 case '<':
4038 if (cplpl
4039 && (structdef == stagseen || fvdef == fvnameseen))
4040 {
4041 templatelev++;
4042 break;
4043 }
4044 goto resetfvdef;
4045 case '>':
4046 if (templatelev > 0)
4047 {
4048 templatelev--;
4049 break;
4050 }
4051 goto resetfvdef;
4052 case '+':
4053 case '-':
4054 if (objdef == oinbody && bracelev == 0)
4055 {
4056 objdef = omethodsign;
4057 break;
4058 }
4059 /* FALLTHRU */
4060 resetfvdef:
4061 case '#': case '~': case '&': case '%': case '/':
4062 case '|': case '^': case '!': case '.': case '?':
4063 if (definedef != dnone)
4064 break;
4065 /* These surely cannot follow a function tag in C. */
4066 switch (fvdef)
4067 {
4068 case foperator:
4069 case finlist:
4070 case fignore:
4071 case vignore:
4072 break;
4073 default:
4074 fvdef = fvnone;
4075 }
4076 break;
4077 case '\0':
4078 if (objdef == otagseen)
4079 {
4080 make_C_tag (TRUE); /* an Objective C class */
4081 objdef = oignore;
4082 }
4083 /* If a macro spans multiple lines don't reset its state. */
4084 if (quotednl)
4085 CNL_SAVE_DEFINEDEF ();
4086 else
4087 CNL ();
4088 break;
4089 } /* switch (c) */
4090
4091 } /* while not eof */
4092
4093 free (lbs[0].lb.buffer);
4094 free (lbs[1].lb.buffer);
4095 }
4096
4097 /*
4098 * Process either a C++ file or a C file depending on the setting
4099 * of a global flag.
4100 */
4101 static void
4102 default_C_entries (inf)
4103 FILE *inf;
4104 {
4105 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4106 }
4107
/* Tag INF as plain C: no dialect bit is set in the mask given to C_entries. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extension = 0;

  C_entries (no_extension, inf);
}
4115
4116 /* Always do C++. */
4117 static void
4118 Cplusplus_entries (inf)
4119 FILE *inf;
4120 {
4121 C_entries (C_PLPL, inf);
4122 }
4123
4124 /* Always do Java. */
4125 static void
4126 Cjava_entries (inf)
4127 FILE *inf;
4128 {
4129 C_entries (C_JAVA, inf);
4130 }
4131
4132 /* Always do C*. */
4133 static void
4134 Cstar_entries (inf)
4135 FILE *inf;
4136 {
4137 C_entries (C_STAR, inf);
4138 }
4139
4140 /* Always do Yacc. */
4141 static void
4142 Yacc_entries (inf)
4143 FILE *inf;
4144 {
4145 C_entries (YACC, inf);
4146 }
4147
4148 \f
4149 /* Useful macros. */
     /* Expands to a `for' header with no initialisation and no increment;
        the statement that follows the macro is the loop body. Before
        each iteration the test checks feof (FILE_POINTER) and, if not at end
        of file, reads the next line into LINE_BUFFER and makes CHAR_POINTER
        point at its first character. */
4150 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4151 for (; /* loop initialization */ \
4152 !feof (file_pointer) /* loop test */ \
4153 && /* instructions at start of loop */ \
4154 (readline (&line_buffer, file_pointer), \
4155 char_pointer = line_buffer.buffer, \
4156 TRUE); \
4157 )
4158
     /* True when CP points at KW and the character after KW satisfies
        notinname. On success CP is advanced past KW and past any spaces
        following it; on failure CP is unchanged. CP is evaluated several
        times, so it must be an lvalue without side effects. */
4159 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4160 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4161 && strneq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4162 && notinname ((cp)[sizeof(kw)-1]) /* end of kw */ \
4163 && ((cp) = skip_spaces((cp)+sizeof(kw)-1))) /* skip spaces */
4164
4165 /* Similar to LOOKING_AT but ignores case, does not use notinname, and
        does not skip spaces: on success CP is left right after KW. */
4166 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4167 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4168 && strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4169 && ((cp) += sizeof(kw)-1)) /* skip the keyword */
4170
4171 /*
4172 * Read a file, but do no processing. This is used to do regexp
4173 * matching on files that have no language defined.
4174 */
4175 static void
4176 just_read_file (inf)
4177 FILE *inf;
4178 {
4179 register char *dummy;
4180
4181 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4182 continue;
4183 }
4184
4185 \f
4186 /* Fortran parsing */
4187
4188 static void F_takeprec __P((void));
4189 static void F_getit __P((FILE *));
4190
4191 static void
4192 F_takeprec ()
4193 {
4194 dbp = skip_spaces (dbp);
4195 if (*dbp != '*')
4196 return;
4197 dbp++;
4198 dbp = skip_spaces (dbp);
4199 if (strneq (dbp, "(*)", 3))
4200 {
4201 dbp += 3;
4202 return;
4203 }
4204 if (!ISDIGIT (*dbp))
4205 {
4206 --dbp; /* force failure */
4207 return;
4208 }
4209 do
4210 dbp++;
4211 while (ISDIGIT (*dbp));
4212 }
4213
4214 static void
4215 F_getit (inf)
4216 FILE *inf;
4217 {
4218 register char *cp;
4219
4220 dbp = skip_spaces (dbp);
4221 if (*dbp == '\0')
4222 {
4223 readline (&lb, inf);
4224 dbp = lb.buffer;
4225 if (dbp[5] != '&')
4226 return;
4227 dbp += 6;
4228 dbp = skip_spaces (dbp);
4229 }
4230 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4231 return;
4232 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4233 continue;
4234 make_tag (dbp, cp-dbp, TRUE,
4235 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4236 }
4237
4238
4239 static void
4240 Fortran_functions (inf)
4241 FILE *inf;
4242 {
4243 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4244 {
4245 if (*dbp == '%')
4246 dbp++; /* Ratfor escape to fortran */
4247 dbp = skip_spaces (dbp);
4248 if (*dbp == '\0')
4249 continue;
4250 switch (lowcase (*dbp))
4251 {
4252 case 'i':
4253 if (nocase_tail ("integer"))
4254 F_takeprec ();
4255 break;
4256 case 'r':
4257 if (nocase_tail ("real"))
4258 F_takeprec ();
4259 break;
4260 case 'l':
4261 if (nocase_tail ("logical"))
4262 F_takeprec ();
4263 break;
4264 case 'c':
4265 if (nocase_tail ("complex") || nocase_tail ("character"))
4266 F_takeprec ();
4267 break;
4268 case 'd':
4269 if (nocase_tail ("double"))
4270 {
4271 dbp = skip_spaces (dbp);
4272 if (*dbp == '\0')
4273 continue;
4274 if (nocase_tail ("precision"))
4275 break;
4276 continue;
4277 }
4278 break;
4279 }
4280 dbp = skip_spaces (dbp);
4281 if (*dbp == '\0')
4282 continue;
4283 switch (lowcase (*dbp))
4284 {
4285 case 'f':
4286 if (nocase_tail ("function"))
4287 F_getit (inf);
4288 continue;
4289 case 's':
4290 if (nocase_tail ("subroutine"))
4291 F_getit (inf);
4292 continue;
4293 case 'e':
4294 if (nocase_tail ("entry"))
4295 F_getit (inf);
4296 continue;
4297 case 'b':
4298 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4299 {
4300 dbp = skip_spaces (dbp);
4301 if (*dbp == '\0') /* assume un-named */
4302 make_tag ("blockdata", 9, TRUE,
4303 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4304 else
4305 F_getit (inf); /* look for name */
4306 }
4307 continue;
4308 }
4309 }
4310 }
4311
4312 \f
4313 /*
4314 * Ada parsing
4315 * Original code by
4316 * Philippe Waroquiers (1998)
4317 */
4318
4319 static void Ada_getit __P((FILE *, char *));
4320
4321 /* Once we are positioned after an "interesting" keyword, let's get
4322 the real tag value necessary. */
4323 static void
4324 Ada_getit (inf, name_qualifier)
4325 FILE *inf;
4326 char *name_qualifier;
4327 {
4328 register char *cp;
4329 char *name;
4330 char c;
4331
4332 while (!feof (inf))
4333 {
4334 dbp = skip_spaces (dbp);
4335 if (*dbp == '\0'
4336 || (dbp[0] == '-' && dbp[1] == '-'))
4337 {
4338 readline (&lb, inf);
4339 dbp = lb.buffer;
4340 }
4341 switch (lowcase(*dbp))
4342 {
4343 case 'b':
4344 if (nocase_tail ("body"))
4345 {
4346 /* Skipping body of procedure body or package body or ....
4347 resetting qualifier to body instead of spec. */
4348 name_qualifier = "/b";
4349 continue;
4350 }
4351 break;
4352 case 't':
4353 /* Skipping type of task type or protected type ... */
4354 if (nocase_tail ("type"))
4355 continue;
4356 break;
4357 }
4358 if (*dbp == '"')
4359 {
4360 dbp += 1;
4361 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4362 continue;
4363 }
4364 else
4365 {
4366 dbp = skip_spaces (dbp);
4367 for (cp = dbp;
4368 (*cp != '\0'
4369 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4370 cp++)
4371 continue;
4372 if (cp == dbp)
4373 return;
4374 }
4375 c = *cp;
4376 *cp = '\0';
4377 name = concat (dbp, name_qualifier, "");
4378 *cp = c;
4379 make_tag (name, strlen (name), TRUE,
4380 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4381 free (name);
4382 if (c == '"')
4383 dbp = cp + 1;
4384 return;
4385 }
4386 }
4387
4388 static void
4389 Ada_funcs (inf)
4390 FILE *inf;
4391 {
4392 bool inquote = FALSE;
4393 bool skip_till_semicolumn = FALSE;
4394
4395 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4396 {
4397 while (*dbp != '\0')
4398 {
4399 /* Skip a string i.e. "abcd". */
4400 if (inquote || (*dbp == '"'))
4401 {
4402 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4403 if (dbp != NULL)
4404 {
4405 inquote = FALSE;
4406 dbp += 1;
4407 continue; /* advance char */
4408 }
4409 else
4410 {
4411 inquote = TRUE;
4412 break; /* advance line */
4413 }
4414 }
4415
4416 /* Skip comments. */
4417 if (dbp[0] == '-' && dbp[1] == '-')
4418 break; /* advance line */
4419
4420 /* Skip character enclosed in single quote i.e. 'a'
4421 and skip single quote starting an attribute i.e. 'Image. */
4422 if (*dbp == '\'')
4423 {
4424 dbp++ ;
4425 if (*dbp != '\0')
4426 dbp++;
4427 continue;
4428 }
4429
4430 if (skip_till_semicolumn)
4431 {
4432 if (*dbp == ';')
4433 skip_till_semicolumn = FALSE;
4434 dbp++;
4435 continue; /* advance char */
4436 }
4437
4438 /* Search for beginning of a token. */
4439 if (!begtoken (*dbp))
4440 {
4441 dbp++;
4442 continue; /* advance char */
4443 }
4444
4445 /* We are at the beginning of a token. */
4446 switch (lowcase(*dbp))
4447 {
4448 case 'f':
4449 if (!packages_only && nocase_tail ("function"))
4450 Ada_getit (inf, "/f");
4451 else
4452 break; /* from switch */
4453 continue; /* advance char */
4454 case 'p':
4455 if (!packages_only && nocase_tail ("procedure"))
4456 Ada_getit (inf, "/p");
4457 else if (nocase_tail ("package"))
4458 Ada_getit (inf, "/s");
4459 else if (nocase_tail ("protected")) /* protected type */
4460 Ada_getit (inf, "/t");
4461 else
4462 break; /* from switch */
4463 continue; /* advance char */
4464
4465 case 'u':
4466 if (typedefs && !packages_only && nocase_tail ("use"))
4467 {
4468 /* when tagging types, avoid tagging use type Pack.Typename;
4469 for this, we will skip everything till a ; */
4470 skip_till_semicolumn = TRUE;
4471 continue; /* advance char */
4472 }
4473
4474 case 't':
4475 if (!packages_only && nocase_tail ("task"))
4476 Ada_getit (inf, "/k");
4477 else if (typedefs && !packages_only && nocase_tail ("type"))
4478 {
4479 Ada_getit (inf, "/t");
4480 while (*dbp != '\0')
4481 dbp += 1;
4482 }
4483 else
4484 break; /* from switch */
4485 continue; /* advance char */
4486 }
4487
4488 /* Look for the end of the token. */
4489 while (!endtoken (*dbp))
4490 dbp++;
4491
4492 } /* advance char */
4493 } /* advance line */
4494 }
4495
4496 \f
4497 /*
4498 * Unix and microcontroller assembly tag handling
4499 * Labels: /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4500 * Idea by Bob Weiner, Motorola Inc. (1994)
4501 */
4502 static void
4503 Asm_labels (inf)
4504 FILE *inf;
4505 {
4506 register char *cp;
4507
4508 LOOP_ON_INPUT_LINES (inf, lb, cp)
4509 {
4510 /* If first char is alphabetic or one of [_.$], test for colon
4511 following identifier. */
4512 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4513 {
4514 /* Read past label. */
4515 cp++;
4516 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4517 cp++;
4518 if (*cp == ':' || iswhite (*cp))
4519 /* Found end of label, so copy it and add it to the table. */
4520 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4521 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4522 }
4523 }
4524 }
4525
4526 \f
4527 /*
4528 * Perl support
4529 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4530 * Perl variable names: /^(my|local).../
4531 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4532 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4533 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4534 */
4535 static void
4536 Perl_functions (inf)
4537 FILE *inf;
4538 {
4539 char *package = savestr ("main"); /* current package name */
4540 register char *cp;
4541
4542 LOOP_ON_INPUT_LINES (inf, lb, cp)
4543 {
4544 skip_spaces(cp);
4545
4546 if (LOOKING_AT (cp, "package"))
4547 {
4548 free (package);
4549 get_tag (cp, &package);
4550 }
4551 else if (LOOKING_AT (cp, "sub"))
4552 {
4553 char *pos;
4554 char *sp = cp;
4555
4556 while (!notinname (*cp))
4557 cp++;
4558 if (cp == sp)
4559 continue; /* nothing found */
4560 if ((pos = etags_strchr (sp, ':')) != NULL
4561 && pos < cp && pos[1] == ':')
4562 /* The name is already qualified. */
4563 make_tag (sp, cp - sp, TRUE,
4564 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4565 else
4566 /* Qualify it. */
4567 {
4568 char savechar, *name;
4569
4570 savechar = *cp;
4571 *cp = '\0';
4572 name = concat (package, "::", sp);
4573 *cp = savechar;
4574 make_tag (name, strlen(name), TRUE,
4575 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4576 free (name);
4577 }
4578 }
4579 else if (globals) /* only if we are tagging global vars */
4580 {
4581 /* Skip a qualifier, if any. */
4582 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4583 /* After "my" or "local", but before any following paren or space. */
4584 char *varstart = cp;
4585
4586 if (qual /* should this be removed? If yes, how? */
4587 && (*cp == '$' || *cp == '@' || *cp == '%'))
4588 {
4589 varstart += 1;
4590 do
4591 cp++;
4592 while (ISALNUM (*cp) || *cp == '_');
4593 }
4594 else if (qual)
4595 {
4596 /* Should be examining a variable list at this point;
4597 could insist on seeing an open parenthesis. */
4598 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4599 cp++;
4600 }
4601 else
4602 continue;
4603
4604 make_tag (varstart, cp - varstart, FALSE,
4605 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4606 }
4607 }
4608 free (package);
4609 }
4610
4611
4612 /*
4613 * Python support
4614 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4615 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4616 * More ideas by seb bacon <seb@jamkit.com> (2002)
4617 */
4618 static void
4619 Python_functions (inf)
4620 FILE *inf;
4621 {
4622 register char *cp;
4623
4624 LOOP_ON_INPUT_LINES (inf, lb, cp)
4625 {
4626 cp = skip_spaces (cp);
4627 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4628 {
4629 char *name = cp;
4630 while (!notinname (*cp) && *cp != ':')
4631 cp++;
4632 make_tag (name, cp - name, TRUE,
4633 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4634 }
4635 }
4636 }
4637
4638 \f
4639 /*
4640 * PHP support
4641 * Look for:
4642 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4643 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4644 * - /^[ \t]*define\(\"[^\"]+/
4645 * Only with --members:
4646 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4647 * Idea by Diez B. Roggisch (2001)
4648 */
4649 static void
4650 PHP_functions (inf)
4651 FILE *inf;
4652 {
4653 register char *cp, *name;
4654 bool search_identifier = FALSE;
4655
4656 LOOP_ON_INPUT_LINES (inf, lb, cp)
4657 {
4658 cp = skip_spaces (cp);
4659 name = cp;
4660 if (search_identifier
4661 && *cp != '\0')
4662 {
4663 while (!notinname (*cp))
4664 cp++;
4665 make_tag (name, cp - name, TRUE,
4666 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4667 search_identifier = FALSE;
4668 }
4669 else if (LOOKING_AT (cp, "function"))
4670 {
4671 if(*cp == '&')
4672 cp = skip_spaces (cp+1);
4673 if(*cp != '\0')
4674 {
4675 name = cp;
4676 while (!notinname (*cp))
4677 cp++;
4678 make_tag (name, cp - name, TRUE,
4679 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4680 }
4681 else
4682 search_identifier = TRUE;
4683 }
4684 else if (LOOKING_AT (cp, "class"))
4685 {
4686 if (*cp != '\0')
4687 {
4688 name = cp;
4689 while (*cp != '\0' && !iswhite (*cp))
4690 cp++;
4691 make_tag (name, cp - name, FALSE,
4692 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4693 }
4694 else
4695 search_identifier = TRUE;
4696 }
4697 else if (strneq (cp, "define", 6)
4698 && (cp = skip_spaces (cp+6))
4699 && *cp++ == '('
4700 && (*cp == '"' || *cp == '\''))
4701 {
4702 char quote = *cp++;
4703 name = cp;
4704 while (*cp != quote && *cp != '\0')
4705 cp++;
4706 make_tag (name, cp - name, FALSE,
4707 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4708 }
4709 else if (members
4710 && LOOKING_AT (cp, "var")
4711 && *cp == '$')
4712 {
4713 name = cp;
4714 while (!notinname(*cp))
4715 cp++;
4716 make_tag (name, cp - name, FALSE,
4717 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4718 }
4719 }
4720 }
4721
4722 \f
4723 /*
4724 * Cobol tag functions
4725 * We could look for anything that could be a paragraph name.
4726 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4727 * Idea by Corny de Souza (1993)
4728 */
4729 static void
4730 Cobol_paragraphs (inf)
4731 FILE *inf;
4732 {
4733 register char *bp, *ep;
4734
4735 LOOP_ON_INPUT_LINES (inf, lb, bp)
4736 {
4737 if (lb.len < 9)
4738 continue;
4739 bp += 8;
4740
4741 /* If eoln, compiler option or comment ignore whole line. */
4742 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4743 continue;
4744
4745 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4746 continue;
4747 if (*ep++ == '.')
4748 make_tag (bp, ep - bp, TRUE,
4749 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4750 }
4751 }
4752
4753 \f
4754 /*
4755 * Makefile support
4756 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4757 */
4758 static void
4759 Makefile_targets (inf)
4760 FILE *inf;
4761 {
4762 register char *bp;
4763
4764 LOOP_ON_INPUT_LINES (inf, lb, bp)
4765 {
4766 if (*bp == '\t' || *bp == '#')
4767 continue;
4768 while (*bp != '\0' && *bp != '=' && *bp != ':')
4769 bp++;
4770 if (*bp == ':' || (globals && *bp == '='))
4771 {
4772 /* We should detect if there is more than one tag, but we do not.
4773 We just skip initial and final spaces. */
4774 char * namestart = skip_spaces (lb.buffer);
4775 while (--bp > namestart)
4776 if (!notinname (*bp))
4777 break;
4778 make_tag (namestart, bp - namestart + 1, TRUE,
4779 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4780 }
4781 }
4782 }
4783
4784 \f
4785 /*
4786 * Pascal parsing
4787 * Original code by Mosur K. Mohan (1989)
4788 *
4789 * Locates tags for procedures & functions. Doesn't do any type- or
4790 * var-definitions. It does look for the keyword "extern" or
4791 * "forward" immediately following the procedure statement; if found,
4792 * the tag is skipped.
4793 */
4794 static void
4795 Pascal_functions (inf)
4796 FILE *inf;
4797 {
4798 linebuffer tline; /* mostly copied from C_entries */
4799 long save_lcno;
4800 int save_lineno, namelen, taglen;
4801 char c, *name;
4802
4803 bool /* each of these flags is TRUE if: */
4804 incomment, /* point is inside a comment */
4805 inquote, /* point is inside '..' string */
4806 get_tagname, /* point is after PROCEDURE/FUNCTION
4807 keyword, so next item = potential tag */
4808 found_tag, /* point is after a potential tag */
4809 inparms, /* point is within parameter-list */
4810 verify_tag; /* point has passed the parm-list, so the
4811 next token will determine whether this
4812 is a FORWARD/EXTERN to be ignored, or
4813 whether it is a real tag */
4814
4815 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4816 name = NULL; /* keep compiler quiet */
4817 dbp = lb.buffer;
4818 *dbp = '\0';
4819 linebuffer_init (&tline);
4820
4821 incomment = inquote = FALSE;
4822 found_tag = FALSE; /* have a proc name; check if extern */
4823 get_tagname = FALSE; /* found "procedure" keyword */
4824 inparms = FALSE; /* found '(' after "proc" */
4825 verify_tag = FALSE; /* check if "extern" is ahead */
4826
4827
4828 while (!feof (inf)) /* long main loop to get next char */
4829 {
4830 c = *dbp++;
4831 if (c == '\0') /* if end of line */
4832 {
4833 readline (&lb, inf);
4834 dbp = lb.buffer;
4835 if (*dbp == '\0')
4836 continue;
4837 if (!((found_tag && verify_tag)
4838 || get_tagname))
4839 c = *dbp++; /* only if don't need *dbp pointing
4840 to the beginning of the name of
4841 the procedure or function */
4842 }
4843 if (incomment)
4844 {
4845 if (c == '}') /* within { } comments */
4846 incomment = FALSE;
4847 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4848 {
4849 dbp++;
4850 incomment = FALSE;
4851 }
4852 continue;
4853 }
4854 else if (inquote)
4855 {
4856 if (c == '\'')
4857 inquote = FALSE;
4858 continue;
4859 }
4860 else
4861 switch (c)
4862 {
4863 case '\'':
4864 inquote = TRUE; /* found first quote */
4865 continue;
4866 case '{': /* found open { comment */
4867 incomment = TRUE;
4868 continue;
4869 case '(':
4870 if (*dbp == '*') /* found open (* comment */
4871 {
4872 incomment = TRUE;
4873 dbp++;
4874 }
4875 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4876 inparms = TRUE;
4877 continue;
4878 case ')': /* end of parms list */
4879 if (inparms)
4880 inparms = FALSE;
4881 continue;
4882 case ';':
4883 if (found_tag && !inparms) /* end of proc or fn stmt */
4884 {
4885 verify_tag = TRUE;
4886 break;
4887 }
4888 continue;
4889 }
4890 if (found_tag && verify_tag && (*dbp != ' '))
4891 {
4892 /* Check if this is an "extern" declaration. */
4893 if (*dbp == '\0')
4894 continue;
4895 if (lowcase (*dbp == 'e'))
4896 {
4897 if (nocase_tail ("extern")) /* superfluous, really! */
4898 {
4899 found_tag = FALSE;
4900 verify_tag = FALSE;
4901 }
4902 }
4903 else if (lowcase (*dbp) == 'f')
4904 {
4905 if (nocase_tail ("forward")) /* check for forward reference */
4906 {
4907 found_tag = FALSE;
4908 verify_tag = FALSE;
4909 }
4910 }
4911 if (found_tag && verify_tag) /* not external proc, so make tag */
4912 {
4913 found_tag = FALSE;
4914 verify_tag = FALSE;
4915 make_tag (name, namelen, TRUE,
4916 tline.buffer, taglen, save_lineno, save_lcno);
4917 continue;
4918 }
4919 }
4920 if (get_tagname) /* grab name of proc or fn */
4921 {
4922 char *cp;
4923
4924 if (*dbp == '\0')
4925 continue;
4926
4927 /* Find block name. */
4928 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4929 continue;
4930
4931 /* Save all values for later tagging. */
4932 linebuffer_setlen (&tline, lb.len);
4933 strcpy (tline.buffer, lb.buffer);
4934 save_lineno = lineno;
4935 save_lcno = linecharno;
4936 name = tline.buffer + (dbp - lb.buffer);
4937 namelen = cp - dbp;
4938 taglen = cp - lb.buffer + 1;
4939
4940 dbp = cp; /* set dbp to e-o-token */
4941 get_tagname = FALSE;
4942 found_tag = TRUE;
4943 continue;
4944
4945 /* And proceed to check for "extern". */
4946 }
4947 else if (!incomment && !inquote && !found_tag)
4948 {
4949 /* Check for proc/fn keywords. */
4950 switch (lowcase (c))
4951 {
4952 case 'p':
4953 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4954 get_tagname = TRUE;
4955 continue;
4956 case 'f':
4957 if (nocase_tail ("unction"))
4958 get_tagname = TRUE;
4959 continue;
4960 }
4961 }
4962 } /* while not eof */
4963
4964 free (tline.buffer);
4965 }
4966
4967 \f
4968 /*
4969 * Lisp tag functions
4970 * look for (def or (DEF, quote or QUOTE
4971 */
4972
4973 static void L_getit __P((void));
4974
4975 static void
4976 L_getit ()
4977 {
4978 if (*dbp == '\'') /* Skip prefix quote */
4979 dbp++;
4980 else if (*dbp == '(')
4981 {
4982 dbp++;
4983 /* Try to skip "(quote " */
4984 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4985 /* Ok, then skip "(" before name in (defstruct (foo)) */
4986 dbp = skip_spaces (dbp);
4987 }
4988 get_tag (dbp, NULL);
4989 }
4990
4991 static void
4992 Lisp_functions (inf)
4993 FILE *inf;
4994 {
4995 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4996 {
4997 if (dbp[0] != '(')
4998 continue;
4999
5000 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5001 {
5002 dbp = skip_non_spaces (dbp);
5003 dbp = skip_spaces (dbp);
5004 L_getit ();
5005 }
5006 else
5007 {
5008 /* Check for (foo::defmumble name-defined ... */
5009 do
5010 dbp++;
5011 while (!notinname (*dbp) && *dbp != ':');
5012 if (*dbp == ':')
5013 {
5014 do
5015 dbp++;
5016 while (*dbp == ':');
5017
5018 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5019 {
5020 dbp = skip_non_spaces (dbp);
5021 dbp = skip_spaces (dbp);
5022 L_getit ();
5023 }
5024 }
5025 }
5026 }
5027 }
5028
5029 \f
5030 /*
5031 * Lua script language parsing
5032 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5033 *
5034 * "function" and "local function" are tags if they start at column 1.
5035 */
5036 static void
5037 Lua_functions (inf)
5038 FILE *inf;
5039 {
5040 register char *bp;
5041
5042 LOOP_ON_INPUT_LINES (inf, lb, bp)
5043 {
5044 if (bp[0] != 'f' && bp[0] != 'l')
5045 continue;
5046
5047 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5048
5049 if (LOOKING_AT (bp, "function"))
5050 get_tag (bp, NULL);
5051 }
5052 }
5053
5054 \f
5055 /*
5056 * Postscript tags
5057 * Just look for lines where the first character is '/'
5058 * Also look at "defineps" for PSWrap
5059 * Ideas by:
5060 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5061 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5062 */
5063 static void
5064 PS_functions (inf)
5065 FILE *inf;
5066 {
5067 register char *bp, *ep;
5068
5069 LOOP_ON_INPUT_LINES (inf, lb, bp)
5070 {
5071 if (bp[0] == '/')
5072 {
5073 for (ep = bp+1;
5074 *ep != '\0' && *ep != ' ' && *ep != '{';
5075 ep++)
5076 continue;
5077 make_tag (bp, ep - bp, TRUE,
5078 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5079 }
5080 else if (LOOKING_AT (bp, "defineps"))
5081 get_tag (bp, NULL);
5082 }
5083 }
5084
5085 \f
5086 /*
5087 * Forth tags
5088 * Ignore anything after \ followed by space or in ( )
5089 * Look for words defined by :
5090 * Look for constant, code, create, defer, value, and variable
5091 * OBP extensions: Look for buffer:, field,
5092 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5093 */
5094 static void
5095 Forth_words (inf)
5096 FILE *inf;
5097 {
5098 register char *bp;
5099
5100 LOOP_ON_INPUT_LINES (inf, lb, bp)
5101 while ((bp = skip_spaces (bp))[0] != '\0')
5102 if (bp[0] == '\\' && iswhite(bp[1]))
5103 break; /* read next line */
5104 else if (bp[0] == '(' && iswhite(bp[1]))
5105 do /* skip to ) or eol */
5106 bp++;
5107 while (*bp != ')' && *bp != '\0');
5108 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5109 || LOOKING_AT_NOCASE (bp, "constant")
5110 || LOOKING_AT_NOCASE (bp, "code")
5111 || LOOKING_AT_NOCASE (bp, "create")
5112 || LOOKING_AT_NOCASE (bp, "defer")
5113 || LOOKING_AT_NOCASE (bp, "value")
5114 || LOOKING_AT_NOCASE (bp, "variable")
5115 || LOOKING_AT_NOCASE (bp, "buffer:")
5116 || LOOKING_AT_NOCASE (bp, "field"))
5117 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5118 else
5119 bp = skip_non_spaces (bp);
5120 }
5121
5122 \f
5123 /*
5124 * Scheme tag functions
5125 * look for (def... xyzzy
5126 * (def... (xyzzy
5127 * (def ... ((...(xyzzy ....
5128 * (set! xyzzy
5129 * Original code by Ken Haase (1985?)
5130 */
5131 static void
5132 Scheme_functions (inf)
5133 FILE *inf;
5134 {
5135 register char *bp;
5136
5137 LOOP_ON_INPUT_LINES (inf, lb, bp)
5138 {
5139 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5140 {
5141 bp = skip_non_spaces (bp+4);
5142 /* Skip over open parens and white space */
5143 while (notinname (*bp))
5144 bp++;
5145 get_tag (bp, NULL);
5146 }
5147 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5148 get_tag (bp, NULL);
5149 }
5150 }
5151
5152 \f
5153 /* Find tags in TeX and LaTeX input files. */
5154
5155 /* TEX_toktab is a table of TeX control sequences that define tags.
5156 * Each entry records one such control sequence.
5157 *
5158 * Original code from who knows whom.
5159 * Ideas by:
5160 * Stefan Monnier (2002)
5161 */
5162
5163 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5164
5165 /* Default set of control sequences to put into TEX_toktab.
5166 The value of environment var TEXTAGS is prepended to this. */
5167 static char *TEX_defenv = "\
5168 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5169 :part:appendix:entry:index:def\
5170 :newcommand:renewcommand:newenvironment:renewenvironment";
5171
5172 static void TEX_mode __P((FILE *));
5173 static void TEX_decode_env __P((char *, char *));
5174
5175 static char TEX_esc = '\\';
5176 static char TEX_opgrp = '{';
5177 static char TEX_clgrp = '}';
5178
5179 /*
5180 * TeX/LaTeX scanning loop.
5181 */
5182 static void
5183 TeX_commands (inf)
5184 FILE *inf;
5185 {
5186 char *cp;
5187 linebuffer *key;
5188
5189 /* Select either \ or ! as escape character. */
5190 TEX_mode (inf);
5191
5192 /* Initialize token table once from environment. */
5193 if (TEX_toktab == NULL)
5194 TEX_decode_env ("TEXTAGS", TEX_defenv);
5195
5196 LOOP_ON_INPUT_LINES (inf, lb, cp)
5197 {
5198 /* Look at each TEX keyword in line. */
5199 for (;;)
5200 {
5201 /* Look for a TEX escape. */
5202 while (*cp++ != TEX_esc)
5203 if (cp[-1] == '\0' || cp[-1] == '%')
5204 goto tex_next_line;
5205
5206 for (key = TEX_toktab; key->buffer != NULL; key++)
5207 if (strneq (cp, key->buffer, key->len))
5208 {
5209 register char *p;
5210 int namelen, linelen;
5211 bool opgrp = FALSE;
5212
5213 cp = skip_spaces (cp + key->len);
5214 if (*cp == TEX_opgrp)
5215 {
5216 opgrp = TRUE;
5217 cp++;
5218 }
5219 for (p = cp;
5220 (!iswhite (*p) && *p != '#' &&
5221 *p != TEX_opgrp && *p != TEX_clgrp);
5222 p++)
5223 continue;
5224 namelen = p - cp;
5225 linelen = lb.len;
5226 if (!opgrp || *p == TEX_clgrp)
5227 {
5228 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5229 p++;
5230 linelen = p - lb.buffer + 1;
5231 }
5232 make_tag (cp, namelen, TRUE,
5233 lb.buffer, linelen, lineno, linecharno);
5234 goto tex_next_line; /* We only tag a line once */
5235 }
5236 }
5237 tex_next_line:
5238 ;
5239 }
5240 }
5241
5242 #define TEX_LESC '\\'
5243 #define TEX_SESC '!'
5244
5245 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5246 chars accordingly. */
5247 static void
5248 TEX_mode (inf)
5249 FILE *inf;
5250 {
5251 int c;
5252
5253 while ((c = getc (inf)) != EOF)
5254 {
5255 /* Skip to next line if we hit the TeX comment char. */
5256 if (c == '%')
5257 while (c != '\n' && c != EOF)
5258 c = getc (inf);
5259 else if (c == TEX_LESC || c == TEX_SESC )
5260 break;
5261 }
5262
5263 if (c == TEX_LESC)
5264 {
5265 TEX_esc = TEX_LESC;
5266 TEX_opgrp = '{';
5267 TEX_clgrp = '}';
5268 }
5269 else
5270 {
5271 TEX_esc = TEX_SESC;
5272 TEX_opgrp = '<';
5273 TEX_clgrp = '>';
5274 }
5275 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5276 No attempt is made to correct the situation. */
5277 rewind (inf);
5278 }
5279
5280 /* Read environment and prepend it to the default string.
5281 Build token table. */
5282 static void
5283 TEX_decode_env (evarname, defenv)
5284 char *evarname;
5285 char *defenv;
5286 {
5287 register char *env, *p;
5288 int i, len;
5289
5290 /* Append default string to environment. */
5291 env = getenv (evarname);
5292 if (!env)
5293 env = defenv;
5294 else
5295 {
5296 char *oldenv = env;
5297 env = concat (oldenv, defenv, "");
5298 }
5299
5300 /* Allocate a token table */
5301 for (len = 1, p = env; p;)
5302 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5303 len++;
5304 TEX_toktab = xnew (len, linebuffer);
5305
5306 /* Unpack environment string into token table. Be careful about */
5307 /* zero-length strings (leading ':', "::" and trailing ':') */
5308 for (i = 0; *env != '\0';)
5309 {
5310 p = etags_strchr (env, ':');
5311 if (!p) /* End of environment string. */
5312 p = env + strlen (env);
5313 if (p - env > 0)
5314 { /* Only non-zero strings. */
5315 TEX_toktab[i].buffer = savenstr (env, p - env);
5316 TEX_toktab[i].len = p - env;
5317 i++;
5318 }
5319 if (*p)
5320 env = p + 1;
5321 else
5322 {
5323 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5324 TEX_toktab[i].len = 0;
5325 break;
5326 }
5327 }
5328 }
5329
5330 \f
5331 /* Texinfo support. Dave Love, Mar. 2000. */
5332 static void
5333 Texinfo_nodes (inf)
5334 FILE * inf;
5335 {
5336 char *cp, *start;
5337 LOOP_ON_INPUT_LINES (inf, lb, cp)
5338 if (LOOKING_AT (cp, "@node"))
5339 {
5340 start = cp;
5341 while (*cp != '\0' && *cp != ',')
5342 cp++;
5343 make_tag (start, cp - start, TRUE,
5344 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5345 }
5346 }
5347
5348 \f
5349 /*
5350 * HTML support.
5351 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5352 * Contents of <a name=xxx> are tags with name xxx.
5353 *
5354 * Francesco Potortì, 2002.
5355 */
5356 static void
5357 HTML_labels (inf)
5358 FILE * inf;
5359 {
5360 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5361 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5362 bool intag = FALSE; /* inside an html tag, looking for ID= */
5363 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5364 char *end;
5365
5366
5367 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5368
5369 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5370 for (;;) /* loop on the same line */
5371 {
5372 if (skiptag) /* skip HTML tag */
5373 {
5374 while (*dbp != '\0' && *dbp != '>')
5375 dbp++;
5376 if (*dbp == '>')
5377 {
5378 dbp += 1;
5379 skiptag = FALSE;
5380 continue; /* look on the same line */
5381 }
5382 break; /* go to next line */
5383 }
5384
5385 else if (intag) /* look for "name=" or "id=" */
5386 {
5387 while (*dbp != '\0' && *dbp != '>'
5388 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5389 dbp++;
5390 if (*dbp == '\0')
5391 break; /* go to next line */
5392 if (*dbp == '>')
5393 {
5394 dbp += 1;
5395 intag = FALSE;
5396 continue; /* look on the same line */
5397 }
5398 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5399 || LOOKING_AT_NOCASE (dbp, "id="))
5400 {
5401 bool quoted = (dbp[0] == '"');
5402
5403 if (quoted)
5404 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5405 continue;
5406 else
5407 for (end = dbp; *end != '\0' && intoken (*end); end++)
5408 continue;
5409 linebuffer_setlen (&token_name, end - dbp);
5410 strncpy (token_name.buffer, dbp, end - dbp);
5411 token_name.buffer[end - dbp] = '\0';
5412
5413 dbp = end;
5414 intag = FALSE; /* we found what we looked for */
5415 skiptag = TRUE; /* skip to the end of the tag */
5416 getnext = TRUE; /* then grab the text */
5417 continue; /* look on the same line */
5418 }
5419 dbp += 1;
5420 }
5421
5422 else if (getnext) /* grab next tokens and tag them */
5423 {
5424 dbp = skip_spaces (dbp);
5425 if (*dbp == '\0')
5426 break; /* go to next line */
5427 if (*dbp == '<')
5428 {
5429 intag = TRUE;
5430 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5431 continue; /* look on the same line */
5432 }
5433
5434 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5435 continue;
5436 make_tag (token_name.buffer, token_name.len, TRUE,
5437 dbp, end - dbp, lineno, linecharno);
5438 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5439 getnext = FALSE;
5440 break; /* go to next line */
5441 }
5442
5443 else /* look for an interesting HTML tag */
5444 {
5445 while (*dbp != '\0' && *dbp != '<')
5446 dbp++;
5447 if (*dbp == '\0')
5448 break; /* go to next line */
5449 intag = TRUE;
5450 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5451 {
5452 inanchor = TRUE;
5453 continue; /* look on the same line */
5454 }
5455 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5456 || LOOKING_AT_NOCASE (dbp, "<h1>")
5457 || LOOKING_AT_NOCASE (dbp, "<h2>")
5458 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5459 {
5460 intag = FALSE;
5461 getnext = TRUE;
5462 continue; /* look on the same line */
5463 }
5464 dbp += 1;
5465 }
5466 }
5467 }
5468
5469 \f
5470 /*
5471 * Prolog support
5472 *
5473 * Assumes that the predicate or rule starts at column 0.
5474 * Only the first clause of a predicate or rule is added.
5475 * Original code by Sunichirou Sugou (1989)
5476 * Rewritten by Anders Lindgren (1996)
5477 */
5478 static int prolog_pr __P((char *, char *));
5479 static void prolog_skip_comment __P((linebuffer *, FILE *));
5480 static int prolog_atom __P((char *, int));
5481
5482 static void
5483 Prolog_functions (inf)
5484 FILE *inf;
5485 {
5486 char *cp, *last;
5487 int len;
5488 int allocated;
5489
5490 allocated = 0;
5491 len = 0;
5492 last = NULL;
5493
5494 LOOP_ON_INPUT_LINES (inf, lb, cp)
5495 {
5496 if (cp[0] == '\0') /* Empty line */
5497 continue;
5498 else if (iswhite (cp[0])) /* Not a predicate */
5499 continue;
5500 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5501 prolog_skip_comment (&lb, inf);
5502 else if ((len = prolog_pr (cp, last)) > 0)
5503 {
5504 /* Predicate or rule. Store the function name so that we
5505 only generate a tag for the first clause. */
5506 if (last == NULL)
5507 last = xnew(len + 1, char);
5508 else if (len + 1 > allocated)
5509 xrnew (last, len + 1, char);
5510 allocated = len + 1;
5511 strncpy (last, cp, len);
5512 last[len] = '\0';
5513 }
5514 }
5515 if (last != NULL)
5516 free (last);
5517 }
5518
5519
5520 static void
5521 prolog_skip_comment (plb, inf)
5522 linebuffer *plb;
5523 FILE *inf;
5524 {
5525 char *cp;
5526
5527 do
5528 {
5529 for (cp = plb->buffer; *cp != '\0'; cp++)
5530 if (cp[0] == '*' && cp[1] == '/')
5531 return;
5532 readline (plb, inf);
5533 }
5534 while (!feof(inf));
5535 }
5536
5537 /*
5538 * A predicate or rule definition is added if it matches:
5539 * <beginning of line><Prolog Atom><whitespace>(
5540 * or <beginning of line><Prolog Atom><whitespace>:-
5541 *
5542 * It is added to the tags database if it doesn't match the
5543 * name of the previous clause header.
5544 *
5545 * Return the size of the name of the predicate or rule, or 0 if no
5546 * header was found.
5547 */
5548 static int
5549 prolog_pr (s, last)
5550 char *s;
5551 char *last; /* Name of last clause. */
5552 {
5553 int pos;
5554 int len;
5555
5556 pos = prolog_atom (s, 0);
5557 if (pos < 1)
5558 return 0;
5559
5560 len = pos;
5561 pos = skip_spaces (s + pos) - s;
5562
5563 if ((s[pos] == '.'
5564 || (s[pos] == '(' && (pos += 1))
5565 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5566 && (last == NULL /* save only the first clause */
5567 || len != (int)strlen (last)
5568 || !strneq (s, last, len)))
5569 {
5570 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5571 return len;
5572 }
5573 else
5574 return 0;
5575 }
5576
/*
 * Consume the Prolog atom that starts at offset POS of S.
 * Return the number of bytes consumed, or -1 if there is no atom.
 *
 * A Prolog atom, in this context, is one of:
 *  - An alphanumeric sequence (underscores allowed) that starts with
 *    a lower case letter or an underscore.
 *  - A string in single quotes.  Inside it, two single quotes in a
 *    row stand for one, and a backslash quotes the next character.
 *    A quoted atom that is not closed on the line is an error.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER(*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM(*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        p++;
        if (*p != '\'')
          return p - start;     /* that was the closing quote */
        p++;                    /* A double quote */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
5635
5636 \f
5637 /*
5638 * Support for Erlang
5639 *
5640 * Generates tags for functions, defines, and records.
5641 * Assumes that Erlang functions start at column 0.
5642 * Original code by Anders Lindgren (1996)
5643 */
5644 static int erlang_func __P((char *, char *));
5645 static void erlang_attribute __P((char *));
5646 static int erlang_atom __P((char *));
5647
5648 static void
5649 Erlang_functions (inf)
5650 FILE *inf;
5651 {
5652 char *cp, *last;
5653 int len;
5654 int allocated;
5655
5656 allocated = 0;
5657 len = 0;
5658 last = NULL;
5659
5660 LOOP_ON_INPUT_LINES (inf, lb, cp)
5661 {
5662 if (cp[0] == '\0') /* Empty line */
5663 continue;
5664 else if (iswhite (cp[0])) /* Not function nor attribute */
5665 continue;
5666 else if (cp[0] == '%') /* comment */
5667 continue;
5668 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5669 continue;
5670 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5671 {
5672 erlang_attribute (cp);
5673 if (last != NULL)
5674 {
5675 free (last);
5676 last = NULL;
5677 }
5678 }
5679 else if ((len = erlang_func (cp, last)) > 0)
5680 {
5681 /*
5682 * Function. Store the function name so that we only
5683 * generates a tag for the first clause.
5684 */
5685 if (last == NULL)
5686 last = xnew (len + 1, char);
5687 else if (len + 1 > allocated)
5688 xrnew (last, len + 1, char);
5689 allocated = len + 1;
5690 strncpy (last, cp, len);
5691 last[len] = '\0';
5692 }
5693 }
5694 if (last != NULL)
5695 free (last);
5696 }
5697
5698
5699 /*
5700 * A function definition is added if it matches:
5701 * <beginning of line><Erlang Atom><whitespace>(
5702 *
5703 * It is added to the tags database if it doesn't match the
5704 * name of the previous clause header.
5705 *
5706 * Return the size of the name of the function, or 0 if no function
5707 * was found.
5708 */
5709 static int
5710 erlang_func (s, last)
5711 char *s;
5712 char *last; /* Name of last clause. */
5713 {
5714 int pos;
5715 int len;
5716
5717 pos = erlang_atom (s);
5718 if (pos < 1)
5719 return 0;
5720
5721 len = pos;
5722 pos = skip_spaces (s + pos) - s;
5723
5724 /* Save only the first clause. */
5725 if (s[pos++] == '('
5726 && (last == NULL
5727 || len != (int)strlen (last)
5728 || !strneq (s, last, len)))
5729 {
5730 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5731 return len;
5732 }
5733
5734 return 0;
5735 }
5736
5737
5738 /*
5739 * Handle attributes. Currently, tags are generated for defines
5740 * and records.
5741 *
5742 * They are on the form:
5743 * -define(foo, bar).
5744 * -define(Foo(M, N), M+N).
5745 * -record(graph, {vtab = notable, cyclic = true}).
5746 */
5747 static void
5748 erlang_attribute (s)
5749 char *s;
5750 {
5751 char *cp = s;
5752
5753 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5754 && *cp++ == '(')
5755 {
5756 int len = erlang_atom (skip_spaces (cp));
5757 if (len > 0)
5758 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5759 }
5760 return;
5761 }
5762
5763
/*
 * Measure the Erlang atom (or variable) at the start of S.
 *
 * An unquoted name is a letter or underscore followed by letters,
 * digits and underscores.  A quoted name runs from a single quote
 * to the next single quote not protected by a backslash; both
 * quotes are counted.
 *
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with an atom, or when a quoted atom is not closed on
 * this line.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      p++;
      while (ISALNUM (*p) || *p == '_')
        p++;
    }
  else if (*p == '\'')
    {
      p++;
      while (*p != '\'')
        {
          if (*p == '\0')       /* multiline quoted atoms are ignored */
            return 0;
          if (*p == '\\' && *++p == '\0')
            return 0;
          p++;
        }
      p++;                      /* step over the closing quote */
    }

  return p - s;
}
5792
5793 \f
5794 static char *scan_separators __P((char *));
5795 static void add_regex __P((char *, language *));
5796 static char *substitute __P((char *, char *, struct re_registers *));
5797
/*
 * NAME starts with a separator character, as in "/blah/".  Copy the
 * text that follows down to the start of NAME, stopping at the first
 * unquoted occurrence of the separator, and null terminate the copy,
 * so that NAME becomes "blah".  Works in place.
 *
 * A backslash quotes the next character: \a \b \d \e \f \n \r \t \v
 * are replaced by the corresponding control character, a quoted
 * separator becomes a plain separator, and any other quoted
 * character is kept together with its backslash.
 *
 * Return a pointer to the terminating separator, or NULL for an
 * unterminated regexp.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Look at the quoted character right away. */
          if (*++name == '\0')
            break;              /* lone backslash at end of string */
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space) */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete) */
            case 'e': *copyto++ = 033; break;    /* ESC (escape) */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed) */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line) */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab) */
            default:
              /* A quoted separator loses its quote, anything else
                 keeps it. */
              if (*name != sep)
                *copyto++ = '\\';
              *copyto++ = *name;
              break;
            }
        }
      else if (*name == sep)
        break;                  /* unquoted separator: done */
      else
        *copyto++ = *name;
    }

  /* Terminate copied string. */
  *copyto = '\0';

  /* Running off the end of the string means there was no closing
     separator. */
  return (*name == sep) ? name : NULL;
}
5857
5858 /* Look at the argument of --regex or --no-regex and do the right
5859 thing. Same for each line of a regexp file. */
5860 static void
5861 analyse_regex (regex_arg)
5862 char *regex_arg;
5863 {
5864 if (regex_arg == NULL)
5865 {
5866 free_regexps (); /* --no-regex: remove existing regexps */
5867 return;
5868 }
5869
5870 /* A real --regexp option or a line in a regexp file. */
5871 switch (regex_arg[0])
5872 {
5873 /* Comments in regexp file or null arg to --regex. */
5874 case '\0':
5875 case ' ':
5876 case '\t':
5877 break;
5878
5879 /* Read a regex file. This is recursive and may result in a
5880 loop, which will stop when the file descriptors are exhausted. */
5881 case '@':
5882 {
5883 FILE *regexfp;
5884 linebuffer regexbuf;
5885 char *regexfile = regex_arg + 1;
5886
5887 /* regexfile is a file containing regexps, one per line. */
5888 regexfp = fopen (regexfile, "r");
5889 if (regexfp == NULL)
5890 {
5891 pfatal (regexfile);
5892 return;
5893 }
5894 linebuffer_init (&regexbuf);
5895 while (readline_internal (&regexbuf, regexfp) > 0)
5896 analyse_regex (regexbuf.buffer);
5897 free (regexbuf.buffer);
5898 fclose (regexfp);
5899 }
5900 break;
5901
5902 /* Regexp to be used for a specific language only. */
5903 case '{':
5904 {
5905 language *lang;
5906 char *lang_name = regex_arg + 1;
5907 char *cp;
5908
5909 for (cp = lang_name; *cp != '}'; cp++)
5910 if (*cp == '\0')
5911 {
5912 error ("unterminated language name in regex: %s", regex_arg);
5913 return;
5914 }
5915 *cp++ = '\0';
5916 lang = get_language_from_langname (lang_name);
5917 if (lang == NULL)
5918 return;
5919 add_regex (cp, lang);
5920 }
5921 break;
5922
5923 /* Regexp to be used for any language. */
5924 default:
5925 add_regex (regex_arg, NULL);
5926 break;
5927 }
5928 }
5929
5930 /* Separate the regexp pattern, compile it,
5931 and care for optional name and modifiers. */
5932 static void
5933 add_regex (regexp_pattern, lang)
5934 char *regexp_pattern;
5935 language *lang;
5936 {
5937 static struct re_pattern_buffer zeropattern;
5938 char sep, *pat, *name, *modifiers;
5939 const char *err;
5940 struct re_pattern_buffer *patbuf;
5941 regexp *rp;
5942 bool
5943 force_explicit_name = TRUE, /* do not use implicit tag names */
5944 ignore_case = FALSE, /* case is significant */
5945 multi_line = FALSE, /* matches are done one line at a time */
5946 single_line = FALSE; /* dot does not match newline */
5947
5948
5949 if (strlen(regexp_pattern) < 3)
5950 {
5951 error ("null regexp", (char *)NULL);
5952 return;
5953 }
5954 sep = regexp_pattern[0];
5955 name = scan_separators (regexp_pattern);
5956 if (name == NULL)
5957 {
5958 error ("%s: unterminated regexp", regexp_pattern);
5959 return;
5960 }
5961 if (name[1] == sep)
5962 {
5963 error ("null name for regexp \"%s\"", regexp_pattern);
5964 return;
5965 }
5966 modifiers = scan_separators (name);
5967 if (modifiers == NULL) /* no terminating separator --> no name */
5968 {
5969 modifiers = name;
5970 name = "";
5971 }
5972 else
5973 modifiers += 1; /* skip separator */
5974
5975 /* Parse regex modifiers. */
5976 for (; modifiers[0] != '\0'; modifiers++)
5977 switch (modifiers[0])
5978 {
5979 case 'N':
5980 if (modifiers == name)
5981 error ("forcing explicit tag name but no name, ignoring", NULL);
5982 force_explicit_name = TRUE;
5983 break;
5984 case 'i':
5985 ignore_case = TRUE;
5986 break;
5987 case 's':
5988 single_line = TRUE;
5989 /* FALLTHRU */
5990 case 'm':
5991 multi_line = TRUE;
5992 need_filebuf = TRUE;
5993 break;
5994 default:
5995 {
5996 char wrongmod [2];
5997 wrongmod[0] = modifiers[0];
5998 wrongmod[1] = '\0';
5999 error ("invalid regexp modifier `%s', ignoring", wrongmod);
6000 }
6001 break;
6002 }
6003
6004 patbuf = xnew (1, struct re_pattern_buffer);
6005 *patbuf = zeropattern;
6006 if (ignore_case)
6007 {
6008 static char lc_trans[CHARS];
6009 int i;
6010 for (i = 0; i < CHARS; i++)
6011 lc_trans[i] = lowcase (i);
6012 patbuf->translate = lc_trans; /* translation table to fold case */
6013 }
6014
6015 if (multi_line)
6016 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6017 else
6018 pat = regexp_pattern;
6019
6020 if (single_line)
6021 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6022 else
6023 re_set_syntax (RE_SYNTAX_EMACS);
6024
6025 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
6026 if (multi_line)
6027 free (pat);
6028 if (err != NULL)
6029 {
6030 error ("%s while compiling pattern", err);
6031 return;
6032 }
6033
6034 rp = p_head;
6035 p_head = xnew (1, regexp);
6036 p_head->pattern = savestr (regexp_pattern);
6037 p_head->p_next = rp;
6038 p_head->lang = lang;
6039 p_head->pat = patbuf;
6040 p_head->name = savestr (name);
6041 p_head->error_signaled = FALSE;
6042 p_head->force_explicit_name = force_explicit_name;
6043 p_head->ignore_case = ignore_case;
6044 p_head->multi_line = multi_line;
6045 }
6046
6047 /*
6048 * Do the substitutions indicated by the regular expression and
6049 * arguments.
6050 */
6051 static char *
6052 substitute (in, out, regs)
6053 char *in, *out;
6054 struct re_registers *regs;
6055 {
6056 char *result, *t;
6057 int size, dig, diglen;
6058
6059 result = NULL;
6060 size = strlen (out);
6061
6062 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6063 if (out[size - 1] == '\\')
6064 fatal ("pattern error in \"%s\"", out);
6065 for (t = etags_strchr (out, '\\');
6066 t != NULL;
6067 t = etags_strchr (t + 2, '\\'))
6068 if (ISDIGIT (t[1]))
6069 {
6070 dig = t[1] - '0';
6071 diglen = regs->end[dig] - regs->start[dig];
6072 size += diglen - 2;
6073 }
6074 else
6075 size -= 1;
6076
6077 /* Allocate space and do the substitutions. */
6078 assert (size >= 0);
6079 result = xnew (size + 1, char);
6080
6081 for (t = result; *out != '\0'; out++)
6082 if (*out == '\\' && ISDIGIT (*++out))
6083 {
6084 dig = *out - '0';
6085 diglen = regs->end[dig] - regs->start[dig];
6086 strncpy (t, in + regs->start[dig], diglen);
6087 t += diglen;
6088 }
6089 else
6090 *t++ = *out;
6091 *t = '\0';
6092
6093 assert (t <= result + size);
6094 assert (t - result == (int)strlen (result));
6095
6096 return result;
6097 }
6098
6099 /* Deallocate all regexps. */
6100 static void
6101 free_regexps ()
6102 {
6103 regexp *rp;
6104 while (p_head != NULL)
6105 {
6106 rp = p_head->p_next;
6107 free (p_head->pattern);
6108 free (p_head->name);
6109 free (p_head);
6110 p_head = rp;
6111 }
6112 return;
6113 }
6114
6115 /*
6116 * Reads the whole file as a single string from `filebuf' and looks for
6117 * multi-line regular expressions, creating tags on matches.
6118 * readline already dealt with normal regexps.
6119 *
6120 * Idea by Ben Wing <ben@666.com> (2002).
6121 */
6122 static void
6123 regex_tag_multiline ()
6124 {
6125 char *buffer = filebuf.buffer;
6126 regexp *rp;
6127 char *name;
6128
6129 for (rp = p_head; rp != NULL; rp = rp->p_next)
6130 {
6131 int match = 0;
6132
6133 if (!rp->multi_line)
6134 continue; /* skip normal regexps */
6135
6136 /* Generic initialisations before parsing file from memory. */
6137 lineno = 1; /* reset global line number */
6138 charno = 0; /* reset global char number */
6139 linecharno = 0; /* reset global char number of line start */
6140
6141 /* Only use generic regexps or those for the current language. */
6142 if (rp->lang != NULL && rp->lang != curfdp->lang)
6143 continue;
6144
6145 while (match >= 0 && match < filebuf.len)
6146 {
6147 match = re_search (rp->pat, buffer, filebuf.len, charno,
6148 filebuf.len - match, &rp->regs);
6149 switch (match)
6150 {
6151 case -2:
6152 /* Some error. */
6153 if (!rp->error_signaled)
6154 {
6155 error ("regexp stack overflow while matching \"%s\"",
6156 rp->pattern);
6157 rp->error_signaled = TRUE;
6158 }
6159 break;
6160 case -1:
6161 /* No match. */
6162 break;
6163 default:
6164 if (match == rp->regs.end[0])
6165 {
6166 if (!rp->error_signaled)
6167 {
6168 error ("regexp matches the empty string: \"%s\"",
6169 rp->pattern);
6170 rp->error_signaled = TRUE;
6171 }
6172 match = -3; /* exit from while loop */
6173 break;
6174 }
6175
6176 /* Match occurred. Construct a tag. */
6177 while (charno < rp->regs.end[0])
6178 if (buffer[charno++] == '\n')
6179 lineno++, linecharno = charno;
6180 name = rp->name;
6181 if (name[0] == '\0')
6182 name = NULL;
6183 else /* make a named tag */
6184 name = substitute (buffer, rp->name, &rp->regs);
6185 if (rp->force_explicit_name)
6186 /* Force explicit tag name, if a name is there. */
6187 pfnote (name, TRUE, buffer + linecharno,
6188 charno - linecharno + 1, lineno, linecharno);
6189 else
6190 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6191 charno - linecharno + 1, lineno, linecharno);
6192 break;
6193 }
6194 }
6195 }
6196 }
6197
6198 \f
6199 static bool
6200 nocase_tail (cp)
6201 char *cp;
6202 {
6203 register int len = 0;
6204
6205 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6206 cp++, len++;
6207 if (*cp == '\0' && !intoken (dbp[len]))
6208 {
6209 dbp += len;
6210 return TRUE;
6211 }
6212 return FALSE;
6213 }
6214
6215 static void
6216 get_tag (bp, namepp)
6217 register char *bp;
6218 char **namepp;
6219 {
6220 register char *cp = bp;
6221
6222 if (*bp != '\0')
6223 {
6224 /* Go till you get to white space or a syntactic break */
6225 for (cp = bp + 1; !notinname (*cp); cp++)
6226 continue;
6227 make_tag (bp, cp - bp, TRUE,
6228 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6229 }
6230
6231 if (namepp != NULL)
6232 *namepp = savenstr (bp, cp - bp);
6233 }
6234
6235 /*
6236 * Read a line of text from `stream' into `lbp', excluding the
6237 * newline or CR-NL, if any. Return the number of characters read from
6238 * `stream', which is the length of the line including the newline.
6239 *
6240 * On DOS or Windows we do not count the CR character, if any before the
6241 * NL, in the returned length; this mirrors the behavior of Emacs on those
6242 * platforms (for text files, it translates CR-NL to NL as it reads in the
6243 * file).
6244 *
6245 * If multi-line regular expressions are requested, each line read is
6246 * appended to `filebuf'.
6247 */
6248 static long
6249 readline_internal (lbp, stream)
6250 linebuffer *lbp;
6251 register FILE *stream;
6252 {
6253 char *buffer = lbp->buffer;
6254 register char *p = lbp->buffer;
6255 register char *pend;
6256 int chars_deleted;
6257
6258 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6259
6260 for (;;)
6261 {
6262 register int c = getc (stream);
6263 if (p == pend)
6264 {
6265 /* We're at the end of linebuffer: expand it. */
6266 lbp->size *= 2;
6267 xrnew (buffer, lbp->size, char);
6268 p += buffer - lbp->buffer;
6269 pend = buffer + lbp->size;
6270 lbp->buffer = buffer;
6271 }
6272 if (c == EOF)
6273 {
6274 *p = '\0';
6275 chars_deleted = 0;
6276 break;
6277 }
6278 if (c == '\n')
6279 {
6280 if (p > buffer && p[-1] == '\r')
6281 {
6282 p -= 1;
6283 #ifdef DOS_NT
6284 /* Assume CRLF->LF translation will be performed by Emacs
6285 when loading this file, so CRs won't appear in the buffer.
6286 It would be cleaner to compensate within Emacs;
6287 however, Emacs does not know how many CRs were deleted
6288 before any given point in the file. */
6289 chars_deleted = 1;
6290 #else
6291 chars_deleted = 2;
6292 #endif
6293 }
6294 else
6295 {
6296 chars_deleted = 1;
6297 }
6298 *p = '\0';
6299 break;
6300 }
6301 *p++ = c;
6302 }
6303 lbp->len = p - buffer;
6304
6305 if (need_filebuf /* we need filebuf for multi-line regexps */
6306 && chars_deleted > 0) /* not at EOF */
6307 {
6308 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6309 {
6310 /* Expand filebuf. */
6311 filebuf.size *= 2;
6312 xrnew (filebuf.buffer, filebuf.size, char);
6313 }
6314 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6315 filebuf.len += lbp->len;
6316 filebuf.buffer[filebuf.len++] = '\n';
6317 filebuf.buffer[filebuf.len] = '\0';
6318 }
6319
6320 return lbp->len + chars_deleted;
6321 }
6322
6323 /*
6324 * Like readline_internal, above, but in addition try to match the
6325 * input line against relevant regular expressions and manage #line
6326 * directives.
6327 */
6328 static void
6329 readline (lbp, stream)
6330 linebuffer *lbp;
6331 FILE *stream;
6332 {
6333 long result;
6334
6335 linecharno = charno; /* update global char number of line start */
6336 result = readline_internal (lbp, stream); /* read line */
6337 lineno += 1; /* increment global line number */
6338 charno += result; /* increment global char number */
6339
6340 /* Honour #line directives. */
6341 if (!no_line_directive)
6342 {
6343 static bool discard_until_line_directive;
6344
6345 /* Check whether this is a #line directive. */
6346 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6347 {
6348 unsigned int lno;
6349 int start = 0;
6350
6351 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6352 && start > 0) /* double quote character found */
6353 {
6354 char *endp = lbp->buffer + start;
6355
6356 while ((endp = etags_strchr (endp, '"')) != NULL
6357 && endp[-1] == '\\')
6358 endp++;
6359 if (endp != NULL)
6360 /* Ok, this is a real #line directive. Let's deal with it. */
6361 {
6362 char *taggedabsname; /* absolute name of original file */
6363 char *taggedfname; /* name of original file as given */
6364 char *name; /* temp var */
6365
6366 discard_until_line_directive = FALSE; /* found it */
6367 name = lbp->buffer + start;
6368 *endp = '\0';
6369 canonicalize_filename (name); /* for DOS */
6370 taggedabsname = absolute_filename (name, tagfiledir);
6371 if (filename_is_absolute (name)
6372 || filename_is_absolute (curfdp->infname))
6373 taggedfname = savestr (taggedabsname);
6374 else
6375 taggedfname = relative_filename (taggedabsname,tagfiledir);
6376
6377 if (streq (curfdp->taggedfname, taggedfname))
6378 /* The #line directive is only a line number change. We
6379 deal with this afterwards. */
6380 free (taggedfname);
6381 else
6382 /* The tags following this #line directive should be
6383 attributed to taggedfname. In order to do this, set
6384 curfdp accordingly. */
6385 {
6386 fdesc *fdp; /* file description pointer */
6387
6388 /* Go look for a file description already set up for the
6389 file indicated in the #line directive. If there is
6390 one, use it from now until the next #line
6391 directive. */
6392 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6393 if (streq (fdp->infname, curfdp->infname)
6394 && streq (fdp->taggedfname, taggedfname))
6395 /* If we remove the second test above (after the &&)
6396 then all entries pertaining to the same file are
6397 coalesced in the tags file. If we use it, then
6398 entries pertaining to the same file but generated
6399 from different files (via #line directives) will
6400 go into separate sections in the tags file. These
6401 alternatives look equivalent. The first one
6402 destroys some apparently useless information. */
6403 {
6404 curfdp = fdp;
6405 free (taggedfname);
6406 break;
6407 }
6408 /* Else, if we already tagged the real file, skip all
6409 input lines until the next #line directive. */
6410 if (fdp == NULL) /* not found */
6411 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6412 if (streq (fdp->infabsname, taggedabsname))
6413 {
6414 discard_until_line_directive = TRUE;
6415 free (taggedfname);
6416 break;
6417 }
6418 /* Else create a new file description and use that from
6419 now on, until the next #line directive. */
6420 if (fdp == NULL) /* not found */
6421 {
6422 fdp = fdhead;
6423 fdhead = xnew (1, fdesc);
6424 *fdhead = *curfdp; /* copy curr. file description */
6425 fdhead->next = fdp;
6426 fdhead->infname = savestr (curfdp->infname);
6427 fdhead->infabsname = savestr (curfdp->infabsname);
6428 fdhead->infabsdir = savestr (curfdp->infabsdir);
6429 fdhead->taggedfname = taggedfname;
6430 fdhead->usecharno = FALSE;
6431 fdhead->prop = NULL;
6432 fdhead->written = FALSE;
6433 curfdp = fdhead;
6434 }
6435 }
6436 free (taggedabsname);
6437 lineno = lno - 1;
6438 readline (lbp, stream);
6439 return;
6440 } /* if a real #line directive */
6441 } /* if #line is followed by a a number */
6442 } /* if line begins with "#line " */
6443
6444 /* If we are here, no #line directive was found. */
6445 if (discard_until_line_directive)
6446 {
6447 if (result > 0)
6448 {
6449 /* Do a tail recursion on ourselves, thus discarding the contents
6450 of the line buffer. */
6451 readline (lbp, stream);
6452 return;
6453 }
6454 /* End of file. */
6455 discard_until_line_directive = FALSE;
6456 return;
6457 }
6458 } /* if #line directives should be considered */
6459
6460 {
6461 int match;
6462 regexp *rp;
6463 char *name;
6464
6465 /* Match against relevant regexps. */
6466 if (lbp->len > 0)
6467 for (rp = p_head; rp != NULL; rp = rp->p_next)
6468 {
6469 /* Only use generic regexps or those for the current language.
6470 Also do not use multiline regexps, which is the job of
6471 regex_tag_multiline. */
6472 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6473 || rp->multi_line)
6474 continue;
6475
6476 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6477 switch (match)
6478 {
6479 case -2:
6480 /* Some error. */
6481 if (!rp->error_signaled)
6482 {
6483 error ("regexp stack overflow while matching \"%s\"",
6484 rp->pattern);
6485 rp->error_signaled = TRUE;
6486 }
6487 break;
6488 case -1:
6489 /* No match. */
6490 break;
6491 case 0:
6492 /* Empty string matched. */
6493 if (!rp->error_signaled)
6494 {
6495 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6496 rp->error_signaled = TRUE;
6497 }
6498 break;
6499 default:
6500 /* Match occurred. Construct a tag. */
6501 name = rp->name;
6502 if (name[0] == '\0')
6503 name = NULL;
6504 else /* make a named tag */
6505 name = substitute (lbp->buffer, rp->name, &rp->regs);
6506 if (rp->force_explicit_name)
6507 /* Force explicit tag name, if a name is there. */
6508 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6509 else
6510 make_tag (name, strlen (name), TRUE,
6511 lbp->buffer, match, lineno, linecharno);
6512 break;
6513 }
6514 }
6515 }
6516 }
6517
6518 \f
/*
 * Duplicate the string CP: return a pointer to strlen(cp)+1 bytes
 * obtained through savenstr that hold a copy of it.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6529
6530 /*
6531 * Return a pointer to a space of size LEN+1 allocated with xnew where
6532 * the string CP has been copied for at most the first LEN characters.
6533 */
6534 static char *
6535 savenstr (cp, len)
6536 char *cp;
6537 int len;
6538 {
6539 register char *dp;
6540
6541 dp = xnew (len + 1, char);
6542 strncpy (dp, cp, len);
6543 dp[len] = '\0';
6544 return dp;
6545 }
6546
/*
 * Return a pointer to the last place in SP where the character C
 * appears, or NULL if it does not appear.  The terminating null is
 * part of the string searched.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;              /* remember the latest hit */
      if (*sp == '\0')
        break;
    }
  return (char *)last;
}
6568
/*
 * Return a pointer to the first place in SP where the character C
 * appears, or NULL if it does not appear.  The terminating null is
 * part of the string searched.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (; *sp != c; sp++)
    if (*sp == '\0')
      return NULL;              /* ran off the end without a hit */
  return (char *)sp;
}
6587
/*
 * Compare S1 and S2, treating upper and lower case letters as equal.
 * Return zero if the strings match, otherwise the difference between
 * the first pair of characters that do not match.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Walk both strings while their characters agree. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6608
/*
 * Compare at most the first N characters of S1 and S2, treating
 * upper and lower case letters as equal.
 * Return zero if they match (always the case when N is not
 * positive), otherwise the difference between the first pair of
 * characters that do not match.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters still to be compared.  It is tested
     before looking at *S1, so that reaching the end of S1 exactly
     when N runs out still counts as a match. */
  for (; n > 0 && *s1 != '\0'; s1++, s2++, n--)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (n <= 0)
    return 0;                   /* all N characters were equal */

  return (ISALPHA (*s1) && ISALPHA (*s2)
          ? lowcase (*s1) - lowcase (*s2)
          : *s1 - *s2);
}
6634
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  (End of string is not space.) */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6644
/* Return a pointer to the first character at or after CP that is
   either white space or the end of the string. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
6654
/* Report an error through `error' (S1 is the printf control string,
   S2 its argument), then terminate the program with EXIT_FAILURE. */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);

  /* Never returns. */
  exit (EXIT_FAILURE);
}
6663
/* Report the error described by errno with perror, using S1 as the
   prefix of the message, then terminate the program with
   EXIT_FAILURE. */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);

  /* Never returns. */
  exit (EXIT_FAILURE);
}
6671
6672 static void
6673 suggest_asking_for_help ()
6674 {
6675 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6676 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6677 exit (EXIT_FAILURE);
6678 }
6679
6680 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6681 static void
6682 error (s1, s2)
6683 const char *s1, *s2;
6684 {
6685 fprintf (stderr, "%s: ", progname);
6686 fprintf (stderr, s1, s2);
6687 fprintf (stderr, "\n");
6688 }
6689
6690 /* Return a newly-allocated string whose contents
6691 concatenate those of s1, s2, s3. */
6692 static char *
6693 concat (s1, s2, s3)
6694 char *s1, *s2, *s3;
6695 {
6696 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6697 char *result = xnew (len1 + len2 + len3 + 1, char);
6698
6699 strcpy (result, s1);
6700 strcpy (result + len1, s2);
6701 strcpy (result + len1 + len2, s3);
6702 result[len1 + len2 + len3] = '\0';
6703
6704 return result;
6705 }
6706
6707 \f
6708 /* Does the same work as the system V getcwd, but does not need to
6709 guess the buffer size in advance. */
6710 static char *
6711 etags_getcwd ()
6712 {
6713 #ifdef HAVE_GETCWD
6714 int bufsize = 200;
6715 char *path = xnew (bufsize, char);
6716
6717 while (getcwd (path, bufsize) == NULL)
6718 {
6719 if (errno != ERANGE)
6720 pfatal ("getcwd");
6721 bufsize *= 2;
6722 free (path);
6723 path = xnew (bufsize, char);
6724 }
6725
6726 canonicalize_filename (path);
6727 return path;
6728
6729 #else /* not HAVE_GETCWD */
6730 #if MSDOS
6731
6732 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6733
6734 getwd (path);
6735
6736 for (p = path; *p != '\0'; p++)
6737 if (*p == '\\')
6738 *p = '/';
6739 else
6740 *p = lowcase (*p);
6741
6742 return strdup (path);
6743 #else /* not MSDOS */
6744 linebuffer path;
6745 FILE *pipe;
6746
6747 linebuffer_init (&path);
6748 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6749 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6750 pfatal ("pwd");
6751 pclose (pipe);
6752
6753 return path.buffer;
6754 #endif /* not MSDOS */
6755 #endif /* not HAVE_GETCWD */
6756 }
6757
6758 /* Return a newly allocated string containing the file name of FILE
6759 relative to the absolute directory DIR (which should end with a slash). */
6760 static char *
6761 relative_filename (file, dir)
6762 char *file, *dir;
6763 {
6764 char *fp, *dp, *afn, *res;
6765 int i;
6766
6767 /* Find the common root of file and dir (with a trailing slash). */
6768 afn = absolute_filename (file, cwd);
6769 fp = afn;
6770 dp = dir;
6771 while (*fp++ == *dp++)
6772 continue;
6773 fp--, dp--; /* back to the first differing char */
6774 #ifdef DOS_NT
6775 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6776 return afn;
6777 #endif
6778 do /* look at the equal chars until '/' */
6779 fp--, dp--;
6780 while (*fp != '/');
6781
6782 /* Build a sequence of "../" strings for the resulting relative file name. */
6783 i = 0;
6784 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6785 i += 1;
6786 res = xnew (3*i + strlen (fp + 1) + 1, char);
6787 res[0] = '\0';
6788 while (i-- > 0)
6789 strcat (res, "../");
6790
6791 /* Add the file name relative to the common root of file and dir. */
6792 strcat (res, fp + 1);
6793 free (afn);
6794
6795 return res;
6796 }
6797
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is just copied, otherwise it is
   appended to DIR.  The "/dirname/.." and "/." components are then
   removed from the result.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the component that
                 precedes the "/..". */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination lie in the same string, so
                 the tail (terminator included) must be moved with
                 memmove, not strcpy. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Same overlap as above. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6861
/* Return a newly allocated string containing the absolute name of
   the directory where FILE resides, given DIR (which should end with
   a slash).  When FILE has no slash in it, the result is a copy of
   DIR.  FILE is canonicalized in place. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *tail, *res;
  char saved;

  canonicalize_filename (file);
  tail = etags_strrchr (file, '/');
  if (tail == NULL)
    return savestr (dir);

  /* Cut FILE just after its last slash for the time of the call,
     then put the missing character back. */
  tail++;
  saved = *tail;
  *tail = '\0';
  res = absolute_filename (file, dir);
  *tail = saved;

  return res;
}
6883
/* Tell whether FN is an absolute file name, i.e. whether it starts
   with a slash or, when DOS_NT is defined, with a drive letter
   followed by ":/".  FN must already have been canonicalized with
   canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  /* Accept the `d:/NAME' form as well. */
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6896
/* Canonicalize the file name FN in place.  When DOS_NT is defined, a
   lower-case drive letter is upcased and every backslash becomes a
   slash; otherwise the name is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case. */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  /* Convert backslashes to slashes. */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* Nothing to do here; the assignment only keeps the compiler from
     complaining about an unused argument.  */
  fn = NULL;
#endif
}
6915
6916 \f
6917 /* Initialize a linebuffer for use */
6918 static void
6919 linebuffer_init (lbp)
6920 linebuffer *lbp;
6921 {
6922 lbp->size = (DEBUG) ? 3 : 200;
6923 lbp->buffer = xnew (lbp->size, char);
6924 lbp->buffer[0] = '\0';
6925 lbp->len = 0;
6926 }
6927
6928 /* Set the minimum size of a string contained in a linebuffer. */
6929 static void
6930 linebuffer_setlen (lbp, toksize)
6931 linebuffer *lbp;
6932 int toksize;
6933 {
6934 while (lbp->size <= toksize)
6935 {
6936 lbp->size *= 2;
6937 xrnew (lbp->buffer, lbp->size, char);
6938 }
6939 lbp->len = toksize;
6940 }
6941
6942 /* Like malloc but get fatal error if memory is exhausted. */
6943 static PTR
6944 xmalloc (size)
6945 unsigned int size;
6946 {
6947 PTR result = (PTR) malloc (size);
6948 if (result == NULL)
6949 fatal ("virtual memory exhausted", (char *)NULL);
6950 return result;
6951 }
6952
6953 static PTR
6954 xrealloc (ptr, size)
6955 char *ptr;
6956 unsigned int size;
6957 {
6958 PTR result = (PTR) realloc (ptr, size);
6959 if (result == NULL)
6960 fatal ("virtual memory exhausted", (char *)NULL);
6961 return result;
6962 }
6963
6964 /*
6965 * Local Variables:
6966 * indent-tabs-mode: t
6967 * tab-width: 8
6968 * fill-column: 79
6969 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6970 * c-file-style: "gnu"
6971 * End:
6972 */
6973
6974 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6975 (do not change this comment) */
6976
6977 /* etags.c ends here */