code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2016 Free Software
  32 Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  * Francesco Potortì maintained and improved it for many years
  72    starting in 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
  81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  82
  83 #ifdef DEBUG
  84 #  undef DEBUG
  85 #  define DEBUG true
  86 #else
  87 #  define DEBUG  false
  88 #  define NDEBUG                /* disable assert */
  89 #endif
  90
  91 #include <config.h>
  92
  93 #ifndef _GNU_SOURCE
  94 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  95 #endif
  96
  97 /* WIN32_NATIVE is for XEmacs.
  98    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  99 #ifdef WIN32_NATIVE
 100 # undef MSDOS
 101 # undef  WINDOWSNT
 102 # define WINDOWSNT
 103 #endif /* WIN32_NATIVE */
 104
 105 #ifdef MSDOS
 106 # undef MSDOS
 107 # define MSDOS true
 108 # include <sys/param.h>
 109 #else
 110 # define MSDOS false
 111 #endif /* MSDOS */
 112
 113 #ifdef WINDOWSNT
 114 # include <direct.h>
 115 # define MAXPATHLEN _MAX_PATH
 116 # undef HAVE_NTGUI
 117 # undef  DOS_NT
 118 # define DOS_NT
 119 # define O_CLOEXEC O_NOINHERIT
 120 #endif /* WINDOWSNT */
 121
 122 #include <limits.h>
 123 #include <unistd.h>
 124 #include <stdarg.h>
 125 #include <stdlib.h>
 126 #include <string.h>
 127 #include <sysstdio.h>
 128 #include <errno.h>
 129 #include <fcntl.h>
 130 #include <binary-io.h>
 131 #include <c-ctype.h>
 132 #include <c-strcase.h>
 133
 134 #include <assert.h>
 135 #ifdef NDEBUG
 136 # undef  assert                 /* some systems have a buggy assert.h */
 137 # define assert(x) ((void) 0)
 138 #endif
 139
 140 #include <getopt.h>
 141 #include <regex.h>
 142
 143 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 144  Leave it undefined to make the program "etags", which makes emacs-style
 145  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 146 #ifdef CTAGS
 147 # undef  CTAGS
 148 # define CTAGS true
 149 #else
 150 # define CTAGS false
 151 #endif
 152
 153 static bool
 154 streq (char const *s, char const *t)
 155 {
 156   return strcmp (s, t) == 0;
 157 }
 158
 159 static bool
 160 strcaseeq (char const *s, char const *t)
 161 {
 162   return c_strcasecmp (s, t) == 0;
 163 }
 164
 165 static bool
 166 strneq (char const *s, char const *t, size_t n)
 167 {
 168   return strncmp (s, t, n) == 0;
 169 }
 170
 171 static bool
 172 strncaseeq (char const *s, char const *t, size_t n)
 173 {
 174   return c_strncasecmp (s, t, n) == 0;
 175 }
 176
 177 /* C is not in a name.  */
 178 static bool
 179 notinname (unsigned char c)
 180 {
 181   /* Look at make_tag before modifying!  */
 182   static bool const table[UCHAR_MAX + 1] = {
 183     ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
 184     ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
 185   };
 186   return table[c];
 187 }
 188
 189 /* C can start a token.  */
 190 static bool
 191 begtoken (unsigned char c)
 192 {
 193   static bool const table[UCHAR_MAX + 1] = {
 194     ['$']=1, ['@']=1,
 195     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 196     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 197     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 198     ['Y']=1, ['Z']=1,
 199     ['_']=1,
 200     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 201     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 202     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 203     ['y']=1, ['z']=1,
 204     ['~']=1
 205   };
 206   return table[c];
 207 }
 208
 209 /* C can be in the middle of a token.  */
 210 static bool
 211 intoken (unsigned char c)
 212 {
 213   static bool const table[UCHAR_MAX + 1] = {
 214     ['$']=1,
 215     ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
 216     ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
 217     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 218     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 219     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 220     ['Y']=1, ['Z']=1,
 221     ['_']=1,
 222     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 223     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 224     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 225     ['y']=1, ['z']=1
 226   };
 227   return table[c];
 228 }
 229
 230 /* C can end a token.  */
 231 static bool
 232 endtoken (unsigned char c)
 233 {
 234   static bool const table[UCHAR_MAX + 1] = {
 235     ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
 236     ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
 237     ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
 238     ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
 239     ['{']=1, ['|']=1, ['}']=1, ['~']=1
 240   };
 241   return table[c];
 242 }
 243
 244 /*
 245  *      xnew, xrnew -- allocate, reallocate storage
 246  *
 247  * SYNOPSIS:    Type *xnew (int n, Type);
 248  *              void xrnew (OldPointer, int n, Type);
 249  */
 250 #define xnew(n, Type)      ((Type *) xmalloc ((n) * sizeof (Type)))
 251 #define xrnew(op, n, Type) ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
 252
 253 typedef void Lang_function (FILE *);
 254
 255 typedef struct
 256 {
 257   const char *suffix;           /* file name suffix for this compressor */
 258   const char *command;          /* takes one arg and decompresses to stdout */
 259 } compressor;
 260
 261 typedef struct
 262 {
 263   const char *name;             /* language name */
 264   const char *help;             /* detailed help for the language */
 265   Lang_function *function;      /* parse function */
 266   const char **suffixes;        /* name suffixes of this language's files */
 267   const char **filenames;       /* names of this language's files */
 268   const char **interpreters;    /* interpreters for this language */
 269   bool metasource;              /* source used to generate other sources */
 270 } language;
 271
 272 typedef struct fdesc
 273 {
 274   struct fdesc *next;           /* for the linked list */
 275   char *infname;                /* uncompressed input file name */
 276   char *infabsname;             /* absolute uncompressed input file name */
 277   char *infabsdir;              /* absolute dir of input file */
 278   char *taggedfname;            /* file name to write in tagfile */
 279   language *lang;               /* language of file */
 280   char *prop;                   /* file properties to write in tagfile */
 281   bool usecharno;               /* etags tags shall contain char number */
 282   bool written;                 /* entry written in the tags file */
 283 } fdesc;
 284
 285 typedef struct node_st
 286 {                               /* sorting structure */
 287   struct node_st *left, *right; /* left and right sons */
 288   fdesc *fdp;                   /* description of file to whom tag belongs */
 289   char *name;                   /* tag name */
 290   char *regex;                  /* search regexp */
 291   bool valid;                   /* write this tag on the tag file */
 292   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 293   bool been_warned;             /* warning already given for duplicated tag */
 294   int lno;                      /* line number tag is on */
 295   long cno;                     /* character number line starts on */
 296 } node;
 297
 298 /*
 299  * A `linebuffer' is a structure which holds a line of text.
 300  * `readline_internal' reads a line from a stream into a linebuffer
 301  * and works regardless of the length of the line.
 302  * SIZE is the size of BUFFER, LEN is the length of the string in
 303  * BUFFER after readline reads it.
 304  */
 305 typedef struct
 306 {
 307   long size;
 308   int len;
 309   char *buffer;
 310 } linebuffer;
 311
 312 /* Used to support mixing of --lang and file names. */
 313 typedef struct
 314 {
 315   enum {
 316     at_language,                /* a language specification */
 317     at_regexp,                  /* a regular expression */
 318     at_filename,                /* a file name */
 319     at_stdin,                   /* read from stdin here */
 320     at_end                      /* stop parsing the list */
 321   } arg_type;                   /* argument type */
 322   language *lang;               /* language associated with the argument */
 323   char *what;                   /* the argument itself */
 324 } argument;
 325
 326 /* Structure defining a regular expression. */
 327 typedef struct regexp
 328 {
 329   struct regexp *p_next;        /* pointer to next in list */
 330   language *lang;               /* if set, use only for this language */
 331   char *pattern;                /* the regexp pattern */
 332   char *name;                   /* tag name */
 333   struct re_pattern_buffer *pat; /* the compiled pattern */
 334   struct re_registers regs;     /* re registers */
 335   bool error_signaled;          /* already signaled for this regexp */
 336   bool force_explicit_name;     /* do not allow implicit tag name */
 337   bool ignore_case;             /* ignore case when matching */
 338   bool multi_line;              /* do a multi-line match on the whole file */
 339 } regexp;
 340
 341
 342 /* Many compilers barf on this:
 343         Lang_function Ada_funcs;
 344    so let's write it this way */
 345 static void Ada_funcs (FILE *);
 346 static void Asm_labels (FILE *);
 347 static void C_entries (int c_ext, FILE *);
 348 static void default_C_entries (FILE *);
 349 static void plain_C_entries (FILE *);
 350 static void Cjava_entries (FILE *);
 351 static void Cobol_paragraphs (FILE *);
 352 static void Cplusplus_entries (FILE *);
 353 static void Cstar_entries (FILE *);
 354 static void Erlang_functions (FILE *);
 355 static void Forth_words (FILE *);
 356 static void Fortran_functions (FILE *);
 357 static void Go_functions (FILE *);
 358 static void HTML_labels (FILE *);
 359 static void Lisp_functions (FILE *);
 360 static void Lua_functions (FILE *);
 361 static void Makefile_targets (FILE *);
 362 static void Pascal_functions (FILE *);
 363 static void Perl_functions (FILE *);
 364 static void PHP_functions (FILE *);
 365 static void PS_functions (FILE *);
 366 static void Prolog_functions (FILE *);
 367 static void Python_functions (FILE *);
 368 static void Ruby_functions (FILE *);
 369 static void Scheme_functions (FILE *);
 370 static void TeX_commands (FILE *);
 371 static void Texinfo_nodes (FILE *);
 372 static void Yacc_entries (FILE *);
 373 static void just_read_file (FILE *);
 374
 375 static language *get_language_from_langname (const char *);
 376 static void readline (linebuffer *, FILE *);
 377 static long readline_internal (linebuffer *, FILE *, char const *);
 378 static bool nocase_tail (const char *);
 379 static void get_tag (char *, char **);
 380
 381 static void analyze_regex (char *);
 382 static void free_regexps (void);
 383 static void regex_tag_multiline (void);
 384 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
 385 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
 386 static _Noreturn void suggest_asking_for_help (void);
 387 static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
 388 static _Noreturn void pfatal (const char *);
 389 static void add_node (node *, node **);
 390
 391 static void process_file_name (char *, language *);
 392 static void process_file (FILE *, char *, language *);
 393 static void find_entries (FILE *);
 394 static void free_tree (node *);
 395 static void free_fdesc (fdesc *);
 396 static void pfnote (char *, bool, char *, int, int, long);
 397 static void invalidate_nodes (fdesc *, node **);
 398 static void put_entries (node *);
 399
 400 static char *concat (const char *, const char *, const char *);
 401 static char *skip_spaces (char *);
 402 static char *skip_non_spaces (char *);
 403 static char *skip_name (char *);
 404 static char *savenstr (const char *, int);
 405 static char *savestr (const char *);
 406 static char *etags_getcwd (void);
 407 static char *relative_filename (char *, char *);
 408 static char *absolute_filename (char *, char *);
 409 static char *absolute_dirname (char *, char *);
 410 static bool filename_is_absolute (char *f);
 411 static void canonicalize_filename (char *);
 412 static char *etags_mktmp (void);
 413 static void linebuffer_init (linebuffer *);
 414 static void linebuffer_setlen (linebuffer *, int);
 415 static void *xmalloc (size_t);
 416 static void *xrealloc (void *, size_t);
 417
 418 \f
 419 static char searchar = '/';     /* use /.../ searches */
 420
 421 static char *tagfile;           /* output file */
 422 static char *progname;          /* name this program was invoked with */
 423 static char *cwd;               /* current working directory */
 424 static char *tagfiledir;        /* directory of tagfile */
 425 static FILE *tagf;              /* ioptr for tags file */
 426 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */
 427
 428 static fdesc *fdhead;           /* head of file description list */
 429 static fdesc *curfdp;           /* current file description */
 430 static char *infilename;        /* current input file name */
 431 static int lineno;              /* line number of current line */
 432 static long charno;             /* current character number */
 433 static long linecharno;         /* charno of start of current line */
 434 static char *dbp;               /* pointer to start of current tag */
 435
 436 static const int invalidcharno = -1;
 437
 438 static node *nodehead;          /* the head of the binary tree of tags */
 439 static node *last_node;         /* the last node created */
 440
 441 static linebuffer lb;           /* the current line */
 442 static linebuffer filebuf;      /* a buffer containing the whole file */
 443 static linebuffer token_name;   /* a buffer containing a tag name */
 444
 445 static bool append_to_tagfile;  /* -a: append to tags */
 446 /* The next five default to true in C and derived languages.  */
 447 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 448 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 449                                 /* 0 struct/enum/union decls, and C++ */
 450                                 /* member functions. */
 451 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 452                                 /* constants and variables. */
 453                                 /* -D: opposite of -d.  Default under ctags. */
 454 static int globals;             /* create tags for global variables */
 455 static int members;             /* create tags for C member variables */
 456 static int declarations;        /* --declarations: tag them and extern in C&Co*/
 457 static int no_line_directive;   /* ignore #line directives (undocumented) */
 458 static int no_duplicates;       /* no duplicate tags for ctags (undocumented) */
 459 static bool update;             /* -u: update tags */
 460 static bool vgrind_style;       /* -v: create vgrind style index output */
 461 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 462 static bool cxref_style;        /* -x: create cxref style output */
 463 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 464 static bool ignoreindent;       /* -I: ignore indentation in C */
 465 static int packages_only;       /* --packages-only: in Ada, only tag packages*/
 466 static int class_qualify;       /* -Q: produce class-qualified tags in C++/Java */
 467
 468 /* STDIN is defined in LynxOS system headers */
 469 #ifdef STDIN
 470 # undef STDIN
 471 #endif
 472
 473 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 474 static bool parsing_stdin;      /* --parse-stdin used */
 475
 476 static regexp *p_head;          /* list of all regexps */
 477 static bool need_filebuf;       /* some regexes are multi-line */
 478
 479 static struct option longopts[] =
 480 {
 481   { "append",             no_argument,       NULL,               'a'   },
 482   { "packages-only",      no_argument,       &packages_only,     1     },
 483   { "c++",                no_argument,       NULL,               'C'   },
 484   { "declarations",       no_argument,       &declarations,      1     },
 485   { "no-line-directive",  no_argument,       &no_line_directive, 1     },
 486   { "no-duplicates",      no_argument,       &no_duplicates,     1     },
 487   { "help",               no_argument,       NULL,               'h'   },
 488   { "help",               no_argument,       NULL,               'H'   },
 489   { "ignore-indentation", no_argument,       NULL,               'I'   },
 490   { "language",           required_argument, NULL,               'l'   },
 491   { "members",            no_argument,       &members,           1     },
 492   { "no-members",         no_argument,       &members,           0     },
 493   { "output",             required_argument, NULL,               'o'   },
 494   { "class-qualify",      no_argument,       &class_qualify,     'Q'   },
 495   { "regex",              required_argument, NULL,               'r'   },
 496   { "no-regex",           no_argument,       NULL,               'R'   },
 497   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 498   { "parse-stdin",        required_argument, NULL,               STDIN },
 499   { "version",            no_argument,       NULL,               'V'   },
 500
 501 #if CTAGS /* Ctags options */
 502   { "backward-search",    no_argument,       NULL,               'B'   },
 503   { "cxref",              no_argument,       NULL,               'x'   },
 504   { "defines",            no_argument,       NULL,               'd'   },
 505   { "globals",            no_argument,       &globals,           1     },
 506   { "typedefs",           no_argument,       NULL,               't'   },
 507   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 508   { "update",             no_argument,       NULL,               'u'   },
 509   { "vgrind",             no_argument,       NULL,               'v'   },
 510   { "no-warn",            no_argument,       NULL,               'w'   },
 511
 512 #else /* Etags options */
 513   { "no-defines",         no_argument,       NULL,               'D'   },
 514   { "no-globals",         no_argument,       &globals,           0     },
 515   { "include",            required_argument, NULL,               'i'   },
 516 #endif
 517   { NULL }
 518 };
 519
 520 static compressor compressors[] =
 521 {
 522   { "z", "gzip -d -c"},
 523   { "Z", "gzip -d -c"},
 524   { "gz", "gzip -d -c"},
 525   { "GZ", "gzip -d -c"},
 526   { "bz2", "bzip2 -d -c" },
 527   { "xz", "xz -d -c" },
 528   { NULL }
 529 };
 530
 531 /*
 532  * Language stuff.
 533  */
 534
 535 /* Ada code */
 536 static const char *Ada_suffixes [] =
 537   { "ads", "adb", "ada", NULL };
 538 static const char Ada_help [] =
 539 "In Ada code, functions, procedures, packages, tasks and types are\n\
 540 tags.  Use the '--packages-only' option to create tags for\n\
 541 packages only.\n\
 542 Ada tag names have suffixes indicating the type of entity:\n\
 543         Entity type:    Qualifier:\n\
 544         ------------    ----------\n\
 545         function        /f\n\
 546         procedure       /p\n\
 547         package spec    /s\n\
 548         package body    /b\n\
 549         type            /t\n\
 550         task            /k\n\
 551 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 552 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
 553 will just search for any tag 'bidule'.";
 554
 555 /* Assembly code */
 556 static const char *Asm_suffixes [] =
 557   { "a",        /* Unix assembler */
 558     "asm", /* Microcontroller assembly */
 559     "def", /* BSO/Tasking definition includes  */
 560     "inc", /* Microcontroller include files */
 561     "ins", /* Microcontroller include files */
 562     "s", "sa", /* Unix assembler */
 563     "S",   /* cpp-processed Unix assembler */
 564     "src", /* BSO/Tasking C compiler output */
 565     NULL
 566   };
 567 static const char Asm_help [] =
 568 "In assembler code, labels appearing at the beginning of a line,\n\
 569 followed by a colon, are tags.";
 570
 571
 572 /* Note that .c and .h can be considered C++, if the --c++ flag was
 573    given, or if the `class' or `template' keywords are met inside the file.
 574    That is why default_C_entries is called for these. */
 575 static const char *default_C_suffixes [] =
 576   { "c", "h", NULL };
 577 #if CTAGS                               /* C help for Ctags */
 578 static const char default_C_help [] =
 579 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 580 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
 581 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
 582 Use --globals to tag global variables.\n\
 583 You can tag function declarations and external variables by\n\
 584 using '--declarations', and struct members by using '--members'.";
 585 #else                                   /* C help for Etags */
 586 static const char default_C_help [] =
 587 "In C code, any C function or typedef is a tag, and so are\n\
 588 definitions of 'struct', 'union' and 'enum'.  '#define' macro\n\
 589 definitions and 'enum' constants are tags unless you specify\n\
 590 '--no-defines'.  Global variables are tags unless you specify\n\
 591 '--no-globals' and so are struct members unless you specify\n\
 592 '--no-members'.  Use of '--no-globals', '--no-defines' and\n\
 593 '--no-members' can make the tags table file much smaller.\n\
 594 You can tag function declarations and external variables by\n\
 595 using '--declarations'.";
 596 #endif  /* C help for Ctags and Etags */
 597
 598 static const char *Cplusplus_suffixes [] =
 599   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 600     "M",                        /* Objective C++ */
 601     "pdb",                      /* PostScript with C syntax */
 602     NULL };
 603 static const char Cplusplus_help [] =
 604 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 605 --help --lang=c --lang=c++ for full help.)\n\
 606 In addition to C tags, member functions are also recognized.  Member\n\
 607 variables are recognized unless you use the '--no-members' option.\n\
 608 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
 609 and 'CLASS::FUNCTION'.  'operator' definitions have tag names like\n\
 610 'operator+'.";
 611
 612 static const char *Cjava_suffixes [] =
 613   { "java", NULL };
 614 static char Cjava_help [] =
 615 "In Java code, all the tags constructs of C and C++ code are\n\
 616 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 617
 618
 619 static const char *Cobol_suffixes [] =
 620   { "COB", "cob", NULL };
 621 static char Cobol_help [] =
 622 "In Cobol code, tags are paragraph names; that is, any word\n\
 623 starting in column 8 and followed by a period.";
 624
 625 static const char *Cstar_suffixes [] =
 626   { "cs", "hs", NULL };
 627
 628 static const char *Erlang_suffixes [] =
 629   { "erl", "hrl", NULL };
 630 static const char Erlang_help [] =
 631 "In Erlang code, the tags are the functions, records and macros\n\
 632 defined in the file.";
 633
 634 const char *Forth_suffixes [] =
 635   { "fth", "tok", NULL };
 636 static const char Forth_help [] =
 637 "In Forth code, tags are words defined by ':',\n\
 638 constant, code, create, defer, value, variable, buffer:, field.";
 639
 640 static const char *Fortran_suffixes [] =
 641   { "F", "f", "f90", "for", NULL };
 642 static const char Fortran_help [] =
 643 "In Fortran code, functions, subroutines and block data are tags.";
 644
 645 static const char *Go_suffixes [] = {"go", NULL};
 646 static const char Go_help [] =
 647   "In Go code, functions, interfaces and packages are tags.";
 648
 649 static const char *HTML_suffixes [] =
 650   { "htm", "html", "shtml", NULL };
 651 static const char HTML_help [] =
 652 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
 653 'h3' headers.  Also, tags are 'name=' in anchors and all\n\
 654 occurrences of 'id='.";
 655
 656 static const char *Lisp_suffixes [] =
 657   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 658 static const char Lisp_help [] =
 659 "In Lisp code, any function defined with 'defun', any variable\n\
 660 defined with 'defvar' or 'defconst', and in general the first\n\
 661 argument of any expression that starts with '(def' in column zero\n\
 662 is a tag.\n\
 663 The '--declarations' option tags \"(defvar foo)\" constructs too.";
 664
 665 static const char *Lua_suffixes [] =
 666   { "lua", "LUA", NULL };
 667 static const char Lua_help [] =
 668 "In Lua scripts, all functions are tags.";
 669
 670 static const char *Makefile_filenames [] =
 671   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 672 static const char Makefile_help [] =
 673 "In makefiles, targets are tags; additionally, variables are tags\n\
 674 unless you specify '--no-globals'.";
 675
 676 static const char *Objc_suffixes [] =
 677   { "lm",                       /* Objective lex file */
 678     "m",                        /* Objective C file */
 679      NULL };
 680 static const char Objc_help [] =
 681 "In Objective C code, tags include Objective C definitions for classes,\n\
 682 class categories, methods and protocols.  Tags for variables and\n\
 683 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\n\
 684 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 685
 686 static const char *Pascal_suffixes [] =
 687   { "p", "pas", NULL };
 688 static const char Pascal_help [] =
 689 "In Pascal code, the tags are the functions and procedures defined\n\
 690 in the file.";
 691 /* " // this is for working around an Emacs highlighting bug... */
 692
 693 static const char *Perl_suffixes [] =
 694   { "pl", "pm", NULL };
 695 static const char *Perl_interpreters [] =
 696   { "perl", "@PERL@", NULL };
 697 static const char Perl_help [] =
 698 "In Perl code, the tags are the packages, subroutines and variables\n\
 699 defined by the 'package', 'sub', 'my' and 'local' keywords.  Use\n\
 700 '--globals' if you want to tag global variables.  Tags for\n\
 701 subroutines are named 'PACKAGE::SUB'.  The name for subroutines\n\
 702 defined in the default package is 'main::SUB'.";
 703
 704 static const char *PHP_suffixes [] =
 705   { "php", "php3", "php4", NULL };
 706 static const char PHP_help [] =
 707 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 708 the '--no-members' option, vars are tags too.";
 709
 710 static const char *plain_C_suffixes [] =
 711   { "pc",                       /* Pro*C file */
 712      NULL };
 713
 714 static const char *PS_suffixes [] =
 715   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 716 static const char PS_help [] =
 717 "In PostScript code, the tags are the functions.";
 718
 719 static const char *Prolog_suffixes [] =
 720   { "prolog", NULL };
 721 static const char Prolog_help [] =
 722 "In Prolog code, tags are predicates and rules at the beginning of\n\
 723 line.";
 724
 725 static const char *Python_suffixes [] =
 726   { "py", NULL };
 727 static const char Python_help [] =
 728 "In Python code, 'def' or 'class' at the beginning of a line\n\
 729 generate a tag.";
 730
 731 static const char *Ruby_suffixes [] =
 732   { "rb", "ru", "rbw", NULL };
 733 static const char *Ruby_filenames [] =
 734   { "Rakefile", "Thorfile", NULL };
 735 static const char Ruby_help [] =
 736   "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
 737 a line generate a tag.  Constants also generate a tag.";
 738
 739 /* Can't do the `SCM' or `scm' prefix with a version number. */
 740 static const char *Scheme_suffixes [] =
 741   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 742 static const char Scheme_help [] =
 743 "In Scheme code, tags include anything defined with 'def' or with a\n\
 744 construct whose name starts with 'def'.  They also include\n\
 745 variables set with 'set!' at top level in the file.";
 746
 747 static const char *TeX_suffixes [] =
 748   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 749 static const char TeX_help [] =
 750 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
 751 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
 752 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
 753 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
 754 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
 755 \n\
 756 Other commands can be specified by setting the environment variable\n\
 757 'TEXTAGS' to a colon-separated list like, for example,\n\
 758      TEXTAGS=\"mycommand:myothercommand\".";
 759
 760
 761 static const char *Texinfo_suffixes [] =
 762   { "texi", "texinfo", "txi", NULL };
 763 static const char Texinfo_help [] =
 764 "for texinfo files, lines starting with @node are tagged.";
 765
 766 static const char *Yacc_suffixes [] =
 767   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 768 static const char Yacc_help [] =
 769 "In Bison or Yacc input files, each rule defines as a tag the\n\
 770 nonterminal it constructs.  The portions of the file that contain\n\
 771 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 772 for full help).";
 773
 774 static const char auto_help [] =
 775 "'auto' is not a real language, it indicates to use\n\
 776 a default language for files base on file name suffix and file contents.";
 777
 778 static const char none_help [] =
 779 "'none' is not a real language, it indicates to only do\n\
 780 regexp processing on files.";
 781
 782 static const char no_lang_help [] =
 783 "No detailed help available for this language.";
 784
 785
 786 /*
 787  * Table of languages.
 788  *
 789  * It is ok for a given function to be listed under more than one
 790  * name.  I just didn't.
 791  */
 792
 793 static language lang_names [] =
 794 {
 795   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 796   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 797   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 798   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 799   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 800   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 801   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 802   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 803   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 804   { "go",        Go_help,        Go_functions,      Go_suffixes        },
 805   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 806   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 807   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 808   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 809   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 810   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 811   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 812   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 813   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 814   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 815   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 816   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 817   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 818   { "ruby",      Ruby_help,Ruby_functions,Ruby_suffixes,Ruby_filenames },
 819   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 820   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 821   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 822   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
 823   { "auto",      auto_help },                      /* default guessing scheme */
 824   { "none",      none_help,      just_read_file }, /* regexp matching only */
 825   { NULL }                /* end of list */
 826 };
 827
 828 \f
 829 static void
 830 print_language_names (void)
 831 {
 832   language *lang;
 833   const char **name, **ext;
 834
 835   puts ("\nThese are the currently supported languages, along with the\n\
 836 default file names and dot suffixes:");
 837   for (lang = lang_names; lang->name != NULL; lang++)
 838     {
 839       printf ("  %-*s", 10, lang->name);
 840       if (lang->filenames != NULL)
 841         for (name = lang->filenames; *name != NULL; name++)
 842           printf (" %s", *name);
 843       if (lang->suffixes != NULL)
 844         for (ext = lang->suffixes; *ext != NULL; ext++)
 845           printf (" .%s", *ext);
 846       puts ("");
 847     }
 848   puts ("where 'auto' means use default language for files based on file\n\
 849 name suffix, and 'none' means only do regexp processing on files.\n\
 850 If no language is specified and no matching suffix is found,\n\
 851 the first line of the file is read for a sharp-bang (#!) sequence\n\
 852 followed by the name of an interpreter.  If no such sequence is found,\n\
 853 Fortran is tried first; if no tags are found, C is tried next.\n\
 854 When parsing any C file, a \"class\" or \"template\" keyword\n\
 855 switches to C++.");
 856   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 857 \n\
 858 For detailed help on a given language use, for example,\n\
 859 etags --help --lang=ada.");
 860 }
 861
 862 #ifndef EMACS_NAME
 863 # define EMACS_NAME "standalone"
 864 #endif
 865 #ifndef VERSION
 866 # define VERSION "17.38.1.4"
 867 #endif
 868 static _Noreturn void
 869 print_version (void)
 870 {
 871   char emacs_copyright[] = COPYRIGHT;
 872
 873   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 874   puts (emacs_copyright);
 875   puts ("This program is distributed under the terms in ETAGS.README");
 876
 877   exit (EXIT_SUCCESS);
 878 }
 879
 880 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 881 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
 882 #endif
 883
 884 static _Noreturn void
 885 print_help (argument *argbuffer)
 886 {
 887   bool help_for_lang = false;
 888
 889   for (; argbuffer->arg_type != at_end; argbuffer++)
 890     if (argbuffer->arg_type == at_language)
 891       {
 892         if (help_for_lang)
 893           puts ("");
 894         puts (argbuffer->lang->help);
 895         help_for_lang = true;
 896       }
 897
 898   if (help_for_lang)
 899     exit (EXIT_SUCCESS);
 900
 901   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 902 \n\
 903 These are the options accepted by %s.\n", progname, progname);
 904   puts ("You may use unambiguous abbreviations for the long option names.");
 905   puts ("  A - as file name means read names from stdin (one per line).\n\
 906 Absolute names are stored in the output file as they are.\n\
 907 Relative ones are stored relative to the output file's directory.\n");
 908
 909   puts ("-a, --append\n\
 910         Append tag entries to existing tags file.");
 911
 912   puts ("--packages-only\n\
 913         For Ada files, only generate tags for packages.");
 914
 915   if (CTAGS)
 916     puts ("-B, --backward-search\n\
 917         Write the search commands for the tag entries using '?', the\n\
 918         backward-search command instead of '/', the forward-search command.");
 919
 920   /* This option is mostly obsolete, because etags can now automatically
 921      detect C++.  Retained for backward compatibility and for debugging and
 922      experimentation.  In principle, we could want to tag as C++ even
 923      before any "class" or "template" keyword.
 924   puts ("-C, --c++\n\
 925         Treat files whose name suffix defaults to C language as C++ files.");
 926   */
 927
 928   puts ("--declarations\n\
 929         In C and derived languages, create tags for function declarations,");
 930   if (CTAGS)
 931     puts ("\tand create tags for extern variables if --globals is used.");
 932   else
 933     puts
 934       ("\tand create tags for extern variables unless --no-globals is used.");
 935
 936   if (CTAGS)
 937     puts ("-d, --defines\n\
 938         Create tag entries for C #define constants and enum constants, too.");
 939   else
 940     puts ("-D, --no-defines\n\
 941         Don't create tag entries for C #define constants and enum constants.\n\
 942         This makes the tags file smaller.");
 943
 944   if (!CTAGS)
 945     puts ("-i FILE, --include=FILE\n\
 946         Include a note in tag file indicating that, when searching for\n\
 947         a tag, one should also consult the tags file FILE after\n\
 948         checking the current file.");
 949
 950   puts ("-l LANG, --language=LANG\n\
 951         Force the following files to be considered as written in the\n\
 952         named language up to the next --language=LANG option.");
 953
 954   if (CTAGS)
 955     puts ("--globals\n\
 956         Create tag entries for global variables in some languages.");
 957   else
 958     puts ("--no-globals\n\
 959         Do not create tag entries for global variables in some\n\
 960         languages.  This makes the tags file smaller.");
 961
 962   puts ("--no-line-directive\n\
 963         Ignore #line preprocessor directives in C and derived languages.");
 964
 965   if (CTAGS)
 966     puts ("--members\n\
 967         Create tag entries for members of structures in some languages.");
 968   else
 969     puts ("--no-members\n\
 970         Do not create tag entries for members of structures\n\
 971         in some languages.");
 972
 973   puts ("-Q, --class-qualify\n\
 974         Qualify tag names with their class name in C++, ObjC, and Java.\n\
 975         This produces tag names of the form \"class::member\" for C++,\n\
 976         \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
 977         For Objective C, this also produces class methods qualified with\n\
 978         their arguments, as in \"foo:bar:baz:more\".");
 979   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 980         Make a tag for each line matching a regular expression pattern\n\
 981         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 982         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 983         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 984         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 985   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 986         For example Tcl named tags can be created with:\n\
 987           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 988         MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
 989         'm' means to allow multi-line matches, 's' implies 'm' and\n\
 990         causes dot to match any character, including newline.");
 991
 992   puts ("-R, --no-regex\n\
 993         Don't create tags from regexps for the following files.");
 994
 995   puts ("-I, --ignore-indentation\n\
 996         In C and C++ do not assume that a closing brace in the first\n\
 997         column is the final brace of a function or structure definition.");
 998
 999   puts ("-o FILE, --output=FILE\n\
1000         Write the tags to FILE.");
1001
1002   puts ("--parse-stdin=NAME\n\
1003         Read from standard input and record tags as belonging to file NAME.");
1004
1005   if (CTAGS)
1006     {
1007       puts ("-t, --typedefs\n\
1008         Generate tag entries for C and Ada typedefs.");
1009       puts ("-T, --typedefs-and-c++\n\
1010         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1011         and C++ member functions.");
1012     }
1013
1014   if (CTAGS)
1015     puts ("-u, --update\n\
1016         Update the tag entries for the given files, leaving tag\n\
1017         entries for other files in place.  Currently, this is\n\
1018         implemented by deleting the existing entries for the given\n\
1019         files and then rewriting the new entries at the end of the\n\
1020         tags file.  It is often faster to simply rebuild the entire\n\
1021         tag file than to use this.");
1022
1023   if (CTAGS)
1024     {
1025       puts ("-v, --vgrind\n\
1026         Print on the standard output an index of items intended for\n\
1027         human consumption, similar to the output of vgrind.  The index\n\
1028         is sorted, and gives the page number of each item.");
1029
1030       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1031         puts ("-w, --no-duplicates\n\
1032         Do not create duplicate tag entries, for compatibility with\n\
1033         traditional ctags.");
1034
1035       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1036         puts ("-w, --no-warn\n\
1037         Suppress warning messages about duplicate tag entries.");
1038
1039       puts ("-x, --cxref\n\
1040         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1041         The output uses line numbers instead of page numbers, but\n\
1042         beyond that the differences are cosmetic; try both to see\n\
1043         which you like.");
1044     }
1045
1046   puts ("-V, --version\n\
1047         Print the version of the program.\n\
1048 -h, --help\n\
1049         Print this help message.\n\
1050         Followed by one or more '--language' options prints detailed\n\
1051         help about tag generation for the specified languages.");
1052
1053   print_language_names ();
1054
1055   puts ("");
1056   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1057
1058   exit (EXIT_SUCCESS);
1059 }
1060
1061 \f
1062 int
1063 main (int argc, char **argv)
1064 {
1065   int i;
1066   unsigned int nincluded_files;
1067   char **included_files;
1068   argument *argbuffer;
1069   int current_arg, file_count;
1070   linebuffer filename_lb;
1071   bool help_asked = false;
1072   ptrdiff_t len;
1073   char *optstring;
1074   int opt;
1075
1076   progname = argv[0];
1077   nincluded_files = 0;
1078   included_files = xnew (argc, char *);
1079   current_arg = 0;
1080   file_count = 0;
1081
1082   /* Allocate enough no matter what happens.  Overkill, but each one
1083      is small. */
1084   argbuffer = xnew (argc, argument);
1085
1086   /*
1087    * Always find typedefs and structure tags.
1088    * Also default to find macro constants, enum constants, struct
1089    * members and global variables.  Do it for both etags and ctags.
1090    */
1091   typedefs = typedefs_or_cplusplus = constantypedefs = true;
1092   globals = members = true;
1093
1094   /* When the optstring begins with a '-' getopt_long does not rearrange the
1095      non-options arguments to be at the end, but leaves them alone. */
1096   optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1097                       (CTAGS) ? "BxdtTuvw" : "Di:",
1098                       "");
1099
1100   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1101     switch (opt)
1102       {
1103       case 0:
1104         /* If getopt returns 0, then it has already processed a
1105            long-named option.  We should do nothing.  */
1106         break;
1107
1108       case 1:
1109         /* This means that a file name has been seen.  Record it. */
1110         argbuffer[current_arg].arg_type = at_filename;
1111         argbuffer[current_arg].what     = optarg;
1112         len = strlen (optarg);
1113         if (whatlen_max < len)
1114           whatlen_max = len;
1115         ++current_arg;
1116         ++file_count;
1117         break;
1118
1119       case STDIN:
1120         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1121         argbuffer[current_arg].arg_type = at_stdin;
1122         argbuffer[current_arg].what     = optarg;
1123         len = strlen (optarg);
1124         if (whatlen_max < len)
1125           whatlen_max = len;
1126         ++current_arg;
1127         ++file_count;
1128         if (parsing_stdin)
1129           fatal ("cannot parse standard input more than once");
1130         parsing_stdin = true;
1131         break;
1132
1133         /* Common options. */
1134       case 'a': append_to_tagfile = true;       break;
1135       case 'C': cplusplus = true;               break;
1136       case 'f':         /* for compatibility with old makefiles */
1137       case 'o':
1138         if (tagfile)
1139           {
1140             error ("-o option may only be given once.");
1141             suggest_asking_for_help ();
1142             /* NOTREACHED */
1143           }
1144         tagfile = optarg;
1145         break;
1146       case 'I':
1147       case 'S':         /* for backward compatibility */
1148         ignoreindent = true;
1149         break;
1150       case 'l':
1151         {
1152           language *lang = get_language_from_langname (optarg);
1153           if (lang != NULL)
1154             {
1155               argbuffer[current_arg].lang = lang;
1156               argbuffer[current_arg].arg_type = at_language;
1157               ++current_arg;
1158             }
1159         }
1160         break;
1161       case 'c':
1162         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1163         optarg = concat (optarg, "i", ""); /* memory leak here */
1164         /* FALLTHRU */
1165       case 'r':
1166         argbuffer[current_arg].arg_type = at_regexp;
1167         argbuffer[current_arg].what = optarg;
1168         len = strlen (optarg);
1169         if (whatlen_max < len)
1170           whatlen_max = len;
1171         ++current_arg;
1172         break;
1173       case 'R':
1174         argbuffer[current_arg].arg_type = at_regexp;
1175         argbuffer[current_arg].what = NULL;
1176         ++current_arg;
1177         break;
1178       case 'V':
1179         print_version ();
1180         break;
1181       case 'h':
1182       case 'H':
1183         help_asked = true;
1184         break;
1185       case 'Q':
1186         class_qualify = 1;
1187         break;
1188
1189         /* Etags options */
1190       case 'D': constantypedefs = false;                        break;
1191       case 'i': included_files[nincluded_files++] = optarg;     break;
1192
1193         /* Ctags options. */
1194       case 'B': searchar = '?';                                 break;
1195       case 'd': constantypedefs = true;                         break;
1196       case 't': typedefs = true;                                break;
1197       case 'T': typedefs = typedefs_or_cplusplus = true;        break;
1198       case 'u': update = true;                                  break;
1199       case 'v': vgrind_style = true;                      /*FALLTHRU*/
1200       case 'x': cxref_style = true;                             break;
1201       case 'w': no_warnings = true;                             break;
1202       default:
1203         suggest_asking_for_help ();
1204         /* NOTREACHED */
1205       }
1206
1207   /* No more options.  Store the rest of arguments. */
1208   for (; optind < argc; optind++)
1209     {
1210       argbuffer[current_arg].arg_type = at_filename;
1211       argbuffer[current_arg].what = argv[optind];
1212       len = strlen (argv[optind]);
1213       if (whatlen_max < len)
1214         whatlen_max = len;
1215       ++current_arg;
1216       ++file_count;
1217     }
1218
1219   argbuffer[current_arg].arg_type = at_end;
1220
1221   if (help_asked)
1222     print_help (argbuffer);
1223     /* NOTREACHED */
1224
1225   if (nincluded_files == 0 && file_count == 0)
1226     {
1227       error ("no input files specified.");
1228       suggest_asking_for_help ();
1229       /* NOTREACHED */
1230     }
1231
1232   if (tagfile == NULL)
1233     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1234   cwd = etags_getcwd ();        /* the current working directory */
1235   if (cwd[strlen (cwd) - 1] != '/')
1236     {
1237       char *oldcwd = cwd;
1238       cwd = concat (oldcwd, "/", "");
1239       free (oldcwd);
1240     }
1241
1242   /* Compute base directory for relative file names. */
1243   if (streq (tagfile, "-")
1244       || strneq (tagfile, "/dev/", 5))
1245     tagfiledir = cwd;            /* relative file names are relative to cwd */
1246   else
1247     {
1248       canonicalize_filename (tagfile);
1249       tagfiledir = absolute_dirname (tagfile, cwd);
1250     }
1251
1252   linebuffer_init (&lb);
1253   linebuffer_init (&filename_lb);
1254   linebuffer_init (&filebuf);
1255   linebuffer_init (&token_name);
1256
1257   if (!CTAGS)
1258     {
1259       if (streq (tagfile, "-"))
1260         {
1261           tagf = stdout;
1262           SET_BINARY (fileno (stdout));
1263         }
1264       else
1265         tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1266       if (tagf == NULL)
1267         pfatal (tagfile);
1268     }
1269
1270   /*
1271    * Loop through files finding functions.
1272    */
1273   for (i = 0; i < current_arg; i++)
1274     {
1275       static language *lang;    /* non-NULL if language is forced */
1276       char *this_file;
1277
1278       switch (argbuffer[i].arg_type)
1279         {
1280         case at_language:
1281           lang = argbuffer[i].lang;
1282           break;
1283         case at_regexp:
1284           analyze_regex (argbuffer[i].what);
1285           break;
1286         case at_filename:
1287               this_file = argbuffer[i].what;
1288               /* Input file named "-" means read file names from stdin
1289                  (one per line) and use them. */
1290               if (streq (this_file, "-"))
1291                 {
1292                   if (parsing_stdin)
1293                     fatal ("cannot parse standard input "
1294                            "AND read file names from it");
1295                   while (readline_internal (&filename_lb, stdin, "-") > 0)
1296                     process_file_name (filename_lb.buffer, lang);
1297                 }
1298               else
1299                 process_file_name (this_file, lang);
1300           break;
1301         case at_stdin:
1302           this_file = argbuffer[i].what;
1303           process_file (stdin, this_file, lang);
1304           break;
1305         default:
1306           error ("internal error: arg_type");
1307         }
1308     }
1309
1310   free_regexps ();
1311   free (lb.buffer);
1312   free (filebuf.buffer);
1313   free (token_name.buffer);
1314
1315   if (!CTAGS || cxref_style)
1316     {
1317       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1318       put_entries (nodehead);
1319       free_tree (nodehead);
1320       nodehead = NULL;
1321       if (!CTAGS)
1322         {
1323           fdesc *fdp;
1324
1325           /* Output file entries that have no tags. */
1326           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1327             if (!fdp->written)
1328               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1329
1330           while (nincluded_files-- > 0)
1331             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1332
1333           if (fclose (tagf) == EOF)
1334             pfatal (tagfile);
1335         }
1336
1337       exit (EXIT_SUCCESS);
1338     }
1339
1340   /* From here on, we are in (CTAGS && !cxref_style) */
1341   if (update)
1342     {
1343       char *cmd =
1344         xmalloc (strlen (tagfile) + whatlen_max +
1345                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1346       for (i = 0; i < current_arg; ++i)
1347         {
1348           switch (argbuffer[i].arg_type)
1349             {
1350             case at_filename:
1351             case at_stdin:
1352               break;
1353             default:
1354               continue;         /* the for loop */
1355             }
1356           char *z = stpcpy (cmd, "mv ");
1357           z = stpcpy (z, tagfile);
1358           z = stpcpy (z, " OTAGS;fgrep -v '\t");
1359           z = stpcpy (z, argbuffer[i].what);
1360           z = stpcpy (z, "\t' OTAGS >");
1361           z = stpcpy (z, tagfile);
1362           strcpy (z, ";rm OTAGS");
1363           if (system (cmd) != EXIT_SUCCESS)
1364             fatal ("failed to execute shell command");
1365         }
1366       free (cmd);
1367       append_to_tagfile = true;
1368     }
1369
1370   tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1371   if (tagf == NULL)
1372     pfatal (tagfile);
1373   put_entries (nodehead);       /* write all the tags (CTAGS) */
1374   free_tree (nodehead);
1375   nodehead = NULL;
1376   if (fclose (tagf) == EOF)
1377     pfatal (tagfile);
1378
1379   if (CTAGS)
1380     if (append_to_tagfile || update)
1381       {
1382         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1383         /* Maybe these should be used:
1384            setenv ("LC_COLLATE", "C", 1);
1385            setenv ("LC_ALL", "C", 1); */
1386         char *z = stpcpy (cmd, "sort -u -o ");
1387         z = stpcpy (z, tagfile);
1388         *z++ = ' ';
1389         strcpy (z, tagfile);
1390         exit (system (cmd));
1391       }
1392   return EXIT_SUCCESS;
1393 }
1394
1395
1396 /*
1397  * Return a compressor given the file name.  If EXTPTR is non-zero,
1398  * return a pointer into FILE where the compressor-specific
1399  * extension begins.  If no compressor is found, NULL is returned
1400  * and EXTPTR is not significant.
1401  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1402  */
1403 static compressor *
1404 get_compressor_from_suffix (char *file, char **extptr)
1405 {
1406   compressor *compr;
1407   char *slash, *suffix;
1408
1409   /* File has been processed by canonicalize_filename,
1410      so we don't need to consider backslashes on DOS_NT.  */
1411   slash = strrchr (file, '/');
1412   suffix = strrchr (file, '.');
1413   if (suffix == NULL || suffix < slash)
1414     return NULL;
1415   if (extptr != NULL)
1416     *extptr = suffix;
1417   suffix += 1;
1418   /* Let those poor souls who live with DOS 8+3 file name limits get
1419      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1420      Only the first do loop is run if not MSDOS */
1421   do
1422     {
1423       for (compr = compressors; compr->suffix != NULL; compr++)
1424         if (streq (compr->suffix, suffix))
1425           return compr;
1426       if (!MSDOS)
1427         break;                  /* do it only once: not really a loop */
1428       if (extptr != NULL)
1429         *extptr = ++suffix;
1430     } while (*suffix != '\0');
1431   return NULL;
1432 }
1433
1434
1435
1436 /*
1437  * Return a language given the name.
1438  */
1439 static language *
1440 get_language_from_langname (const char *name)
1441 {
1442   language *lang;
1443
1444   if (name == NULL)
1445     error ("empty language name");
1446   else
1447     {
1448       for (lang = lang_names; lang->name != NULL; lang++)
1449         if (streq (name, lang->name))
1450           return lang;
1451       error ("unknown language \"%s\"", name);
1452     }
1453
1454   return NULL;
1455 }
1456
1457
1458 /*
1459  * Return a language given the interpreter name.
1460  */
1461 static language *
1462 get_language_from_interpreter (char *interpreter)
1463 {
1464   language *lang;
1465   const char **iname;
1466
1467   if (interpreter == NULL)
1468     return NULL;
1469   for (lang = lang_names; lang->name != NULL; lang++)
1470     if (lang->interpreters != NULL)
1471       for (iname = lang->interpreters; *iname != NULL; iname++)
1472         if (streq (*iname, interpreter))
1473             return lang;
1474
1475   return NULL;
1476 }
1477
1478
1479
1480 /*
1481  * Return a language given the file name.
1482  */
1483 static language *
1484 get_language_from_filename (char *file, int case_sensitive)
1485 {
1486   language *lang;
1487   const char **name, **ext, *suffix;
1488   char *slash;
1489
1490   /* Try whole file name first. */
1491   slash = strrchr (file, '/');
1492   if (slash != NULL)
1493     file = slash + 1;
1494 #ifdef DOS_NT
1495   else if (file[0] && file[1] == ':')
1496     file += 2;
1497 #endif
1498   for (lang = lang_names; lang->name != NULL; lang++)
1499     if (lang->filenames != NULL)
1500       for (name = lang->filenames; *name != NULL; name++)
1501         if ((case_sensitive)
1502             ? streq (*name, file)
1503             : strcaseeq (*name, file))
1504           return lang;
1505
1506   /* If not found, try suffix after last dot. */
1507   suffix = strrchr (file, '.');
1508   if (suffix == NULL)
1509     return NULL;
1510   suffix += 1;
1511   for (lang = lang_names; lang->name != NULL; lang++)
1512     if (lang->suffixes != NULL)
1513       for (ext = lang->suffixes; *ext != NULL; ext++)
1514         if ((case_sensitive)
1515             ? streq (*ext, suffix)
1516             : strcaseeq (*ext, suffix))
1517           return lang;
1518   return NULL;
1519 }
1520
1521 \f
1522 /*
1523  * This routine is called on each file argument.
1524  */
1525 static void
1526 process_file_name (char *file, language *lang)
1527 {
1528   FILE *inf;
1529   fdesc *fdp;
1530   compressor *compr;
1531   char *compressed_name, *uncompressed_name;
1532   char *ext, *real_name, *tmp_name;
1533   int retval;
1534
1535   canonicalize_filename (file);
1536   if (streq (file, tagfile) && !streq (tagfile, "-"))
1537     {
1538       error ("skipping inclusion of %s in self.", file);
1539       return;
1540     }
1541   compr = get_compressor_from_suffix (file, &ext);
1542   if (compr)
1543     {
1544       compressed_name = file;
1545       uncompressed_name = savenstr (file, ext - file);
1546     }
1547   else
1548     {
1549       compressed_name = NULL;
1550       uncompressed_name = file;
1551     }
1552
1553   /* If the canonicalized uncompressed name
1554      has already been dealt with, skip it silently. */
1555   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1556     {
1557       assert (fdp->infname != NULL);
1558       if (streq (uncompressed_name, fdp->infname))
1559         goto cleanup;
1560     }
1561
1562   inf = fopen (file, "r" FOPEN_BINARY);
1563   if (inf)
1564     real_name = file;
1565   else
1566     {
1567       int file_errno = errno;
1568       if (compressed_name)
1569         {
1570           /* Try with the given suffix.  */
1571           inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1572           if (inf)
1573             real_name = uncompressed_name;
1574         }
1575       else
1576         {
1577           /* Try all possible suffixes.  */
1578           for (compr = compressors; compr->suffix != NULL; compr++)
1579             {
1580               compressed_name = concat (file, ".", compr->suffix);
1581               inf = fopen (compressed_name, "r" FOPEN_BINARY);
1582               if (inf)
1583                 {
1584                   real_name = compressed_name;
1585                   break;
1586                 }
1587               if (MSDOS)
1588                 {
1589                   char *suf = compressed_name + strlen (file);
1590                   size_t suflen = strlen (compr->suffix) + 1;
1591                   for ( ; suf[1]; suf++, suflen--)
1592                     {
1593                       memmove (suf, suf + 1, suflen);
1594                       inf = fopen (compressed_name, "r" FOPEN_BINARY);
1595                       if (inf)
1596                         {
1597                           real_name = compressed_name;
1598                           break;
1599                         }
1600                     }
1601                   if (inf)
1602                     break;
1603                 }
1604               free (compressed_name);
1605               compressed_name = NULL;
1606             }
1607         }
1608       if (! inf)
1609         {
1610           errno = file_errno;
1611           perror (file);
1612           goto cleanup;
1613         }
1614     }
1615
1616   if (real_name == compressed_name)
1617     {
1618       fclose (inf);
1619       tmp_name = etags_mktmp ();
1620       if (!tmp_name)
1621         inf = NULL;
1622       else
1623         {
1624 #if MSDOS || defined (DOS_NT)
1625           char *cmd1 = concat (compr->command, " \"", real_name);
1626           char *cmd = concat (cmd1, "\" > ", tmp_name);
1627 #else
1628           char *cmd1 = concat (compr->command, " '", real_name);
1629           char *cmd = concat (cmd1, "' > ", tmp_name);
1630 #endif
1631           free (cmd1);
1632           int tmp_errno;
1633           if (system (cmd) == -1)
1634             {
1635               inf = NULL;
1636               tmp_errno = EINVAL;
1637             }
1638           else
1639             {
1640               inf = fopen (tmp_name, "r" FOPEN_BINARY);
1641               tmp_errno = errno;
1642             }
1643           free (cmd);
1644           errno = tmp_errno;
1645         }
1646
1647       if (!inf)
1648         {
1649           perror (real_name);
1650           goto cleanup;
1651         }
1652     }
1653
1654   process_file (inf, uncompressed_name, lang);
1655
1656   retval = fclose (inf);
1657   if (real_name == compressed_name)
1658     {
1659       remove (tmp_name);
1660       free (tmp_name);
1661     }
1662   if (retval < 0)
1663     pfatal (file);
1664
1665  cleanup:
1666   if (compressed_name != file)
1667     free (compressed_name);
1668   if (uncompressed_name != file)
1669     free (uncompressed_name);
1670   last_node = NULL;
1671   curfdp = NULL;
1672   return;
1673 }
1674
1675 static void
1676 process_file (FILE *fh, char *fn, language *lang)
1677 {
1678   static const fdesc emptyfdesc;
1679   fdesc *fdp;
1680
1681   infilename = fn;
1682   /* Create a new input file description entry. */
1683   fdp = xnew (1, fdesc);
1684   *fdp = emptyfdesc;
1685   fdp->next = fdhead;
1686   fdp->infname = savestr (fn);
1687   fdp->lang = lang;
1688   fdp->infabsname = absolute_filename (fn, cwd);
1689   fdp->infabsdir = absolute_dirname (fn, cwd);
1690   if (filename_is_absolute (fn))
1691     {
1692       /* An absolute file name.  Canonicalize it. */
1693       fdp->taggedfname = absolute_filename (fn, NULL);
1694     }
1695   else
1696     {
1697       /* A file name relative to cwd.  Make it relative
1698          to the directory of the tags file. */
1699       fdp->taggedfname = relative_filename (fn, tagfiledir);
1700     }
1701   fdp->usecharno = true;        /* use char position when making tags */
1702   fdp->prop = NULL;
1703   fdp->written = false;         /* not written on tags file yet */
1704
1705   fdhead = fdp;
1706   curfdp = fdhead;              /* the current file description */
1707
1708   find_entries (fh);
1709
1710   /* If not Ctags, and if this is not metasource and if it contained no #line
1711      directives, we can write the tags and free all nodes pointing to
1712      curfdp. */
1713   if (!CTAGS
1714       && curfdp->usecharno      /* no #line directives in this file */
1715       && !curfdp->lang->metasource)
1716     {
1717       node *np, *prev;
1718
1719       /* Look for the head of the sublist relative to this file.  See add_node
1720          for the structure of the node tree. */
1721       prev = NULL;
1722       for (np = nodehead; np != NULL; prev = np, np = np->left)
1723         if (np->fdp == curfdp)
1724           break;
1725
1726       /* If we generated tags for this file, write and delete them. */
1727       if (np != NULL)
1728         {
1729           /* This is the head of the last sublist, if any.  The following
1730              instructions depend on this being true. */
1731           assert (np->left == NULL);
1732
1733           assert (fdhead == curfdp);
1734           assert (last_node->fdp == curfdp);
1735           put_entries (np);     /* write tags for file curfdp->taggedfname */
1736           free_tree (np);       /* remove the written nodes */
1737           if (prev == NULL)
1738             nodehead = NULL;    /* no nodes left */
1739           else
1740             prev->left = NULL;  /* delete the pointer to the sublist */
1741         }
1742     }
1743 }
1744
1745 static void
1746 reset_input (FILE *inf)
1747 {
1748   if (fseek (inf, 0, SEEK_SET) != 0)
1749     perror (infilename);
1750 }
1751
1752 /*
1753  * This routine opens the specified file and calls the function
1754  * which finds the function and type definitions.
1755  */
1756 static void
1757 find_entries (FILE *inf)
1758 {
1759   char *cp;
1760   language *lang = curfdp->lang;
1761   Lang_function *parser = NULL;
1762
1763   /* If user specified a language, use it. */
1764   if (lang != NULL && lang->function != NULL)
1765     {
1766       parser = lang->function;
1767     }
1768
1769   /* Else try to guess the language given the file name. */
1770   if (parser == NULL)
1771     {
1772       lang = get_language_from_filename (curfdp->infname, true);
1773       if (lang != NULL && lang->function != NULL)
1774         {
1775           curfdp->lang = lang;
1776           parser = lang->function;
1777         }
1778     }
1779
1780   /* Else look for sharp-bang as the first two characters. */
1781   if (parser == NULL
1782       && readline_internal (&lb, inf, infilename) > 0
1783       && lb.len >= 2
1784       && lb.buffer[0] == '#'
1785       && lb.buffer[1] == '!')
1786     {
1787       char *lp;
1788
1789       /* Set lp to point at the first char after the last slash in the
1790          line or, if no slashes, at the first nonblank.  Then set cp to
1791          the first successive blank and terminate the string. */
1792       lp = strrchr (lb.buffer+2, '/');
1793       if (lp != NULL)
1794         lp += 1;
1795       else
1796         lp = skip_spaces (lb.buffer + 2);
1797       cp = skip_non_spaces (lp);
1798       *cp = '\0';
1799
1800       if (strlen (lp) > 0)
1801         {
1802           lang = get_language_from_interpreter (lp);
1803           if (lang != NULL && lang->function != NULL)
1804             {
1805               curfdp->lang = lang;
1806               parser = lang->function;
1807             }
1808         }
1809     }
1810
1811   reset_input (inf);
1812
1813   /* Else try to guess the language given the case insensitive file name. */
1814   if (parser == NULL)
1815     {
1816       lang = get_language_from_filename (curfdp->infname, false);
1817       if (lang != NULL && lang->function != NULL)
1818         {
1819           curfdp->lang = lang;
1820           parser = lang->function;
1821         }
1822     }
1823
1824   /* Else try Fortran or C. */
1825   if (parser == NULL)
1826     {
1827       node *old_last_node = last_node;
1828
1829       curfdp->lang = get_language_from_langname ("fortran");
1830       find_entries (inf);
1831
1832       if (old_last_node == last_node)
1833         /* No Fortran entries found.  Try C. */
1834         {
1835           reset_input (inf);
1836           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1837           find_entries (inf);
1838         }
1839       return;
1840     }
1841
1842   if (!no_line_directive
1843       && curfdp->lang != NULL && curfdp->lang->metasource)
1844     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1845        file, or anyway we parsed a file that is automatically generated from
1846        this one.  If this is the case, the bingo.c file contained #line
1847        directives that generated tags pointing to this file.  Let's delete
1848        them all before parsing this file, which is the real source. */
1849     {
1850       fdesc **fdpp = &fdhead;
1851       while (*fdpp != NULL)
1852         if (*fdpp != curfdp
1853             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1854           /* We found one of those!  We must delete both the file description
1855              and all tags referring to it. */
1856           {
1857             fdesc *badfdp = *fdpp;
1858
1859             /* Delete the tags referring to badfdp->taggedfname
1860                that were obtained from badfdp->infname. */
1861             invalidate_nodes (badfdp, &nodehead);
1862
1863             *fdpp = badfdp->next; /* remove the bad description from the list */
1864             free_fdesc (badfdp);
1865           }
1866         else
1867           fdpp = &(*fdpp)->next; /* advance the list pointer */
1868     }
1869
1870   assert (parser != NULL);
1871
1872   /* Generic initializations before reading from file. */
1873   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1874
1875   /* Generic initializations before parsing file with readline. */
1876   lineno = 0;                  /* reset global line number */
1877   charno = 0;                  /* reset global char number */
1878   linecharno = 0;              /* reset global char number of line start */
1879
1880   parser (inf);
1881
1882   regex_tag_multiline ();
1883 }
1884
1885 \f
1886 /*
1887  * Check whether an implicitly named tag should be created,
1888  * then call `pfnote'.
1889  * NAME is a string that is internally copied by this function.
1890  *
1891  * TAGS format specification
1892  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1893  * The following is explained in some more detail in etc/ETAGS.EBNF.
1894  *
1895  * make_tag creates tags with "implicit tag names" (unnamed tags)
1896  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1897  *  1. NAME does not contain any of the characters in NONAM;
1898  *  2. LINESTART contains name as either a rightmost, or rightmost but
1899  *     one character, substring;
1900  *  3. the character, if any, immediately before NAME in LINESTART must
1901  *     be a character in NONAM;
1902  *  4. the character, if any, immediately after NAME in LINESTART must
1903  *     also be a character in NONAM.
1904  *
1905  * The implementation uses the notinname() macro, which recognizes the
1906  * characters stored in the string `nonam'.
1907  * etags.el needs to use the same characters that are in NONAM.
1908  */
1909 static void
1910 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1911           int namelen,          /* tag length */
1912           bool is_func,         /* tag is a function */
1913           char *linestart,      /* start of the line where tag is */
1914           int linelen,          /* length of the line where tag is */
1915           int lno,              /* line number */
1916           long int cno)         /* character number */
1917 {
1918   bool named = (name != NULL && namelen > 0);
1919   char *nname = NULL;
1920
1921   if (!CTAGS && named)          /* maybe set named to false */
1922     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1923        such that etags.el can guess a name from it. */
1924     {
1925       int i;
1926       register const char *cp = name;
1927
1928       for (i = 0; i < namelen; i++)
1929         if (notinname (*cp++))
1930           break;
1931       if (i == namelen)                         /* rule #1 */
1932         {
1933           cp = linestart + linelen - namelen;
1934           if (notinname (linestart[linelen-1]))
1935             cp -= 1;                            /* rule #4 */
1936           if (cp >= linestart                   /* rule #2 */
1937               && (cp == linestart
1938                   || notinname (cp[-1]))        /* rule #3 */
1939               && strneq (name, cp, namelen))    /* rule #2 */
1940             named = false;      /* use implicit tag name */
1941         }
1942     }
1943
1944   if (named)
1945     nname = savenstr (name, namelen);
1946
1947   pfnote (nname, is_func, linestart, linelen, lno, cno);
1948 }
1949
1950 /* Record a tag. */
1951 static void
1952 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1953         long int cno)
1954                                 /* tag name, or NULL if unnamed */
1955                                 /* tag is a function */
1956                                 /* start of the line where tag is */
1957                                 /* length of the line where tag is */
1958                                 /* line number */
1959                                 /* character number */
1960 {
1961   register node *np;
1962
1963   assert (name == NULL || name[0] != '\0');
1964   if (CTAGS && name == NULL)
1965     return;
1966
1967   np = xnew (1, node);
1968
1969   /* If ctags mode, change name "main" to M<thisfilename>. */
1970   if (CTAGS && !cxref_style && streq (name, "main"))
1971     {
1972       char *fp = strrchr (curfdp->taggedfname, '/');
1973       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1974       fp = strrchr (np->name, '.');
1975       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1976         fp[0] = '\0';
1977     }
1978   else
1979     np->name = name;
1980   np->valid = true;
1981   np->been_warned = false;
1982   np->fdp = curfdp;
1983   np->is_func = is_func;
1984   np->lno = lno;
1985   if (np->fdp->usecharno)
1986     /* Our char numbers are 0-base, because of C language tradition?
1987        ctags compatibility?  old versions compatibility?   I don't know.
1988        Anyway, since emacs's are 1-base we expect etags.el to take care
1989        of the difference.  If we wanted to have 1-based numbers, we would
1990        uncomment the +1 below. */
1991     np->cno = cno /* + 1 */ ;
1992   else
1993     np->cno = invalidcharno;
1994   np->left = np->right = NULL;
1995   if (CTAGS && !cxref_style)
1996     {
1997       if (strlen (linestart) < 50)
1998         np->regex = concat (linestart, "$", "");
1999       else
2000         np->regex = savenstr (linestart, 50);
2001     }
2002   else
2003     np->regex = savenstr (linestart, linelen);
2004
2005   add_node (np, &nodehead);
2006 }
2007
2008 /*
2009  * free_tree ()
2010  *      recurse on left children, iterate on right children.
2011  */
2012 static void
2013 free_tree (register node *np)
2014 {
2015   while (np)
2016     {
2017       register node *node_right = np->right;
2018       free_tree (np->left);
2019       free (np->name);
2020       free (np->regex);
2021       free (np);
2022       np = node_right;
2023     }
2024 }
2025
2026 /*
2027  * free_fdesc ()
2028  *      delete a file description
2029  */
2030 static void
2031 free_fdesc (register fdesc *fdp)
2032 {
2033   free (fdp->infname);
2034   free (fdp->infabsname);
2035   free (fdp->infabsdir);
2036   free (fdp->taggedfname);
2037   free (fdp->prop);
2038   free (fdp);
2039 }
2040
2041 /*
2042  * add_node ()
2043  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2044  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2045  *      balancing.
2046  *
2047  *      add_node is the only function allowed to add nodes, so it can
2048  *      maintain state.
2049  */
2050 static void
2051 add_node (node *np, node **cur_node_p)
2052 {
2053   register int dif;
2054   register node *cur_node = *cur_node_p;
2055
2056   if (cur_node == NULL)
2057     {
2058       *cur_node_p = np;
2059       last_node = np;
2060       return;
2061     }
2062
2063   if (!CTAGS)
2064     /* Etags Mode */
2065     {
2066       /* For each file name, tags are in a linked sublist on the right
2067          pointer.  The first tags of different files are a linked list
2068          on the left pointer.  last_node points to the end of the last
2069          used sublist. */
2070       if (last_node != NULL && last_node->fdp == np->fdp)
2071         {
2072           /* Let's use the same sublist as the last added node. */
2073           assert (last_node->right == NULL);
2074           last_node->right = np;
2075           last_node = np;
2076         }
2077       else if (cur_node->fdp == np->fdp)
2078         {
2079           /* Scanning the list we found the head of a sublist which is
2080              good for us.  Let's scan this sublist. */
2081           add_node (np, &cur_node->right);
2082         }
2083       else
2084         /* The head of this sublist is not good for us.  Let's try the
2085            next one. */
2086         add_node (np, &cur_node->left);
2087     } /* if ETAGS mode */
2088
2089   else
2090     {
2091       /* Ctags Mode */
2092       dif = strcmp (np->name, cur_node->name);
2093
2094       /*
2095        * If this tag name matches an existing one, then
2096        * do not add the node, but maybe print a warning.
2097        */
2098       if (no_duplicates && !dif)
2099         {
2100           if (np->fdp == cur_node->fdp)
2101             {
2102               if (!no_warnings)
2103                 {
2104                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2105                            np->fdp->infname, lineno, np->name);
2106                   fprintf (stderr, "Second entry ignored\n");
2107                 }
2108             }
2109           else if (!cur_node->been_warned && !no_warnings)
2110             {
2111               fprintf
2112                 (stderr,
2113                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2114                  np->fdp->infname, cur_node->fdp->infname, np->name);
2115               cur_node->been_warned = true;
2116             }
2117           return;
2118         }
2119
2120       /* Actually add the node */
2121       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2122     } /* if CTAGS mode */
2123 }
2124
2125 /*
2126  * invalidate_nodes ()
2127  *      Scan the node tree and invalidate all nodes pointing to the
2128  *      given file description (CTAGS case) or free them (ETAGS case).
2129  */
2130 static void
2131 invalidate_nodes (fdesc *badfdp, node **npp)
2132 {
2133   node *np = *npp;
2134
2135   if (np == NULL)
2136     return;
2137
2138   if (CTAGS)
2139     {
2140       if (np->left != NULL)
2141         invalidate_nodes (badfdp, &np->left);
2142       if (np->fdp == badfdp)
2143         np->valid = false;
2144       if (np->right != NULL)
2145         invalidate_nodes (badfdp, &np->right);
2146     }
2147   else
2148     {
2149       assert (np->fdp != NULL);
2150       if (np->fdp == badfdp)
2151         {
2152           *npp = np->left;      /* detach the sublist from the list */
2153           np->left = NULL;      /* isolate it */
2154           free_tree (np);       /* free it */
2155           invalidate_nodes (badfdp, npp);
2156         }
2157       else
2158         invalidate_nodes (badfdp, &np->left);
2159     }
2160 }
2161
2162 \f
2163 static int total_size_of_entries (node *);
2164 static int number_len (long) ATTRIBUTE_CONST;
2165
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is at least 1. */
static int
number_len (long int num)
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2175
2176 /*
2177  * Return total number of characters that put_entries will output for
2178  * the nodes in the linked list at the right of the specified node.
2179  * This count is irrelevant with etags.el since emacs 19.34 at least,
2180  * but is still supplied for backward compatibility.
2181  */
2182 static int
2183 total_size_of_entries (register node *np)
2184 {
2185   register int total = 0;
2186
2187   for (; np != NULL; np = np->right)
2188     if (np->valid)
2189       {
2190         total += strlen (np->regex) + 1;                /* pat\177 */
2191         if (np->name != NULL)
2192           total += strlen (np->name) + 1;               /* name\001 */
2193         total += number_len ((long) np->lno) + 1;       /* lno, */
2194         if (np->cno != invalidcharno)                   /* cno */
2195           total += number_len (np->cno);
2196         total += 1;                                     /* newline */
2197       }
2198
2199   return total;
2200 }
2201
2202 static void
2203 put_entries (register node *np)
2204 {
2205   register char *sp;
2206   static fdesc *fdp = NULL;
2207
2208   if (np == NULL)
2209     return;
2210
2211   /* Output subentries that precede this one */
2212   if (CTAGS)
2213     put_entries (np->left);
2214
2215   /* Output this entry */
2216   if (np->valid)
2217     {
2218       if (!CTAGS)
2219         {
2220           /* Etags mode */
2221           if (fdp != np->fdp)
2222             {
2223               fdp = np->fdp;
2224               fprintf (tagf, "\f\n%s,%d\n",
2225                        fdp->taggedfname, total_size_of_entries (np));
2226               fdp->written = true;
2227             }
2228           fputs (np->regex, tagf);
2229           fputc ('\177', tagf);
2230           if (np->name != NULL)
2231             {
2232               fputs (np->name, tagf);
2233               fputc ('\001', tagf);
2234             }
2235           fprintf (tagf, "%d,", np->lno);
2236           if (np->cno != invalidcharno)
2237             fprintf (tagf, "%ld", np->cno);
2238           fputs ("\n", tagf);
2239         }
2240       else
2241         {
2242           /* Ctags mode */
2243           if (np->name == NULL)
2244             error ("internal error: NULL name in ctags mode.");
2245
2246           if (cxref_style)
2247             {
2248               if (vgrind_style)
2249                 fprintf (stdout, "%s %s %d\n",
2250                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2251               else
2252                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2253                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2254             }
2255           else
2256             {
2257               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2258
2259               if (np->is_func)
2260                 {               /* function or #define macro with args */
2261                   putc (searchar, tagf);
2262                   putc ('^', tagf);
2263
2264                   for (sp = np->regex; *sp; sp++)
2265                     {
2266                       if (*sp == '\\' || *sp == searchar)
2267                         putc ('\\', tagf);
2268                       putc (*sp, tagf);
2269                     }
2270                   putc (searchar, tagf);
2271                 }
2272               else
2273                 {               /* anything else; text pattern inadequate */
2274                   fprintf (tagf, "%d", np->lno);
2275                 }
2276               putc ('\n', tagf);
2277             }
2278         }
2279     } /* if this node contains a valid tag */
2280
2281   /* Output subentries that follow this one */
2282   put_entries (np->right);
2283   if (!CTAGS)
2284     put_entries (np->left);
2285 }
2286
2287 \f
/* C extensions.  These flags tell the C-like languages apart; a keyword
   of the table below whose c_ext is non-zero is recognized only when
   the c_ext value passed to C_symtype shares a bit with it.  C_STAR and
   C_JAVA both contain the C_PLPL bit, which is why the table uses
   (C_JAVA & ~C_PLPL) for words that must not match in C++ or C*. */
#define C_EXT   0x00fff         /* mask covering the C extension bits */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2296
/*
 * The C symbol tables.
 *
 * sym_type classifies the words listed in the keyword table below;
 * the words given for each value are those of that table.
 */
enum sym_type
{
  st_none,              /* not a known keyword */
  st_C_objprot,         /* @interface, @protocol */
  st_C_objimpl,         /* @implementation */
  st_C_objend,          /* @end */
  st_C_gnumacro,        /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,          /* if, for, while, switch, return, import,
                           package, friend */
  st_C_attribute,       /* __attribute__, GTY */
  st_C_javastruct,      /* extends, implements */
  st_C_operator,        /* operator */
  st_C_class,           /* class */
  st_C_template,        /* template */
  st_C_struct,          /* struct, union, namespace, domain, interface */
  st_C_extern,          /* extern */
  st_C_enum,            /* enum */
  st_C_define,          /* define, undef */
  st_C_typedef          /* typedef */
};
2311
2312 /* Feed stuff between (but not including) %[ and %] lines to:
2313      gperf -m 5
2314 %[
2315 %compare-strncmp
2316 %enum
2317 %struct-type
2318 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2319 %%
2320 if,             0,                      st_C_ignore
2321 for,            0,                      st_C_ignore
2322 while,          0,                      st_C_ignore
2323 switch,         0,                      st_C_ignore
2324 return,         0,                      st_C_ignore
2325 __attribute__,  0,                      st_C_attribute
2326 GTY,            0,                      st_C_attribute
2327 @interface,     0,                      st_C_objprot
2328 @protocol,      0,                      st_C_objprot
2329 @implementation,0,                      st_C_objimpl
2330 @end,           0,                      st_C_objend
2331 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2332 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2333 friend,         C_PLPL,                 st_C_ignore
2334 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2335 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2336 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2337 class,          0,                      st_C_class
2338 namespace,      C_PLPL,                 st_C_struct
2339 domain,         C_STAR,                 st_C_struct
2340 union,          0,                      st_C_struct
2341 struct,         0,                      st_C_struct
2342 extern,         0,                      st_C_extern
2343 enum,           0,                      st_C_enum
2344 typedef,        0,                      st_C_typedef
2345 define,         0,                      st_C_define
2346 undef,          0,                      st_C_define
2347 operator,       C_PLPL,                 st_C_operator
2348 template,       0,                      st_C_template
2349 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2350 DEFUN,          0,                      st_C_gnumacro
2351 SYSCALL,        0,                      st_C_gnumacro
2352 ENTRY,          0,                      st_C_gnumacro
2353 PSEUDO,         0,                      st_C_gnumacro
2354 # These are defined inside C functions, so currently they are not met.
2355 # EXFUN used in glibc, DEFVAR_* in emacs.
2356 #EXFUN,         0,                      st_C_gnumacro
2357 #DEFVAR_,       0,                      st_C_gnumacro
2358 %]
2359 and replace lines between %< and %> with its output, then:
2360  - remove the #if characterset check
2361  - make in_word_set static and not inline. */
2362 /*%<*/
2363 /* C code produced by gperf version 3.0.1 */
2364 /* Command-line: gperf -m 5  */
2365 /* Computed positions: -k'2-3' */
2366
2367 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2368 /* maximum key range = 33, duplicates = 0 */
2369
/* Hash the LEN characters at STR for the lookup in in_word_set: LEN
   plus the association values of the second character and, unless LEN
   is 2, of the third one.  STR must hold at least that many
   characters. */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  const unsigned char *key = (const unsigned char *) str;
  int hval = len + asso_values[key[1]];

  /* Every length but 2 also takes the third character into account. */
  if (len != 2)
    hval += asso_values[key[2]];
  return hval;
}
2415
2416 static struct C_stab_entry *
2417 in_word_set (register const char *str, register unsigned int len)
2418 {
2419   enum
2420     {
2421       TOTAL_KEYWORDS = 33,
2422       MIN_WORD_LENGTH = 2,
2423       MAX_WORD_LENGTH = 15,
2424       MIN_HASH_VALUE = 2,
2425       MAX_HASH_VALUE = 34
2426     };
2427
2428   static struct C_stab_entry wordlist[] =
2429     {
2430       {""}, {""},
2431       {"if",            0,                      st_C_ignore},
2432       {"GTY",           0,                      st_C_attribute},
2433       {"@end",          0,                      st_C_objend},
2434       {"union",         0,                      st_C_struct},
2435       {"define",                0,                      st_C_define},
2436       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2437       {"template",      0,                      st_C_template},
2438       {"operator",      C_PLPL,                 st_C_operator},
2439       {"@interface",    0,                      st_C_objprot},
2440       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2441       {"friend",                C_PLPL,                 st_C_ignore},
2442       {"typedef",       0,                      st_C_typedef},
2443       {"return",                0,                      st_C_ignore},
2444       {"@implementation",0,                     st_C_objimpl},
2445       {"@protocol",     0,                      st_C_objprot},
2446       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2447       {"extern",                0,                      st_C_extern},
2448       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2449       {"struct",                0,                      st_C_struct},
2450       {"domain",                C_STAR,                 st_C_struct},
2451       {"switch",                0,                      st_C_ignore},
2452       {"enum",          0,                      st_C_enum},
2453       {"for",           0,                      st_C_ignore},
2454       {"namespace",     C_PLPL,                 st_C_struct},
2455       {"class",         0,                      st_C_class},
2456       {"while",         0,                      st_C_ignore},
2457       {"undef",         0,                      st_C_define},
2458       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2459       {"__attribute__", 0,                      st_C_attribute},
2460       {"SYSCALL",       0,                      st_C_gnumacro},
2461       {"ENTRY",         0,                      st_C_gnumacro},
2462       {"PSEUDO",                0,                      st_C_gnumacro},
2463       {"DEFUN",         0,                      st_C_gnumacro}
2464     };
2465
2466   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2467     {
2468       int key = hash (str, len);
2469
2470       if (key <= MAX_HASH_VALUE && key >= 0)
2471         {
2472           const char *s = wordlist[key].name;
2473
2474           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2475             return &wordlist[key];
2476         }
2477     }
2478   return 0;
2479 }
2480 /*%>*/
2481
2482 static enum sym_type
2483 C_symtype (char *str, int len, int c_ext)
2484 {
2485   register struct C_stab_entry *se = in_word_set (str, len);
2486
2487   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2488     return st_none;
2489   return se->type;
2490 }
2491
2492 \f
2493 /*
2494  * Ignoring __attribute__ ((list))
      *
      * consider_token sets inattribute to true when it sees the
      * __attribute__ keyword.  While it is true, C_entries does not
      * hand tokens to consider_token.
2495  */
2496 static bool inattribute;        /* looking at an __attribute__ construct */
2497
2498 /*
2499  * C functions and variables are recognized using a simple
2500  * finite automaton.  fvdef is its state variable.
      * Both consider_token and C_entries read and update it; C_entries
      * resets it to fvnone when it starts working on a file.
2501  */
2502 static enum
2503 {
2504   fvnone,                       /* nothing seen */
2505   fdefunkey,                    /* Emacs DEFUN keyword seen */
2506   fdefunname,                   /* Emacs DEFUN name seen */
2507   foperator,                    /* func: operator keyword seen (cplpl) */
2508   fvnameseen,                   /* function or variable name seen */
2509   fstartlist,                   /* func: just after open parenthesis */
2510   finlist,                      /* func: in parameter list */
2511   flistseen,                    /* func: after parameter list */
2512   fignore,                      /* func: before open brace */
2513   vignore                       /* var-like: ignore until ';' */
2514 } fvdef;
2515
2516 static bool fvextern;           /* func or var: extern keyword seen */
2517
2518 /*
2519  * typedefs are recognized using a simple finite automaton.
2520  * typdef is its state variable.
      * consider_token moves it from tnone to tkeyseen on the typedef
      * keyword (only when the `typedefs' option is set), and from
      * tkeyseen to ttypeseen on a following class/struct/enum keyword
      * or non-keyword token.
2521  */
2522 static enum
2523 {
2524   tnone,                        /* nothing seen */
2525   tkeyseen,                     /* typedef keyword seen */
2526   ttypeseen,                    /* defined type seen */
2527   tinbody,                      /* inside typedef body */
2528   tend,                         /* just before typedef tag */
2529   tignore                       /* junk after typedef tag */
2530 } typdef;
2531
2532 /*
2533  * struct-like structures (enum, struct and union) are recognized
2534  * using another simple finite automaton.  `structdef' is its state
2535  * variable.
      * consider_token sets skeyseen on a struct-like keyword, stagseen on
      * the token that follows it, and scolonseen when an st_C_javastruct
      * keyword (`extends', `implements') comes after the tag.
2536  */
2537 static enum
2538 {
2539   snone,                        /* nothing seen yet,
2540                                    or in struct body if bracelev > 0 */
2541   skeyseen,                     /* struct-like keyword seen */
2542   stagseen,                     /* struct-like tag seen */
2543   scolonseen                    /* colon seen after struct-like tag */
2544 } structdef;
2545
2546 /*
2547  * When objdef is different from onone, objtag is the name of the class.
      * consider_token stores the result of savenstr here in the
      * oimplementation and oprotocol states; the value it replaces is
      * not freed there.
2548  */
2549 static const char *objtag = "<uninited>";
2550
2551 /*
2552  * Yet another little state machine to deal with preprocessor lines.
      * C_entries enters dsharpseen when it decides a `#' starts a
      * preprocessor line; consider_token then moves to ddefineseen or
      * dignorerest depending on the token after the `#'.  The CNL macro
      * resets the state to dnone.
2553  */
2554 static enum
2555 {
2556   dnone,                        /* nothing seen */
2557   dsharpseen,                   /* '#' seen as first char on line */
2558   ddefineseen,                  /* '#' and 'define' seen */
2559   dignorerest                   /* ignore rest of line */
2560 } definedef;
2561
2562 /*
2563  * State machine for Objective C protocols and implementations.
2564  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2565  */
2566 static enum
2567 {
2568   onone,                        /* nothing seen */
2569   oprotocol,                    /* @interface or @protocol seen */
2570   oimplementation,              /* @implementation seen */
2571   otagseen,                     /* class name seen */
2572   oparenseen,                   /* parenthesis before category seen */
2573   ocatseen,                     /* category name seen */
2574   oinbody,                      /* in @implementation body */
2575   omethodsign,                  /* in @implementation body, after +/- */
2576   omethodtag,                   /* after method name */
2577   omethodcolon,                 /* after method colon */
2578   omethodparm,                  /* after method parameter */
2579   oignore                       /* wait for @end */
2580 } objdef;
2581
2582
2583 /*
2584  * Use this structure to keep info about the token read, and how it
2585  * should be tagged.  Used by the make_C_tag function to build a tag.
2586  */
2587 static struct tok
2588 {
2589   char *line;                   /* string containing the token */
2590   int offset;                   /* where the token starts in LINE */
2591   int length;                   /* token length */
2592   /*
2593     The previous members can be used to pass strings around for generic
2594     purposes.  The following ones specifically refer to creating tags.  In this
2595     case the token contained here is the pattern that will be used to create a
2596     tag.
2597   */
2598   bool valid;                   /* when false, make_C_tag does not create a
2599                                    normal tag; the token should be
2600                                    invalidated whenever a state machine is
                                     reset prematurely */
2601   bool named;                   /* create a named tag */
2602   int lineno;                   /* source line number of tag */
2603   long linepos;                 /* source char number of tag */
2604 } token;                        /* latest token read */
2605
2606 /*
2607  * Variables and functions for dealing with nested structures.
2608  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2609  */
2610 static void pushclass_above (int, char *, int);
2611 static void popclass_above (int);
2612 static void write_classname (linebuffer *, const char *qualifier);
2613
2614 static struct {
2615   char **cname;                 /* nested class names; an entry may be NULL */
2616   int *bracelev;                /* nested class brace level, one per cname entry */
2617   int nl;                       /* class nesting level (elements used) */
2618   int size;                     /* allocated length of both arrays */
2619 } cstack;                       /* stack for nested declaration tags */
2620 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2621 #define nestlev         (cstack.nl)
2622 /* After struct keyword or in struct body, not inside a nested function.
        The expansion names `bracelev' directly, so a variable of that
        name must be in scope wherever this macro is used. */
2623 #define instruct        (structdef == snone && nestlev > 0                      \
2624                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2625
2626 static void
2627 pushclass_above (int bracelev, char *str, int len)
2628 {
2629   int nl;
2630
2631   popclass_above (bracelev);
2632   nl = cstack.nl;
2633   if (nl >= cstack.size)
2634     {
2635       int size = cstack.size *= 2;
2636       xrnew (cstack.cname, size, char *);
2637       xrnew (cstack.bracelev, size, int);
2638     }
2639   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2640   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2641   cstack.bracelev[nl] = bracelev;
2642   cstack.nl = nl + 1;
2643 }
2644
2645 static void
2646 popclass_above (int bracelev)
2647 {
2648   int nl;
2649
2650   for (nl = cstack.nl - 1;
2651        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2652        nl--)
2653     {
2654       free (cstack.cname[nl]);
2655       cstack.nl = nl;
2656     }
2657 }
2658
2659 static void
2660 write_classname (linebuffer *cn, const char *qualifier)
2661 {
2662   int i, len;
2663   int qlen = strlen (qualifier);
2664
2665   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2666     {
2667       len = 0;
2668       cn->len = 0;
2669       cn->buffer[0] = '\0';
2670     }
2671   else
2672     {
2673       len = strlen (cstack.cname[0]);
2674       linebuffer_setlen (cn, len);
2675       strcpy (cn->buffer, cstack.cname[0]);
2676     }
2677   for (i = 1; i < cstack.nl; i++)
2678     {
2679       char *s = cstack.cname[i];
2680       if (s == NULL)
2681         continue;
2682       linebuffer_setlen (cn, len + qlen + strlen (s));
2683       len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2684     }
2685 }
2686
2687 \f
2688 static bool consider_token (char *, int, int, int *, int, int, bool *);
2689 static void make_C_tag (bool);
2690
2691 /*
2692  * consider_token ()
2693  *      checks to see if the current token is at the start of a
2694  *      function or variable, or corresponds to a typedef, or
2695  *      is a struct/union/enum tag, or #define, or an enum constant.
2696  *
2697  *      *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2698  *      with args.  C_EXTP points to which language we are looking at.
2699  *
      *      Returns true when the token is one of the above; C_entries
      *      only goes on to build a tag name for the token in that case.
      *      *C_EXTP may be updated here: when C_AUTO is set and a C++
      *      construct is seen, C_PLPL is turned on and C_AUTO cleared.
      *
      *      The state machines are consulted in a fixed order: __attribute__,
      *      definedef, typdef, struct-like keywords, objdef, then fvdef.
      *      Many branches return early, so the order matters.
      *
2700  * Globals
2701  *      fvdef                   IN OUT
2702  *      structdef               IN OUT
2703  *      definedef               IN OUT
2704  *      typdef                  IN OUT
2705  *      objdef                  IN OUT
2706  */
2707
2708 static bool
2709 consider_token (char *str, int len, int c, int *c_extp,
2710                 int bracelev, int parlev, bool *is_func_or_var)
2711                                 /* IN: token pointer */
2712                                 /* IN: token length */
2713                                 /* IN: first char after the token */
2714                                 /* IN, OUT: C extensions mask */
2715                                 /* IN: brace level */
2716                                 /* IN: parenthesis level */
2717                                 /* OUT: function or variable found */
2718 {
2719   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2720      structtype is the type of the preceding struct-like keyword, and
2721      structbracelev is the brace level where it has been seen. */
       /* These are static, so their values persist from one call to the
          next. */
2722   static enum sym_type structtype;
2723   static int structbracelev;
2724   static enum sym_type toktype;
2725
2726
2727   toktype = C_symtype (str, len, *c_extp);
2728
2729   /*
2730    * Skip __attribute__
2731    */
2732   if (toktype == st_C_attribute)
2733     {
2734       inattribute = true;
2735       return false;
2736      }
2737
2738    /*
2739     * Advance the definedef state machine.
2740     */
2741    switch (definedef)
2742      {
2743      case dnone:
2744        /* We're not on a preprocessor line. */
2745        if (toktype == st_C_gnumacro)
2746          {
2747            fvdef = fdefunkey;
2748            return false;
2749          }
2750        break;
2751      case dsharpseen:
            /* This is the first token after the `#'.  It is never tagged
               itself; it only decides whether the rest of the line is
               examined. */
2752        if (toktype == st_C_define)
2753          {
2754            definedef = ddefineseen;
2755          }
2756        else
2757          {
2758            definedef = dignorerest;
2759          }
2760        return false;
2761      case ddefineseen:
2762        /*
2763         * Make a tag for any macro, unless it is a constant
2764         * and constantypedefs is false.
2765         */
2766        definedef = dignorerest;
            /* A `(' right after the name means a macro with arguments. */
2767        *is_func_or_var = (c == '(');
2768        if (!*is_func_or_var && !constantypedefs)
2769          return false;
2770        else
2771          return true;
2772      case dignorerest:
2773        return false;
2774      default:
2775        error ("internal error: definedef value.");
2776      }
2777
2778    /*
2779     * Now typedefs
2780     */
2781    switch (typdef)
2782      {
2783      case tnone:
2784        if (toktype == st_C_typedef)
2785          {
                /* The automaton is only started when typedef tagging is
                   enabled, but the fv state is reset either way. */
2786            if (typedefs)
2787              typdef = tkeyseen;
2788            fvextern = false;
2789            fvdef = fvnone;
2790            return false;
2791          }
2792        break;
2793      case tkeyseen:
2794        switch (toktype)
2795          {
2796          case st_none:
2797          case st_C_class:
2798          case st_C_struct:
2799          case st_C_enum:
2800            typdef = ttypeseen;
2801            break;
2802          default:
2803            break;
2804          }
2805        break;
2806      case ttypeseen:
2807        if (structdef == snone && fvdef == fvnone)
2808          {
2809            fvdef = fvnameseen;
2810            return true;
2811          }
2812        break;
2813      case tend:
2814        switch (toktype)
2815          {
2816          case st_C_class:
2817          case st_C_struct:
2818          case st_C_enum:
2819            return false;
2820          default:
2821            return true;
2822          }
2823      default:
2824        break;
2825      }
2826
       /* Struct-like keywords.  Every case below either returns or, for
          st_C_template and non-matching types, leaves the switch. */
2827    switch (toktype)
2828      {
2829      case st_C_javastruct:
2830        if (structdef == stagseen)
2831          structdef = scolonseen;
2832        return false;
2833      case st_C_template:
2834      case st_C_class:
2835        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2836            && bracelev == 0
2837            && definedef == dnone && structdef == snone
2838            && typdef == tnone && fvdef == fvnone)
2839          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2840        if (toktype == st_C_template)
2841          break;
2842        /* FALLTHRU */
2843      case st_C_struct:
2844      case st_C_enum:
2845        if (parlev == 0
2846            && fvdef != vignore
2847            && (typdef == tkeyseen
2848                || (typedefs_or_cplusplus && structdef == snone)))
2849          {
                /* Remember the keyword's type and the brace level where
                   it was seen, for use in later calls. */
2850            structdef = skeyseen;
2851            structtype = toktype;
2852            structbracelev = bracelev;
2853            if (fvdef == fvnameseen)
2854              fvdef = fvnone;
2855          }
2856        return false;
2857      default:
2858        break;
2859      }
2860
       /* The token that follows a struct-like keyword is taken as its tag. */
2861    if (structdef == skeyseen)
2862      {
2863        structdef = stagseen;
2864        return true;
2865      }
2866
2867    if (typdef != tnone)
2868      definedef = dnone;
2869
2870    /* Detect Objective C constructs. */
2871    switch (objdef)
2872      {
2873      case onone:
2874        switch (toktype)
2875          {
2876          case st_C_objprot:
2877            objdef = oprotocol;
2878            return false;
2879          case st_C_objimpl:
2880            objdef = oimplementation;
2881            return false;
2882          default:
2883            break;
2884          }
2885        break;
2886      case oimplementation:
2887        /* Save the class tag for functions or variables defined inside. */
2888        objtag = savenstr (str, len);
2889        objdef = oinbody;
2890        return false;
2891      case oprotocol:
2892        /* Save the class tag for categories. */
2893        objtag = savenstr (str, len);
2894        objdef = otagseen;
2895        *is_func_or_var = true;
2896        return true;
2897      case oparenseen:
2898        objdef = ocatseen;
2899        *is_func_or_var = true;
2900        return true;
2901      case oinbody:
2902        break;
2903      case omethodsign:
2904        if (parlev == 0)
2905          {
                /* Start a new name in token_name with this token. */
2906            fvdef = fvnone;
2907            objdef = omethodtag;
2908            linebuffer_setlen (&token_name, len);
2909            memcpy (token_name.buffer, str, len);
2910            token_name.buffer[len] = '\0';
2911            return true;
2912          }
2913        return false;
2914      case omethodcolon:
2915        if (parlev == 0)
2916          objdef = omethodparm;
2917        return false;
2918      case omethodparm:
2919        if (parlev == 0)
2920          {
2921            objdef = omethodtag;
2922            if (class_qualify)
2923              {
                    /* Append this token to the name already held in
                       token_name. */
2924                int oldlen = token_name.len;
2925                fvdef = fvnone;
2926                linebuffer_setlen (&token_name, oldlen + len);
2927                memcpy (token_name.buffer + oldlen, str, len);
2928                token_name.buffer[oldlen + len] = '\0';
2929              }
2930            return true;
2931          }
2932        return false;
2933      case oignore:
2934        if (toktype == st_C_objend)
2935          {
2936            /* Memory leakage here: the string pointed by objtag is
2937               never released, because many tests would be needed to
2938               avoid breaking on incorrect input code.  The amount of
2939               memory leaked here is the sum of the lengths of the
2940               class tags.
2941            free (objtag); */
2942            objdef = onone;
2943          }
2944        return false;
2945      default:
2946        break;
2947      }
2948
2949    /* A function, variable or enum constant? */
2950    switch (toktype)
2951      {
2952      case st_C_extern:
2953        fvextern = true;
            /* Leave fvdef alone inside or after a parameter list and in
               the ignore states; otherwise start over. */
2954        switch  (fvdef)
2955          {
2956          case finlist:
2957          case flistseen:
2958          case fignore:
2959          case vignore:
2960            break;
2961          default:
2962            fvdef = fvnone;
2963          }
2964        return false;
2965      case st_C_ignore:
2966        fvextern = false;
2967        fvdef = vignore;
2968        return false;
2969      case st_C_operator:
2970        fvdef = foperator;
2971        *is_func_or_var = true;
2972        return true;
2973      case st_none:
2974        if (constantypedefs
2975            && structdef == snone
2976            && structtype == st_C_enum && bracelev > structbracelev
2977            /* Don't tag tokens in expressions that assign values to enum
2978               constants.  */
2979            && fvdef != vignore)
2980          return true;           /* enum constant */
2981        switch (fvdef)
2982          {
2983          case fdefunkey:
                /* Only at brace level 0 is the token after a GNU macro
                   keyword taken as the name. */
2984            if (bracelev > 0)
2985              break;
2986            fvdef = fdefunname;  /* GNU macro */
2987            *is_func_or_var = true;
2988            return true;
2989          case fvnone:
2990            switch (typdef)
2991              {
2992              case ttypeseen:
2993                return false;
2994              case tnone:
2995                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2996                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2997                  {
2998                    fvdef = vignore;
2999                    return false;
3000                  }
3001                break;
3002              default:
3003                break;
3004              }
3005           /* FALLTHRU */
3006           case fvnameseen:
                /* A token ending in "::operator" switches to the
                   operator state. */
3007           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3008             {
3009               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3010                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3011               fvdef = foperator;
3012               *is_func_or_var = true;
3013               return true;
3014             }
                /* Inside braces, only tokens directly in a struct body
                   (see `instruct') are candidates. */
3015           if (bracelev > 0 && !instruct)
3016             break;
3017           fvdef = fvnameseen;   /* function or variable */
3018           *is_func_or_var = true;
3019           return true;
3020          default:
3021            break;
3022         }
3023       break;
3024      default:
3025        break;
3026     }
3027
3028   return false;
3029 }
3030
3031 \f
3032 /*
3033  * C_entries often keeps pointers to tokens or lines which are older than
3034  * the line currently read.  By keeping two line buffers, and switching
3035  * them at end of line, it is possible to use those pointers.
3036  */
3037 static struct
3038 {
3039   long linepos;
3040   linebuffer lb;
3041 } lbs[2];
3042
      /* The macros below take no arguments: they refer by name to
         variables such as curndx, newndx, c_ext, lp, inf, quotednl,
         savetoken and charno, so they only work where those names are
         in scope (C_entries declares or receives most of them). */
3043 #define current_lb_is_new (newndx == curndx)
3044 #define switch_line_buffers() (curndx = 1 - curndx)
3045
3046 #define curlb (lbs[curndx].lb)
3047 #define newlb (lbs[newndx].lb)
3048 #define curlinepos (lbs[curndx].linepos)
3049 #define newlinepos (lbs[newndx].linepos)
3050
      /* Tests on the c_ext language mask. */
3051 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3052 #define cplpl (c_ext & C_PLPL)
3053 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3054
      /* Read the next line into the current line buffer, point lp at its
         start, clear quotednl and make the current buffer the new one.
         definedef is left untouched. */
3055 #define CNL_SAVE_DEFINEDEF()                                            \
3056 do {                                                                    \
3057   curlinepos = charno;                                                  \
3058   readline (&curlb, inf);                                               \
3059   lp = curlb.buffer;                                                    \
3060   quotednl = false;                                                     \
3061   newndx = curndx;                                                      \
3062 } while (0)
3063
      /* As CNL_SAVE_DEFINEDEF, then copy a valid savetoken back into token
         and reset definedef to dnone. */
3064 #define CNL()                                                           \
3065 do {                                                                    \
3066   CNL_SAVE_DEFINEDEF ();                                                \
3067   if (savetoken.valid)                                                  \
3068     {                                                                   \
3069       token = savetoken;                                                \
3070       savetoken.valid = false;                                          \
3071     }                                                                   \
3072   definedef = dnone;                                                    \
3073 } while (0)
3074
3075
3076 static void
3077 make_C_tag (bool isfun)
3078 {
3079   /* This function is never called when token.valid is false, but
3080      we must protect against invalid input or internal errors. */
3081   if (token.valid)
3082     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3083               token.offset+token.length+1, token.lineno, token.linepos);
3084   else if (DEBUG)
3085     {                             /* this branch is optimized away if !DEBUG */
3086       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3087                 token_name.len + 17, isfun, token.line,
3088                 token.offset+token.length+1, token.lineno, token.linepos);
3089       error ("INVALID TOKEN");
3090     }
3091
3092   token.valid = false;
3093 }
3094
/*
 * perhaps_more_input ()
 *      Return true unless the end-of-file or the error indicator is
 *      set on INF.
 */
static bool
perhaps_more_input (FILE *inf)
{
  if (feof (inf))
    return false;
  if (ferror (inf))
    return false;
  return true;
}
3100
3101
3102 /*
3103  * C_entries ()
3104  *      This routine finds functions, variables, typedefs,
3105  *      #define's, enum constants and struct/union/enum definitions in
3106  *      C syntax and adds them to the list.
3107  */
3108 static void
3109 C_entries (int c_ext, FILE *inf)
3110                                 /* extension of C */
3111                                 /* input file */
3112 {
3113   register char c;              /* latest char read; '\0' for end of line */
3114   register char *lp;            /* pointer one beyond the character `c' */
3115   int curndx, newndx;           /* indices for current and new lb */
3116   register int tokoff;          /* offset in line of start of current token */
3117   register int toklen;          /* length of current token */
3118   const char *qualifier;        /* string used to qualify names */
3119   int qlen;                     /* length of qualifier */
3120   int bracelev;                 /* current brace level */
3121   int bracketlev;               /* current bracket level */
3122   int parlev;                   /* current parenthesis level */
3123   int attrparlev;               /* __attribute__ parenthesis level */
3124   int templatelev;              /* current template level */
3125   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3126   bool incomm, inquote, inchar, quotednl, midtoken;
3127   bool yacc_rules;              /* in the rules part of a yacc file */
3128   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3129
3130
3131   linebuffer_init (&lbs[0].lb);
3132   linebuffer_init (&lbs[1].lb);
3133   if (cstack.size == 0)
3134     {
3135       cstack.size = (DEBUG) ? 1 : 4;
3136       cstack.nl = 0;
3137       cstack.cname = xnew (cstack.size, char *);
3138       cstack.bracelev = xnew (cstack.size, int);
3139     }
3140
3141   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3142   curndx = newndx = 0;
3143   lp = curlb.buffer;
3144   *lp = 0;
3145
3146   fvdef = fvnone; fvextern = false; typdef = tnone;
3147   structdef = snone; definedef = dnone; objdef = onone;
3148   yacc_rules = false;
3149   midtoken = inquote = inchar = incomm = quotednl = false;
3150   token.valid = savetoken.valid = false;
3151   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3152   if (cjava)
3153     { qualifier = "."; qlen = 1; }
3154   else
3155     { qualifier = "::"; qlen = 2; }
3156
3157
3158   while (perhaps_more_input (inf))
3159     {
3160       c = *lp++;
3161       if (c == '\\')
3162         {
3163           /* If we are at the end of the line, the next character is a
3164              '\0'; do not skip it, because it is what tells us
3165              to read the next line.  */
3166           if (*lp == '\0')
3167             {
3168               quotednl = true;
3169               continue;
3170             }
3171           lp++;
3172           c = ' ';
3173         }
3174       else if (incomm)
3175         {
3176           switch (c)
3177             {
3178             case '*':
3179               if (*lp == '/')
3180                 {
3181                   c = *lp++;
3182                   incomm = false;
3183                 }
3184               break;
3185             case '\0':
3186               /* Newlines inside comments do not end macro definitions in
3187                  traditional cpp. */
3188               CNL_SAVE_DEFINEDEF ();
3189               break;
3190             }
3191           continue;
3192         }
3193       else if (inquote)
3194         {
3195           switch (c)
3196             {
3197             case '"':
3198               inquote = false;
3199               break;
3200             case '\0':
3201               /* Newlines inside strings do not end macro definitions
3202                  in traditional cpp, even though compilers don't
3203                  usually accept them. */
3204               CNL_SAVE_DEFINEDEF ();
3205               break;
3206             }
3207           continue;
3208         }
3209       else if (inchar)
3210         {
3211           switch (c)
3212             {
3213             case '\0':
3214               /* Hmmm, something went wrong. */
3215               CNL ();
3216               /* FALLTHRU */
3217             case '\'':
3218               inchar = false;
3219               break;
3220             }
3221           continue;
3222         }
3223       else switch (c)
3224         {
3225         case '"':
3226           inquote = true;
3227           if (bracketlev > 0)
3228             continue;
3229           if (inattribute)
3230             break;
3231           switch (fvdef)
3232             {
3233             case fdefunkey:
3234             case fstartlist:
3235             case finlist:
3236             case fignore:
3237             case vignore:
3238               break;
3239             default:
3240               fvextern = false;
3241               fvdef = fvnone;
3242             }
3243           continue;
3244         case '\'':
3245           inchar = true;
3246           if (bracketlev > 0)
3247             continue;
3248           if (inattribute)
3249             break;
3250           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3251             {
3252               fvextern = false;
3253               fvdef = fvnone;
3254             }
3255           continue;
3256         case '/':
3257           if (*lp == '*')
3258             {
3259               incomm = true;
3260               lp++;
3261               c = ' ';
3262               if (bracketlev > 0)
3263                 continue;
3264             }
3265           else if (/* cplpl && */ *lp == '/')
3266             {
3267               c = '\0';
3268             }
3269           break;
3270         case '%':
3271           if ((c_ext & YACC) && *lp == '%')
3272             {
3273               /* Entering or exiting rules section in yacc file. */
3274               lp++;
3275               definedef = dnone; fvdef = fvnone; fvextern = false;
3276               typdef = tnone; structdef = snone;
3277               midtoken = inquote = inchar = incomm = quotednl = false;
3278               bracelev = 0;
3279               yacc_rules = !yacc_rules;
3280               continue;
3281             }
3282           else
3283             break;
3284         case '#':
3285           if (definedef == dnone)
3286             {
3287               char *cp;
3288               bool cpptoken = true;
3289
3290               /* Look back on this line.  If all blanks, or nonblanks
3291                  followed by an end of comment, this is a preprocessor
3292                  token. */
3293               for (cp = newlb.buffer; cp < lp-1; cp++)
3294                 if (!c_isspace (*cp))
3295                   {
3296                     if (*cp == '*' && cp[1] == '/')
3297                       {
3298                         cp++;
3299                         cpptoken = true;
3300                       }
3301                     else
3302                       cpptoken = false;
3303                   }
3304               if (cpptoken)
3305                 {
3306                   definedef = dsharpseen;
3307                   /* This is needed for tagging enum values: when there are
3308                      preprocessor conditionals inside the enum, we need to
3309                      reset the value of fvdef so that the next enum value is
3310                      tagged even though the one before it did not end in a
3311                      comma.  */
3312                   if (fvdef == vignore && instruct && parlev == 0)
3313                     {
3314                       if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3315                         fvdef = fvnone;
3316                     }
3317                 }
3318             } /* if (definedef == dnone) */
3319           continue;
3320         case '[':
3321           bracketlev++;
3322           continue;
3323         default:
3324           if (bracketlev > 0)
3325             {
3326               if (c == ']')
3327                 --bracketlev;
3328               else if (c == '\0')
3329                 CNL_SAVE_DEFINEDEF ();
3330               continue;
3331             }
3332           break;
3333         } /* switch (c) */
3334
3335
3336       /* Consider token only if some involved conditions are satisfied. */
3337       if (typdef != tignore
3338           && definedef != dignorerest
3339           && fvdef != finlist
3340           && templatelev == 0
3341           && (definedef != dnone
3342               || structdef != scolonseen)
3343           && !inattribute)
3344         {
3345           if (midtoken)
3346             {
3347               if (endtoken (c))
3348                 {
3349                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3350                     /* This handles :: in the middle,
3351                        but not at the beginning of an identifier.
3352                        Also, space-separated :: is not recognized. */
3353                     {
3354                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3355                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3356                       lp += 2;
3357                       toklen += 2;
3358                       c = lp[-1];
3359                       goto still_in_token;
3360                     }
3361                   else
3362                     {
3363                       bool funorvar = false;
3364
3365                       if (yacc_rules
3366                           || consider_token (newlb.buffer + tokoff, toklen, c,
3367                                              &c_ext, bracelev, parlev,
3368                                              &funorvar))
3369                         {
3370                           if (fvdef == foperator)
3371                             {
3372                               char *oldlp = lp;
3373                               lp = skip_spaces (lp-1);
3374                               if (*lp != '\0')
3375                                 lp += 1;
3376                               while (*lp != '\0'
3377                                      && !c_isspace (*lp) && *lp != '(')
3378                                 lp += 1;
3379                               c = *lp++;
3380                               toklen += lp - oldlp;
3381                             }
3382                           token.named = false;
3383                           if (!plainc
3384                               && nestlev > 0 && definedef == dnone)
3385                             /* in struct body */
3386                             {
3387                               if (class_qualify)
3388                                 {
3389                                   int len;
3390                                   write_classname (&token_name, qualifier);
3391                                   len = token_name.len;
3392                                   linebuffer_setlen (&token_name,
3393                                                      len + qlen + toklen);
3394                                   sprintf (token_name.buffer + len, "%s%.*s",
3395                                            qualifier, toklen,
3396                                            newlb.buffer + tokoff);
3397                                 }
3398                               else
3399                                 {
3400                                   linebuffer_setlen (&token_name, toklen);
3401                                   sprintf (token_name.buffer, "%.*s",
3402                                            toklen, newlb.buffer + tokoff);
3403                                 }
3404                               token.named = true;
3405                             }
3406                           else if (objdef == ocatseen)
3407                             /* Objective C category */
3408                             {
3409                               if (class_qualify)
3410                                 {
3411                                   int len = strlen (objtag) + 2 + toklen;
3412                                   linebuffer_setlen (&token_name, len);
3413                                   sprintf (token_name.buffer, "%s(%.*s)",
3414                                            objtag, toklen,
3415                                            newlb.buffer + tokoff);
3416                                 }
3417                               else
3418                                 {
3419                                   linebuffer_setlen (&token_name, toklen);
3420                                   sprintf (token_name.buffer, "%.*s",
3421                                            toklen, newlb.buffer + tokoff);
3422                                 }
3423                               token.named = true;
3424                             }
3425                           else if (objdef == omethodtag
3426                                    || objdef == omethodparm)
3427                             /* Objective C method */
3428                             {
3429                               token.named = true;
3430                             }
3431                           else if (fvdef == fdefunname)
3432                             /* GNU DEFUN and similar macros */
3433                             {
3434                               bool defun = (newlb.buffer[tokoff] == 'F');
3435                               int off = tokoff;
3436                               int len = toklen;
3437
3438                               /* Rewrite the tag so that emacs lisp DEFUNs
3439                                  can be found by their elisp name */
3440                               if (defun)
3441                                 {
3442                                   off += 1;
3443                                   len -= 1;
3444                                 }
3445                               linebuffer_setlen (&token_name, len);
3446                               memcpy (token_name.buffer,
3447                                       newlb.buffer + off, len);
3448                               token_name.buffer[len] = '\0';
3449                               if (defun)
3450                                 while (--len >= 0)
3451                                   if (token_name.buffer[len] == '_')
3452                                     token_name.buffer[len] = '-';
3453                               token.named = defun;
3454                             }
3455                           else
3456                             {
3457                               linebuffer_setlen (&token_name, toklen);
3458                               memcpy (token_name.buffer,
3459                                       newlb.buffer + tokoff, toklen);
3460                               token_name.buffer[toklen] = '\0';
3461                               /* Name macros and members. */
3462                               token.named = (structdef == stagseen
3463                                              || typdef == ttypeseen
3464                                              || typdef == tend
3465                                              || (funorvar
3466                                                  && definedef == dignorerest)
3467                                              || (funorvar
3468                                                  && definedef == dnone
3469                                                  && structdef == snone
3470                                                  && bracelev > 0));
3471                             }
3472                           token.lineno = lineno;
3473                           token.offset = tokoff;
3474                           token.length = toklen;
3475                           token.line = newlb.buffer;
3476                           token.linepos = newlinepos;
3477                           token.valid = true;
3478
3479                           if (definedef == dnone
3480                               && (fvdef == fvnameseen
3481                                   || fvdef == foperator
3482                                   || structdef == stagseen
3483                                   || typdef == tend
3484                                   || typdef == ttypeseen
3485                                   || objdef != onone))
3486                             {
3487                               if (current_lb_is_new)
3488                                 switch_line_buffers ();
3489                             }
3490                           else if (definedef != dnone
3491                                    || fvdef == fdefunname
3492                                    || instruct)
3493                             make_C_tag (funorvar);
3494                         }
3495                       else /* not yacc and consider_token failed */
3496                         {
3497                           if (inattribute && fvdef == fignore)
3498                             {
3499                               /* We have just met __attribute__ after a
3500                                  function parameter list: do not tag the
3501                                  function again. */
3502                               fvdef = fvnone;
3503                             }
3504                         }
3505                       midtoken = false;
3506                     }
3507                 } /* if (endtoken (c)) */
3508               else if (intoken (c))
3509                 still_in_token:
3510                 {
3511                   toklen++;
3512                   continue;
3513                 }
3514             } /* if (midtoken) */
3515           else if (begtoken (c))
3516             {
3517               switch (definedef)
3518                 {
3519                 case dnone:
3520                   switch (fvdef)
3521                     {
3522                     case fstartlist:
3523                       /* This prevents tagging fb in
3524                          void (__attribute__((noreturn)) *fb) (void);
3525                          Fixing this is not easy and not very important. */
3526                       fvdef = finlist;
3527                       continue;
3528                     case flistseen:
3529                       if (plainc || declarations)
3530                         {
3531                           make_C_tag (true); /* a function */
3532                           fvdef = fignore;
3533                         }
3534                       break;
3535                     default:
3536                       break;
3537                     }
3538                   if (structdef == stagseen && !cjava)
3539                     {
3540                       popclass_above (bracelev);
3541                       structdef = snone;
3542                     }
3543                   break;
3544                 case dsharpseen:
3545                   savetoken = token;
3546                   break;
3547                 default:
3548                   break;
3549                 }
3550               if (!yacc_rules || lp == newlb.buffer + 1)
3551                 {
3552                   tokoff = lp - 1 - newlb.buffer;
3553                   toklen = 1;
3554                   midtoken = true;
3555                 }
3556               continue;
3557             } /* if (begtoken) */
3558         } /* if must look at token */
3559
3560
3561       /* Detect end of line, colon, comma, semicolon and various braces
3562          after having handled a token.*/
3563       switch (c)
3564         {
3565         case ':':
3566           if (inattribute)
3567             break;
3568           if (yacc_rules && token.offset == 0 && token.valid)
3569             {
3570               make_C_tag (false); /* a yacc function */
3571               break;
3572             }
3573           if (definedef != dnone)
3574             break;
3575           switch (objdef)
3576             {
3577             case otagseen:
3578               objdef = oignore;
3579               make_C_tag (true); /* an Objective C class */
3580               break;
3581             case omethodtag:
3582             case omethodparm:
3583               objdef = omethodcolon;
3584               if (class_qualify)
3585                 {
3586                   int toklen = token_name.len;
3587                   linebuffer_setlen (&token_name, toklen + 1);
3588                   strcpy (token_name.buffer + toklen, ":");
3589                 }
3590               break;
3591             default:
3592               break;
3593             }
3594           if (structdef == stagseen)
3595             {
3596               structdef = scolonseen;
3597               break;
3598             }
3599           /* Should be useless, but may work as a safety net. */
3600           if (cplpl && fvdef == flistseen)
3601             {
3602               make_C_tag (true); /* a function */
3603               fvdef = fignore;
3604               break;
3605             }
3606           break;
3607         case ';':
3608           if (definedef != dnone || inattribute)
3609             break;
3610           switch (typdef)
3611             {
3612             case tend:
3613             case ttypeseen:
3614               make_C_tag (false); /* a typedef */
3615               typdef = tnone;
3616               fvdef = fvnone;
3617               break;
3618             case tnone:
3619             case tinbody:
3620             case tignore:
3621               switch (fvdef)
3622                 {
3623                 case fignore:
3624                   if (typdef == tignore || cplpl)
3625                     fvdef = fvnone;
3626                   break;
3627                 case fvnameseen:
3628                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3629                       || (members && instruct))
3630                     make_C_tag (false); /* a variable */
3631                   fvextern = false;
3632                   fvdef = fvnone;
3633                   token.valid = false;
3634                   break;
3635                 case flistseen:
3636                   if ((declarations
3637                        && (cplpl || !instruct)
3638                        && (typdef == tnone || (typdef != tignore && instruct)))
3639                       || (members
3640                           && plainc && instruct))
3641                     make_C_tag (true);  /* a function */
3642                   /* FALLTHRU */
3643                 default:
3644                   fvextern = false;
3645                   fvdef = fvnone;
3646                   if (declarations
3647                        && cplpl && structdef == stagseen)
3648                     make_C_tag (false); /* forward declaration */
3649                   else
3650                     token.valid = false;
3651                 } /* switch (fvdef) */
3652               /* FALLTHRU */
3653             default:
3654               if (!instruct)
3655                 typdef = tnone;
3656             }
3657           if (structdef == stagseen)
3658             structdef = snone;
3659           break;
3660         case ',':
3661           if (definedef != dnone || inattribute)
3662             break;
3663           switch (objdef)
3664             {
3665             case omethodtag:
3666             case omethodparm:
3667               make_C_tag (true); /* an Objective C method */
3668               objdef = oinbody;
3669               break;
3670             default:
3671               break;
3672             }
3673           switch (fvdef)
3674             {
3675             case fdefunkey:
3676             case foperator:
3677             case fstartlist:
3678             case finlist:
3679             case fignore:
3680               break;
3681             case vignore:
3682               if (instruct && parlev == 0)
3683                 fvdef = fvnone;
3684               break;
3685             case fdefunname:
3686               fvdef = fignore;
3687               break;
3688             case fvnameseen:
3689               if (parlev == 0
3690                   && ((globals
3691                        && bracelev == 0
3692                        && templatelev == 0
3693                        && (!fvextern || declarations))
3694                       || (members && instruct)))
3695                   make_C_tag (false); /* a variable */
3696               break;
3697             case flistseen:
3698               if ((declarations && typdef == tnone && !instruct)
3699                   || (members && typdef != tignore && instruct))
3700                 {
3701                   make_C_tag (true); /* a function */
3702                   fvdef = fvnameseen;
3703                 }
3704               else if (!declarations)
3705                 fvdef = fvnone;
3706               token.valid = false;
3707               break;
3708             default:
3709               fvdef = fvnone;
3710             }
3711           if (structdef == stagseen)
3712             structdef = snone;
3713           break;
3714         case ']':
3715           if (definedef != dnone || inattribute)
3716             break;
3717           if (structdef == stagseen)
3718             structdef = snone;
3719           switch (typdef)
3720             {
3721             case ttypeseen:
3722             case tend:
3723               typdef = tignore;
3724               make_C_tag (false);       /* a typedef */
3725               break;
3726             case tnone:
3727             case tinbody:
3728               switch (fvdef)
3729                 {
3730                 case foperator:
3731                 case finlist:
3732                 case fignore:
3733                 case vignore:
3734                   break;
3735                 case fvnameseen:
3736                   if ((members && bracelev == 1)
3737                       || (globals && bracelev == 0
3738                           && (!fvextern || declarations)))
3739                     make_C_tag (false); /* a variable */
3740                   /* FALLTHRU */
3741                 default:
3742                   fvdef = fvnone;
3743                 }
3744               break;
3745             default:
3746               break;
3747             }
3748           break;
3749         case '(':
3750           if (inattribute)
3751             {
3752               attrparlev++;
3753               break;
3754             }
3755           if (definedef != dnone)
3756             break;
3757           if (objdef == otagseen && parlev == 0)
3758             objdef = oparenseen;
3759           switch (fvdef)
3760             {
3761             case fvnameseen:
3762               if (typdef == ttypeseen
3763                   && *lp != '*'
3764                   && !instruct)
3765                 {
3766                   /* This handles constructs like:
3767                      typedef void OperatorFun (int fun); */
3768                   make_C_tag (false);
3769                   typdef = tignore;
3770                   fvdef = fignore;
3771                   break;
3772                 }
3773               /* FALLTHRU */
3774             case foperator:
3775               fvdef = fstartlist;
3776               break;
3777             case flistseen:
3778               fvdef = finlist;
3779               break;
3780             default:
3781               break;
3782             }
3783           parlev++;
3784           break;
3785         case ')':
3786           if (inattribute)
3787             {
3788               if (--attrparlev == 0)
3789                 inattribute = false;
3790               break;
3791             }
3792           if (definedef != dnone)
3793             break;
3794           if (objdef == ocatseen && parlev == 1)
3795             {
3796               make_C_tag (true); /* an Objective C category */
3797               objdef = oignore;
3798             }
3799           if (--parlev == 0)
3800             {
3801               switch (fvdef)
3802                 {
3803                 case fstartlist:
3804                 case finlist:
3805                   fvdef = flistseen;
3806                   break;
3807                 default:
3808                   break;
3809                 }
3810               if (!instruct
3811                   && (typdef == tend
3812                       || typdef == ttypeseen))
3813                 {
3814                   typdef = tignore;
3815                   make_C_tag (false); /* a typedef */
3816                 }
3817             }
3818           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3819             parlev = 0;
3820           break;
3821         case '{':
3822           if (definedef != dnone)
3823             break;
3824           if (typdef == ttypeseen)
3825             {
3826               /* Whenever typdef is set to tinbody (currently only
3827                  here), typdefbracelev should be set to bracelev. */
3828               typdef = tinbody;
3829               typdefbracelev = bracelev;
3830             }
3831           switch (fvdef)
3832             {
3833             case flistseen:
3834               if (cplpl && !class_qualify)
3835                 {
3836                   /* Remove class and namespace qualifiers from the token,
3837                      leaving only the method/member name.  */
3838                   char *cc, *uqname = token_name.buffer;
3839                   char *tok_end = token_name.buffer + token_name.len;
3840
3841                   for (cc = token_name.buffer; cc < tok_end; cc++)
3842                     {
3843                       if (*cc == ':' && cc[1] == ':')
3844                         {
3845                           uqname = cc + 2;
3846                           cc++;
3847                         }
3848                     }
3849                   if (uqname > token_name.buffer)
3850                     {
3851                       int uqlen = strlen (uqname);
3852                       linebuffer_setlen (&token_name, uqlen);
3853                       memmove (token_name.buffer, uqname, uqlen + 1);
3854                     }
3855                 }
3856               make_C_tag (true);    /* a function */
3857               /* FALLTHRU */
3858             case fignore:
3859               fvdef = fvnone;
3860               break;
3861             case fvnone:
3862               switch (objdef)
3863                 {
3864                 case otagseen:
3865                   make_C_tag (true); /* an Objective C class */
3866                   objdef = oignore;
3867                   break;
3868                 case omethodtag:
3869                 case omethodparm:
3870                   make_C_tag (true); /* an Objective C method */
3871                   objdef = oinbody;
3872                   break;
3873                 default:
3874                   /* Neutralize `extern "C" {' grot. */
3875                   if (bracelev == 0 && structdef == snone && nestlev == 0
3876                       && typdef == tnone)
3877                     bracelev = -1;
3878                 }
3879               break;
3880             default:
3881               break;
3882             }
3883           switch (structdef)
3884             {
3885             case skeyseen:         /* unnamed struct */
3886               pushclass_above (bracelev, NULL, 0);
3887               structdef = snone;
3888               break;
3889             case stagseen:         /* named struct or enum */
3890             case scolonseen:       /* a class */
3891               pushclass_above (bracelev,token.line+token.offset, token.length);
3892               structdef = snone;
3893               make_C_tag (false);  /* a struct or enum */
3894               break;
3895             default:
3896               break;
3897             }
3898           bracelev += 1;
3899           break;
3900         case '*':
3901           if (definedef != dnone)
3902             break;
3903           if (fvdef == fstartlist)
3904             {
3905               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3906               token.valid = false;
3907             }
3908           break;
3909         case '}':
3910           if (definedef != dnone)
3911             break;
3912           bracelev -= 1;
3913           if (!ignoreindent && lp == newlb.buffer + 1)
3914             {
3915               if (bracelev != 0)
3916                 token.valid = false; /* unexpected value, token unreliable */
3917               bracelev = 0;     /* reset brace level if first column */
3918               parlev = 0;       /* also reset paren level, just in case... */
3919             }
3920           else if (bracelev < 0)
3921             {
3922               token.valid = false; /* something gone amiss, token unreliable */
3923               bracelev = 0;
3924             }
3925           if (bracelev == 0 && fvdef == vignore)
3926             fvdef = fvnone;             /* end of function */
3927           popclass_above (bracelev);
3928           structdef = snone;
3929           /* Only if typdef == tinbody is typdefbracelev significant. */
3930           if (typdef == tinbody && bracelev <= typdefbracelev)
3931             {
3932               assert (bracelev == typdefbracelev);
3933               typdef = tend;
3934             }
3935           break;
3936         case '=':
3937           if (definedef != dnone)
3938             break;
3939           switch (fvdef)
3940             {
3941             case foperator:
3942             case finlist:
3943             case fignore:
3944             case vignore:
3945               break;
3946             case fvnameseen:
3947               if ((members && bracelev == 1)
3948                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3949                 make_C_tag (false); /* a variable */
3950               /* FALLTHRU */
3951             default:
3952               fvdef = vignore;
3953             }
3954           break;
3955         case '<':
3956           if (cplpl
3957               && (structdef == stagseen || fvdef == fvnameseen))
3958             {
3959               templatelev++;
3960               break;
3961             }
3962           goto resetfvdef;
3963         case '>':
3964           if (templatelev > 0)
3965             {
3966               templatelev--;
3967               break;
3968             }
3969           goto resetfvdef;
3970         case '+':
3971         case '-':
3972           if (objdef == oinbody && bracelev == 0)
3973             {
3974               objdef = omethodsign;
3975               break;
3976             }
3977           /* FALLTHRU */
3978         resetfvdef:
3979         case '#': case '~': case '&': case '%': case '/':
3980         case '|': case '^': case '!': case '.': case '?':
3981           if (definedef != dnone)
3982             break;
3983           /* These surely cannot follow a function tag in C. */
3984           switch (fvdef)
3985             {
3986             case foperator:
3987             case finlist:
3988             case fignore:
3989             case vignore:
3990               break;
3991             default:
3992               fvdef = fvnone;
3993             }
3994           break;
3995         case '\0':
3996           if (objdef == otagseen)
3997             {
3998               make_C_tag (true); /* an Objective C class */
3999               objdef = oignore;
4000             }
4001           /* If a macro spans multiple lines don't reset its state. */
4002           if (quotednl)
4003             CNL_SAVE_DEFINEDEF ();
4004           else
4005             CNL ();
4006           break;
4007         } /* switch (c) */
4008
4009     } /* while not eof */
4010
4011   free (lbs[0].lb.buffer);
4012   free (lbs[1].lb.buffer);
4013 }
4014
4015 /*
4016  * Process either a C++ file or a C file depending on the setting
4017  * of a global flag.
4018  */
4019 static void
4020 default_C_entries (FILE *inf)
4021 {
4022   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4023 }
4024
/* Entry point for plain C: run C_entries on INF with no
   language-extension flags set.  */
static void
plain_C_entries (FILE *inf)
{
  const int no_extensions = 0;

  C_entries (no_extensions, inf);
}
4031
4032 /* Always do C++. */
4033 static void
4034 Cplusplus_entries (FILE *inf)
4035 {
4036   C_entries (C_PLPL, inf);
4037 }
4038
4039 /* Always do Java. */
4040 static void
4041 Cjava_entries (FILE *inf)
4042 {
4043   C_entries (C_JAVA, inf);
4044 }
4045
4046 /* Always do C*. */
4047 static void
4048 Cstar_entries (FILE *inf)
4049 {
4050   C_entries (C_STAR, inf);
4051 }
4052
4053 /* Always do Yacc. */
4054 static void
4055 Yacc_entries (FILE *inf)
4056 {
4057   C_entries (YACC, inf);
4058 }
4059
4060 \f
4061 /* Useful macros. */
/* Loop header over the lines of FILE_POINTER.  The loop runs while
   perhaps_more_input (FILE_POINTER) holds; before each iteration one
   line is read into LINE_BUFFER with readline and CHAR_POINTER is set
   to the start of LINE_BUFFER's buffer.  The statement that follows
   the macro invocation is the loop body, so `continue' and `break'
   inside it act on this loop.  FILE_POINTER and LINE_BUFFER are each
   expanded twice.

   The last line of the definition deliberately has no trailing
   backslash: a dangling continuation would make the macro absorb
   whatever line happens to follow it.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (perhaps_more_input (file_pointer)                              \
         && (readline (&(line_buffer), file_pointer),                   \
             (char_pointer) = (line_buffer).buffer,                     \
             true))
4067
/* Test whether CP points at the keyword KW.  The match requires the
   first sizeof (KW) - 1 characters at CP to compare equal to KW under
   strneq, and the character right after them to satisfy notinname.
   On a match, CP is reassigned to skip_spaces applied to the position
   just past the keyword; on a mismatch CP is left alone.

   KW must be a string literal: `"" kw' is a syntax error for anything
   else, and sizeof (kw) - 1 is used as the keyword length.  CP is
   expanded several times and is assigned to, so it must be a
   side-effect-free lvalue.  */
#define LOOKING_AT(cp, kw)                                              \
  ((assert ("" kw), true)                                               \
   && strneq ((cp), kw, sizeof (kw)-1)                                  \
   && notinname ((cp)[sizeof (kw)-1])                                   \
   && ((cp) = skip_spaces ((cp)+sizeof (kw)-1)))
4073
/* Variant of LOOKING_AT that compares with strncaseeq instead of
   strneq.  Unlike LOOKING_AT it does not test the character after the
   keyword with notinname, and it does not skip spaces: on a match CP
   is only advanced past the sizeof (KW) - 1 characters of the keyword.
   On a mismatch CP is left alone.

   KW must be a string literal (`"" kw' is a syntax error otherwise).
   CP is expanded more than once and is modified, so it must be a
   side-effect-free lvalue.  */
#define LOOKING_AT_NOCASE(cp, kw)                                       \
  ((assert ("" kw), true)                                               \
   && strncaseeq ((cp), kw, sizeof (kw)-1)                              \
   && ((cp) += sizeof (kw)-1))
4079
4080 /*
4081  * Read a file, but do no processing.  This is used to do regexp
4082  * matching on files that have no language defined.
4083  */
4084 static void
4085 just_read_file (FILE *inf)
4086 {
4087   while (perhaps_more_input (inf))
4088     readline (&lb, inf);
4089 }
4090
4091 \f
4092 /* Fortran parsing */
4093
4094 static void F_takeprec (void);
4095 static void F_getit (FILE *);
4096
4097 static void
4098 F_takeprec (void)
4099 {
4100   dbp = skip_spaces (dbp);
4101   if (*dbp != '*')
4102     return;
4103   dbp++;
4104   dbp = skip_spaces (dbp);
4105   if (strneq (dbp, "(*)", 3))
4106     {
4107       dbp += 3;
4108       return;
4109     }
4110   if (!c_isdigit (*dbp))
4111     {
4112       --dbp;                    /* force failure */
4113       return;
4114     }
4115   do
4116     dbp++;
4117   while (c_isdigit (*dbp));
4118 }
4119
4120 static void
4121 F_getit (FILE *inf)
4122 {
4123   register char *cp;
4124
4125   dbp = skip_spaces (dbp);
4126   if (*dbp == '\0')
4127     {
4128       readline (&lb, inf);
4129       dbp = lb.buffer;
4130       if (dbp[5] != '&')
4131         return;
4132       dbp += 6;
4133       dbp = skip_spaces (dbp);
4134     }
4135   if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4136     return;
4137   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4138     continue;
4139   make_tag (dbp, cp-dbp, true,
4140             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4141 }
4142
4143
4144 static void
4145 Fortran_functions (FILE *inf)
4146 {
4147   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4148     {
4149       if (*dbp == '%')
4150         dbp++;                  /* Ratfor escape to fortran */
4151       dbp = skip_spaces (dbp);
4152       if (*dbp == '\0')
4153         continue;
4154
4155       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4156         dbp = skip_spaces (dbp);
4157
4158       if (LOOKING_AT_NOCASE (dbp, "pure"))
4159         dbp = skip_spaces (dbp);
4160
4161       if (LOOKING_AT_NOCASE (dbp, "elemental"))
4162         dbp = skip_spaces (dbp);
4163
4164       switch (c_tolower (*dbp))
4165         {
4166         case 'i':
4167           if (nocase_tail ("integer"))
4168             F_takeprec ();
4169           break;
4170         case 'r':
4171           if (nocase_tail ("real"))
4172             F_takeprec ();
4173           break;
4174         case 'l':
4175           if (nocase_tail ("logical"))
4176             F_takeprec ();
4177           break;
4178         case 'c':
4179           if (nocase_tail ("complex") || nocase_tail ("character"))
4180             F_takeprec ();
4181           break;
4182         case 'd':
4183           if (nocase_tail ("double"))
4184             {
4185               dbp = skip_spaces (dbp);
4186               if (*dbp == '\0')
4187                 continue;
4188               if (nocase_tail ("precision"))
4189                 break;
4190               continue;
4191             }
4192           break;
4193         }
4194       dbp = skip_spaces (dbp);
4195       if (*dbp == '\0')
4196         continue;
4197       switch (c_tolower (*dbp))
4198         {
4199         case 'f':
4200           if (nocase_tail ("function"))
4201             F_getit (inf);
4202           continue;
4203         case 's':
4204           if (nocase_tail ("subroutine"))
4205             F_getit (inf);
4206           continue;
4207         case 'e':
4208           if (nocase_tail ("entry"))
4209             F_getit (inf);
4210           continue;
4211         case 'b':
4212           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4213             {
4214               dbp = skip_spaces (dbp);
4215               if (*dbp == '\0') /* assume un-named */
4216                 make_tag ("blockdata", 9, true,
4217                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4218               else
4219                 F_getit (inf);  /* look for name */
4220             }
4221           continue;
4222         }
4223     }
4224 }
4225
4226 \f
4227 /*
4228  * Go language support
4229  * Original code by Xi Lu <lx@shellcodes.org> (2016)
4230  */
4231 static void
4232 Go_functions(FILE *inf)
4233 {
4234   char *cp, *name;
4235
4236   LOOP_ON_INPUT_LINES(inf, lb, cp)
4237     {
4238       cp = skip_spaces (cp);
4239
4240       if (LOOKING_AT (cp, "package"))
4241         {
4242           name = cp;
4243           while (!notinname (*cp) && *cp != '\0')
4244             cp++;
4245           make_tag (name, cp - name, false, lb.buffer,
4246                     cp - lb.buffer + 1, lineno, linecharno);
4247         }
4248       else if (LOOKING_AT (cp, "func"))
4249         {
4250           /* Go implementation of interface, such as:
4251              func (n *Integer) Add(m Integer) ...
4252              skip `(n *Integer)` part.
4253           */
4254           if (*cp == '(')
4255             {
4256               while (*cp != ')')
4257                 cp++;
4258               cp = skip_spaces (cp+1);
4259             }
4260
4261           if (*cp)
4262             {
4263               name = cp;
4264
4265               while (!notinname (*cp))
4266                 cp++;
4267
4268               make_tag (name, cp - name, true, lb.buffer,
4269                         cp - lb.buffer + 1, lineno, linecharno);
4270             }
4271         }
4272       else if (members && LOOKING_AT (cp, "type"))
4273         {
4274           name = cp;
4275
4276           /* Ignore the likes of the following:
4277              type (
4278                     A
4279              )
4280            */
4281           if (*cp == '(')
4282             return;
4283
4284           while (!notinname (*cp) && *cp != '\0')
4285             cp++;
4286
4287           make_tag (name, cp - name, false, lb.buffer,
4288                     cp - lb.buffer + 1, lineno, linecharno);
4289         }
4290     }
4291 }
4292
4293 \f
4294 /*
4295  * Ada parsing
4296  * Original code by
4297  * Philippe Waroquiers (1998)
4298  */
4299
4300 /* Once we are positioned after an "interesting" keyword, let's get
4301    the real tag value necessary. */
4302 static void
4303 Ada_getit (FILE *inf, const char *name_qualifier)
4304 {
4305   register char *cp;
4306   char *name;
4307   char c;
4308
4309   while (perhaps_more_input (inf))
4310     {
4311       dbp = skip_spaces (dbp);
4312       if (*dbp == '\0'
4313           || (dbp[0] == '-' && dbp[1] == '-'))
4314         {
4315           readline (&lb, inf);
4316           dbp = lb.buffer;
4317         }
4318       switch (c_tolower (*dbp))
4319         {
4320         case 'b':
4321           if (nocase_tail ("body"))
4322             {
4323               /* Skipping body of   procedure body   or   package body or ....
4324                  resetting qualifier to body instead of spec. */
4325               name_qualifier = "/b";
4326               continue;
4327             }
4328           break;
4329         case 't':
4330           /* Skipping type of   task type   or   protected type ... */
4331           if (nocase_tail ("type"))
4332             continue;
4333           break;
4334         }
4335       if (*dbp == '"')
4336         {
4337           dbp += 1;
4338           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4339             continue;
4340         }
4341       else
4342         {
4343           dbp = skip_spaces (dbp);
4344           for (cp = dbp;
4345                c_isalnum (*cp) || *cp == '_' || *cp == '.';
4346                cp++)
4347             continue;
4348           if (cp == dbp)
4349             return;
4350         }
4351       c = *cp;
4352       *cp = '\0';
4353       name = concat (dbp, name_qualifier, "");
4354       *cp = c;
4355       make_tag (name, strlen (name), true,
4356                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4357       free (name);
4358       if (c == '"')
4359         dbp = cp + 1;
4360       return;
4361     }
4362 }
4363
4364 static void
4365 Ada_funcs (FILE *inf)
4366 {
4367   bool inquote = false;
4368   bool skip_till_semicolumn = false;
4369
4370   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4371     {
4372       while (*dbp != '\0')
4373         {
4374           /* Skip a string i.e. "abcd". */
4375           if (inquote || (*dbp == '"'))
4376             {
4377               dbp = strchr (dbp + !inquote, '"');
4378               if (dbp != NULL)
4379                 {
4380                   inquote = false;
4381                   dbp += 1;
4382                   continue;     /* advance char */
4383                 }
4384               else
4385                 {
4386                   inquote = true;
4387                   break;        /* advance line */
4388                 }
4389             }
4390
4391           /* Skip comments. */
4392           if (dbp[0] == '-' && dbp[1] == '-')
4393             break;              /* advance line */
4394
4395           /* Skip character enclosed in single quote i.e. 'a'
4396              and skip single quote starting an attribute i.e. 'Image. */
4397           if (*dbp == '\'')
4398             {
4399               dbp++ ;
4400               if (*dbp != '\0')
4401                 dbp++;
4402               continue;
4403             }
4404
4405           if (skip_till_semicolumn)
4406             {
4407               if (*dbp == ';')
4408                 skip_till_semicolumn = false;
4409               dbp++;
4410               continue;         /* advance char */
4411             }
4412
4413           /* Search for beginning of a token.  */
4414           if (!begtoken (*dbp))
4415             {
4416               dbp++;
4417               continue;         /* advance char */
4418             }
4419
4420           /* We are at the beginning of a token. */
4421           switch (c_tolower (*dbp))
4422             {
4423             case 'f':
4424               if (!packages_only && nocase_tail ("function"))
4425                 Ada_getit (inf, "/f");
4426               else
4427                 break;          /* from switch */
4428               continue;         /* advance char */
4429             case 'p':
4430               if (!packages_only && nocase_tail ("procedure"))
4431                 Ada_getit (inf, "/p");
4432               else if (nocase_tail ("package"))
4433                 Ada_getit (inf, "/s");
4434               else if (nocase_tail ("protected")) /* protected type */
4435                 Ada_getit (inf, "/t");
4436               else
4437                 break;          /* from switch */
4438               continue;         /* advance char */
4439
4440             case 'u':
4441               if (typedefs && !packages_only && nocase_tail ("use"))
4442                 {
4443                   /* when tagging types, avoid tagging  use type Pack.Typename;
4444                      for this, we will skip everything till a ; */
4445                   skip_till_semicolumn = true;
4446                   continue;     /* advance char */
4447                 }
4448
4449             case 't':
4450               if (!packages_only && nocase_tail ("task"))
4451                 Ada_getit (inf, "/k");
4452               else if (typedefs && !packages_only && nocase_tail ("type"))
4453                 {
4454                   Ada_getit (inf, "/t");
4455                   while (*dbp != '\0')
4456                     dbp += 1;
4457                 }
4458               else
4459                 break;          /* from switch */
4460               continue;         /* advance char */
4461             }
4462
4463           /* Look for the end of the token. */
4464           while (!endtoken (*dbp))
4465             dbp++;
4466
4467         } /* advance char */
4468     } /* advance line */
4469 }
4470
4471 \f
4472 /*
4473  * Unix and microcontroller assembly tag handling
4474  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4475  * Idea by Bob Weiner, Motorola Inc. (1994)
4476  */
4477 static void
4478 Asm_labels (FILE *inf)
4479 {
4480   register char *cp;
4481
4482   LOOP_ON_INPUT_LINES (inf, lb, cp)
4483     {
4484       /* If first char is alphabetic or one of [_.$], test for colon
4485          following identifier. */
4486       if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4487         {
4488           /* Read past label. */
4489           cp++;
4490           while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4491             cp++;
4492           if (*cp == ':' || c_isspace (*cp))
4493             /* Found end of label, so copy it and add it to the table. */
4494             make_tag (lb.buffer, cp - lb.buffer, true,
4495                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4496         }
4497     }
4498 }
4499
4500 \f
4501 /*
4502  * Perl support
4503  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4504  *                 /^use constant[ \t\n]+[^ \t\n{=,;]+/
4505  * Perl variable names: /^(my|local).../
4506  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4507  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4508  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4509  */
4510 static void
4511 Perl_functions (FILE *inf)
4512 {
4513   char *package = savestr ("main"); /* current package name */
4514   register char *cp;
4515
4516   LOOP_ON_INPUT_LINES (inf, lb, cp)
4517     {
4518       cp = skip_spaces (cp);
4519
4520       if (LOOKING_AT (cp, "package"))
4521         {
4522           free (package);
4523           get_tag (cp, &package);
4524         }
4525       else if (LOOKING_AT (cp, "sub"))
4526         {
4527           char *pos, *sp;
4528
4529         subr:
4530           sp = cp;
4531           while (!notinname (*cp))
4532             cp++;
4533           if (cp == sp)
4534             continue;           /* nothing found */
4535           pos = strchr (sp, ':');
4536           if (pos && pos < cp && pos[1] == ':')
4537             /* The name is already qualified. */
4538             make_tag (sp, cp - sp, true,
4539                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4540           else
4541             /* Qualify it. */
4542             {
4543               char savechar, *name;
4544
4545               savechar = *cp;
4546               *cp = '\0';
4547               name = concat (package, "::", sp);
4548               *cp = savechar;
4549               make_tag (name, strlen (name), true,
4550                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4551               free (name);
4552             }
4553         }
4554       else if (LOOKING_AT (cp, "use constant")
4555                || LOOKING_AT (cp, "use constant::defer"))
4556         {
4557           /* For hash style multi-constant like
4558                 use constant { FOO => 123,
4559                                BAR => 456 };
4560              only the first FOO is picked up.  Parsing across the value
4561              expressions would be difficult in general, due to possible nested
4562              hashes, here-documents, etc.  */
4563           if (*cp == '{')
4564             cp = skip_spaces (cp+1);
4565           goto subr;
4566         }
4567       else if (globals) /* only if we are tagging global vars */
4568         {
4569           /* Skip a qualifier, if any. */
4570           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4571           /* After "my" or "local", but before any following paren or space. */
4572           char *varstart = cp;
4573
4574           if (qual              /* should this be removed?  If yes, how? */
4575               && (*cp == '$' || *cp == '@' || *cp == '%'))
4576             {
4577               varstart += 1;
4578               do
4579                 cp++;
4580               while (c_isalnum (*cp) || *cp == '_');
4581             }
4582           else if (qual)
4583             {
4584               /* Should be examining a variable list at this point;
4585                  could insist on seeing an open parenthesis. */
4586               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4587                 cp++;
4588             }
4589           else
4590             continue;
4591
4592           make_tag (varstart, cp - varstart, false,
4593                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4594         }
4595     }
4596   free (package);
4597 }
4598
4599
4600 /*
4601  * Python support
4602  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4603  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4604  * More ideas by seb bacon <seb@jamkit.com> (2002)
4605  */
4606 static void
4607 Python_functions (FILE *inf)
4608 {
4609   register char *cp;
4610
4611   LOOP_ON_INPUT_LINES (inf, lb, cp)
4612     {
4613       cp = skip_spaces (cp);
4614       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4615         {
4616           char *name = cp;
4617           while (!notinname (*cp) && *cp != ':')
4618             cp++;
4619           make_tag (name, cp - name, true,
4620                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4621         }
4622     }
4623 }
4624
4625 /*
4626  * Ruby support
4627  * Original code by Xi Lu <lx@shellcodes.org> (2015)
4628  */
4629 static void
4630 Ruby_functions (FILE *inf)
4631 {
4632   char *cp = NULL;
4633   bool reader = false, writer = false, alias = false, continuation = false;
4634
4635   LOOP_ON_INPUT_LINES (inf, lb, cp)
4636     {
4637       bool is_class = false;
4638       bool is_method = false;
4639       char *name;
4640
4641       cp = skip_spaces (cp);
4642       if (!continuation
4643           /* Constants.  */
4644           && c_isalpha (*cp) && c_isupper (*cp))
4645         {
4646           char *bp, *colon = NULL;
4647
4648           name = cp;
4649
4650           for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
4651             {
4652               if (*cp == ':')
4653                 colon = cp;
4654             }
4655           if (cp > name + 1)
4656             {
4657               bp = skip_spaces (cp);
4658               if (*bp == '=' && !(bp[1] == '=' || bp[1] == '>'))
4659                 {
4660                   if (colon && !c_isspace (colon[1]))
4661                     name = colon + 1;
4662                   make_tag (name, cp - name, false,
4663                             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4664                 }
4665             }
4666         }
4667       else if (!continuation
4668                /* Modules, classes, methods.  */
4669                && ((is_method = LOOKING_AT (cp, "def"))
4670                    || (is_class = LOOKING_AT (cp, "class"))
4671                    || LOOKING_AT (cp, "module")))
4672         {
4673           const char self_name[] = "self.";
4674           const size_t self_size1 = sizeof (self_name) - 1;
4675
4676           name = cp;
4677
4678          /* Ruby method names can end in a '='.  Also, operator overloading can
4679             define operators whose names include '='.  */
4680           while (!notinname (*cp) || *cp == '=')
4681             cp++;
4682
4683           /* Remove "self." from the method name.  */
4684           if (cp - name > self_size1
4685               && strneq (name, self_name, self_size1))
4686             name += self_size1;
4687
4688           /* Remove the class/module qualifiers from method names.  */
4689           if (is_method)
4690             {
4691               char *q;
4692
4693               for (q = name; q < cp && *q != '.'; q++)
4694                 ;
4695               if (q < cp - 1)   /* punt if we see just "FOO." */
4696                 name = q + 1;
4697             }
4698
4699           /* Don't tag singleton classes.  */
4700           if (is_class && strneq (name, "<<", 2) && cp == name + 2)
4701             continue;
4702
4703           make_tag (name, cp - name, true,
4704                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4705         }
4706       else
4707         {
4708           /* Tag accessors and aliases.  */
4709
4710           if (!continuation)
4711             reader = writer = alias = false;
4712
4713           while (*cp && *cp != '#')
4714             {
4715               if (!continuation)
4716                 {
4717                   reader = writer = alias = false;
4718                   if (LOOKING_AT (cp, "attr_reader"))
4719                     reader = true;
4720                   else if (LOOKING_AT (cp, "attr_writer"))
4721                     writer = true;
4722                   else if (LOOKING_AT (cp, "attr_accessor"))
4723                     {
4724                       reader = true;
4725                       writer = true;
4726                     }
4727                   else if (LOOKING_AT (cp, "alias_method"))
4728                     alias = true;
4729                 }
4730               if (reader || writer || alias)
4731                 {
4732                   do {
4733                     char *np;
4734
4735                     cp = skip_spaces (cp);
4736                     if (*cp == '(')
4737                       cp = skip_spaces (cp + 1);
4738                     np = cp;
4739                     cp = skip_name (cp);
4740                     if (*np != ':')
4741                       continue;
4742                     np++;
4743                     if (reader)
4744                       {
4745                         make_tag (np, cp - np, true,
4746                                   lb.buffer, cp - lb.buffer + 1,
4747                                   lineno, linecharno);
4748                         continuation = false;
4749                       }
4750                     if (writer)
4751                       {
4752                         size_t name_len = cp - np + 1;
4753                         char *wr_name = xnew (name_len + 1, char);
4754
4755                         memcpy (wr_name, np, name_len - 1);
4756                         memcpy (wr_name + name_len - 1, "=", 2);
4757                         pfnote (wr_name, true, lb.buffer, cp - lb.buffer + 1,
4758                                 lineno, linecharno);
4759                         continuation = false;
4760                       }
4761                     if (alias)
4762                       {
4763                         if (!continuation)
4764                           make_tag (np, cp - np, true,
4765                                     lb.buffer, cp - lb.buffer + 1,
4766                                     lineno, linecharno);
4767                         continuation = false;
4768                         while (*cp && *cp != '#' && *cp != ';')
4769                           {
4770                             if (*cp == ',')
4771                               continuation = true;
4772                             else if (!c_isspace (*cp))
4773                               continuation = false;
4774                             cp++;
4775                           }
4776                         if (*cp == ';')
4777                           continuation = false;
4778                       }
4779                     cp = skip_spaces (cp);
4780                   } while ((alias
4781                             ? (*cp == ',')
4782                             : (continuation = (*cp == ',')))
4783                            && (cp = skip_spaces (cp + 1), *cp && *cp != '#'));
4784                 }
4785               if (*cp != '#')
4786                 cp = skip_name (cp);
4787               while (*cp && *cp != '#' && notinname (*cp))
4788                 cp++;
4789             }
4790         }
4791     }
4792 }
4793
4794 \f
4795 /*
4796  * PHP support
4797  * Look for:
4798  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4799  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4800  *  - /^[ \t]*define\(\"[^\"]+/
4801  * Only with --members:
4802  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4803  * Idea by Diez B. Roggisch (2001)
4804  */
4805 static void
4806 PHP_functions (FILE *inf)
4807 {
4808   char *cp, *name;
4809   bool search_identifier = false;
4810
4811   LOOP_ON_INPUT_LINES (inf, lb, cp)
4812     {
4813       cp = skip_spaces (cp);
4814       name = cp;
4815       if (search_identifier
4816           && *cp != '\0')
4817         {
4818           while (!notinname (*cp))
4819             cp++;
4820           make_tag (name, cp - name, true,
4821                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4822           search_identifier = false;
4823         }
4824       else if (LOOKING_AT (cp, "function"))
4825         {
4826           if (*cp == '&')
4827             cp = skip_spaces (cp+1);
4828           if (*cp != '\0')
4829             {
4830               name = cp;
4831               while (!notinname (*cp))
4832                 cp++;
4833               make_tag (name, cp - name, true,
4834                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4835             }
4836           else
4837             search_identifier = true;
4838         }
4839       else if (LOOKING_AT (cp, "class"))
4840         {
4841           if (*cp != '\0')
4842             {
4843               name = cp;
4844               while (*cp != '\0' && !c_isspace (*cp))
4845                 cp++;
4846               make_tag (name, cp - name, false,
4847                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4848             }
4849           else
4850             search_identifier = true;
4851         }
4852       else if (strneq (cp, "define", 6)
4853                && (cp = skip_spaces (cp+6))
4854                && *cp++ == '('
4855                && (*cp == '"' || *cp == '\''))
4856         {
4857           char quote = *cp++;
4858           name = cp;
4859           while (*cp != quote && *cp != '\0')
4860             cp++;
4861           make_tag (name, cp - name, false,
4862                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4863         }
4864       else if (members
4865                && LOOKING_AT (cp, "var")
4866                && *cp == '$')
4867         {
4868           name = cp;
4869           while (!notinname (*cp))
4870             cp++;
4871           make_tag (name, cp - name, false,
4872                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4873         }
4874     }
4875 }
4876
4877 \f
4878 /*
4879  * Cobol tag functions
4880  * We could look for anything that could be a paragraph name.
4881  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4882  * Idea by Corny de Souza (1993)
4883  */
4884 static void
4885 Cobol_paragraphs (FILE *inf)
4886 {
4887   register char *bp, *ep;
4888
4889   LOOP_ON_INPUT_LINES (inf, lb, bp)
4890     {
4891       if (lb.len < 9)
4892         continue;
4893       bp += 8;
4894
4895       /* If eoln, compiler option or comment ignore whole line. */
4896       if (bp[-1] != ' ' || !c_isalnum (bp[0]))
4897         continue;
4898
4899       for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
4900         continue;
4901       if (*ep++ == '.')
4902         make_tag (bp, ep - bp, true,
4903                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4904     }
4905 }
4906
4907 \f
4908 /*
4909  * Makefile support
4910  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4911  */
4912 static void
4913 Makefile_targets (FILE *inf)
4914 {
4915   register char *bp;
4916
4917   LOOP_ON_INPUT_LINES (inf, lb, bp)
4918     {
4919       if (*bp == '\t' || *bp == '#')
4920         continue;
4921       while (*bp != '\0' && *bp != '=' && *bp != ':')
4922         bp++;
4923       if (*bp == ':' || (globals && *bp == '='))
4924         {
4925           /* We should detect if there is more than one tag, but we do not.
4926              We just skip initial and final spaces. */
4927           char * namestart = skip_spaces (lb.buffer);
4928           while (--bp > namestart)
4929             if (!notinname (*bp))
4930               break;
4931           make_tag (namestart, bp - namestart + 1, true,
4932                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4933         }
4934     }
4935 }
4936
4937 \f
4938 /*
4939  * Pascal parsing
4940  * Original code by Mosur K. Mohan (1989)
4941  *
4942  *  Locates tags for procedures & functions.  Doesn't do any type- or
4943  *  var-definitions.  It does look for the keyword "extern" or
4944  *  "forward" immediately following the procedure statement; if found,
4945  *  the tag is skipped.
4946  */
4947 static void
4948 Pascal_functions (FILE *inf)
4949 {
4950   linebuffer tline;             /* mostly copied from C_entries */
4951   long save_lcno;
4952   int save_lineno, namelen, taglen;
4953   char c, *name;
4954
4955   bool                          /* each of these flags is true if: */
4956     incomment,                  /* point is inside a comment */
4957     inquote,                    /* point is inside '..' string */
4958     get_tagname,                /* point is after PROCEDURE/FUNCTION
4959                                    keyword, so next item = potential tag */
4960     found_tag,                  /* point is after a potential tag */
4961     inparms,                    /* point is within parameter-list */
4962     verify_tag;                 /* point has passed the parm-list, so the
4963                                    next token will determine whether this
4964                                    is a FORWARD/EXTERN to be ignored, or
4965                                    whether it is a real tag */
4966
4967   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4968   name = NULL;                  /* keep compiler quiet */
4969   dbp = lb.buffer;
4970   *dbp = '\0';
4971   linebuffer_init (&tline);
4972
4973   incomment = inquote = false;
4974   found_tag = false;            /* have a proc name; check if extern */
4975   get_tagname = false;          /* found "procedure" keyword         */
4976   inparms = false;              /* found '(' after "proc"            */
4977   verify_tag = false;           /* check if "extern" is ahead        */
4978
4979
4980   while (perhaps_more_input (inf)) /* long main loop to get next char */
4981     {
4982       c = *dbp++;
4983       if (c == '\0')            /* if end of line */
4984         {
4985           readline (&lb, inf);
4986           dbp = lb.buffer;
4987           if (*dbp == '\0')
4988             continue;
4989           if (!((found_tag && verify_tag)
4990                 || get_tagname))
4991             c = *dbp++;         /* only if don't need *dbp pointing
4992                                    to the beginning of the name of
4993                                    the procedure or function */
4994         }
4995       if (incomment)
4996         {
4997           if (c == '}')         /* within { } comments */
4998             incomment = false;
4999           else if (c == '*' && *dbp == ')') /* within (* *) comments */
5000             {
5001               dbp++;
5002               incomment = false;
5003             }
5004           continue;
5005         }
5006       else if (inquote)
5007         {
5008           if (c == '\'')
5009             inquote = false;
5010           continue;
5011         }
5012       else
5013         switch (c)
5014           {
5015           case '\'':
5016             inquote = true;     /* found first quote */
5017             continue;
5018           case '{':             /* found open { comment */
5019             incomment = true;
5020             continue;
5021           case '(':
5022             if (*dbp == '*')    /* found open (* comment */
5023               {
5024                 incomment = true;
5025                 dbp++;
5026               }
5027             else if (found_tag) /* found '(' after tag, i.e., parm-list */
5028               inparms = true;
5029             continue;
5030           case ')':             /* end of parms list */
5031             if (inparms)
5032               inparms = false;
5033             continue;
5034           case ';':
5035             if (found_tag && !inparms) /* end of proc or fn stmt */
5036               {
5037                 verify_tag = true;
5038                 break;
5039               }
5040             continue;
5041           }
5042       if (found_tag && verify_tag && (*dbp != ' '))
5043         {
5044           /* Check if this is an "extern" declaration. */
5045           if (*dbp == '\0')
5046             continue;
5047           if (c_tolower (*dbp) == 'e')
5048             {
5049               if (nocase_tail ("extern")) /* superfluous, really! */
5050                 {
5051                   found_tag = false;
5052                   verify_tag = false;
5053                 }
5054             }
5055           else if (c_tolower (*dbp) == 'f')
5056             {
5057               if (nocase_tail ("forward")) /* check for forward reference */
5058                 {
5059                   found_tag = false;
5060                   verify_tag = false;
5061                 }
5062             }
5063           if (found_tag && verify_tag) /* not external proc, so make tag */
5064             {
5065               found_tag = false;
5066               verify_tag = false;
5067               make_tag (name, namelen, true,
5068                         tline.buffer, taglen, save_lineno, save_lcno);
5069               continue;
5070             }
5071         }
5072       if (get_tagname)          /* grab name of proc or fn */
5073         {
5074           char *cp;
5075
5076           if (*dbp == '\0')
5077             continue;
5078
5079           /* Find block name. */
5080           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
5081             continue;
5082
5083           /* Save all values for later tagging. */
5084           linebuffer_setlen (&tline, lb.len);
5085           strcpy (tline.buffer, lb.buffer);
5086           save_lineno = lineno;
5087           save_lcno = linecharno;
5088           name = tline.buffer + (dbp - lb.buffer);
5089           namelen = cp - dbp;
5090           taglen = cp - lb.buffer + 1;
5091
5092           dbp = cp;             /* set dbp to e-o-token */
5093           get_tagname = false;
5094           found_tag = true;
5095           continue;
5096
5097           /* And proceed to check for "extern". */
5098         }
5099       else if (!incomment && !inquote && !found_tag)
5100         {
5101           /* Check for proc/fn keywords. */
5102           switch (c_tolower (c))
5103             {
5104             case 'p':
5105               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
5106                 get_tagname = true;
5107               continue;
5108             case 'f':
5109               if (nocase_tail ("unction"))
5110                 get_tagname = true;
5111               continue;
5112             }
5113         }
5114     } /* while not eof */
5115
5116   free (tline.buffer);
5117 }
5118
5119 \f
5120 /*
5121  * Lisp tag functions
5122  *  look for (def or (DEF, quote or QUOTE
5123  */
5124
5125 static void L_getit (void);
5126
5127 static void
5128 L_getit (void)
5129 {
5130   if (*dbp == '\'')             /* Skip prefix quote */
5131     dbp++;
5132   else if (*dbp == '(')
5133   {
5134     dbp++;
5135     /* Try to skip "(quote " */
5136     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
5137       /* Ok, then skip "(" before name in (defstruct (foo)) */
5138       dbp = skip_spaces (dbp);
5139   }
5140   get_tag (dbp, NULL);
5141 }
5142
5143 static void
5144 Lisp_functions (FILE *inf)
5145 {
5146   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5147     {
5148       if (dbp[0] != '(')
5149         continue;
5150
5151       /* "(defvar foo)" is a declaration rather than a definition.  */
5152       if (! declarations)
5153         {
5154           char *p = dbp + 1;
5155           if (LOOKING_AT (p, "defvar"))
5156             {
5157               p = skip_name (p); /* past var name */
5158               p = skip_spaces (p);
5159               if (*p == ')')
5160                 continue;
5161             }
5162         }
5163
5164       if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
5165         dbp += 3;
5166
5167       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5168         {
5169           dbp = skip_non_spaces (dbp);
5170           dbp = skip_spaces (dbp);
5171           L_getit ();
5172         }
5173       else
5174         {
5175           /* Check for (foo::defmumble name-defined ... */
5176           do
5177             dbp++;
5178           while (!notinname (*dbp) && *dbp != ':');
5179           if (*dbp == ':')
5180             {
5181               do
5182                 dbp++;
5183               while (*dbp == ':');
5184
5185               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5186                 {
5187                   dbp = skip_non_spaces (dbp);
5188                   dbp = skip_spaces (dbp);
5189                   L_getit ();
5190                 }
5191             }
5192         }
5193     }
5194 }
5195
5196 \f
5197 /*
5198  * Lua script language parsing
5199  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5200  *
5201  *  "function" and "local function" are tags if they start at column 1.
5202  */
5203 static void
5204 Lua_functions (FILE *inf)
5205 {
5206   register char *bp;
5207
5208   LOOP_ON_INPUT_LINES (inf, lb, bp)
5209     {
5210       bp = skip_spaces (bp);
5211       if (bp[0] != 'f' && bp[0] != 'l')
5212         continue;
5213
5214       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5215
5216       if (LOOKING_AT (bp, "function"))
5217         {
5218           char *tag_name, *tp_dot, *tp_colon;
5219
5220           get_tag (bp, &tag_name);
5221           /* If the tag ends with ".foo" or ":foo", make an additional tag for
5222              "foo".  */
5223           tp_dot = strrchr (tag_name, '.');
5224           tp_colon = strrchr (tag_name, ':');
5225           if (tp_dot || tp_colon)
5226             {
5227               char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
5228               int len_add = p - tag_name + 1;
5229
5230               get_tag (bp + len_add, NULL);
5231             }
5232         }
5233     }
5234 }
5235
5236 \f
5237 /*
5238  * PostScript tags
5239  * Just look for lines where the first character is '/'
5240  * Also look at "defineps" for PSWrap
5241  * Ideas by:
5242  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
5243  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5244  */
5245 static void
5246 PS_functions (FILE *inf)
5247 {
5248   register char *bp, *ep;
5249
5250   LOOP_ON_INPUT_LINES (inf, lb, bp)
5251     {
5252       if (bp[0] == '/')
5253         {
5254           for (ep = bp+1;
5255                *ep != '\0' && *ep != ' ' && *ep != '{';
5256                ep++)
5257             continue;
5258           make_tag (bp, ep - bp, true,
5259                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5260         }
5261       else if (LOOKING_AT (bp, "defineps"))
5262         get_tag (bp, NULL);
5263     }
5264 }
5265
5266 \f
5267 /*
5268  * Forth tags
5269  * Ignore anything after \ followed by space or in ( )
5270  * Look for words defined by :
5271  * Look for constant, code, create, defer, value, and variable
5272  * OBP extensions:  Look for buffer:, field,
5273  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5274  */
5275 static void
5276 Forth_words (FILE *inf)
5277 {
5278   register char *bp;
5279
5280   LOOP_ON_INPUT_LINES (inf, lb, bp)
5281     while ((bp = skip_spaces (bp))[0] != '\0')
5282       if (bp[0] == '\\' && c_isspace (bp[1]))
5283         break;                  /* read next line */
5284       else if (bp[0] == '(' && c_isspace (bp[1]))
5285         do                      /* skip to ) or eol */
5286           bp++;
5287         while (*bp != ')' && *bp != '\0');
5288       else if ((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5289                || LOOKING_AT_NOCASE (bp, "constant")
5290                || LOOKING_AT_NOCASE (bp, "code")
5291                || LOOKING_AT_NOCASE (bp, "create")
5292                || LOOKING_AT_NOCASE (bp, "defer")
5293                || LOOKING_AT_NOCASE (bp, "value")
5294                || LOOKING_AT_NOCASE (bp, "variable")
5295                || LOOKING_AT_NOCASE (bp, "buffer:")
5296                || LOOKING_AT_NOCASE (bp, "field"))
5297         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5298       else
5299         bp = skip_non_spaces (bp);
5300 }
5301
5302 \f
5303 /*
5304  * Scheme tag functions
5305  * look for (def... xyzzy
5306  *          (def... (xyzzy
5307  *          (def ... ((...(xyzzy ....
5308  *          (set! xyzzy
5309  * Original code by Ken Haase (1985?)
5310  */
5311 static void
5312 Scheme_functions (FILE *inf)
5313 {
5314   register char *bp;
5315
5316   LOOP_ON_INPUT_LINES (inf, lb, bp)
5317     {
5318       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5319         {
5320           bp = skip_non_spaces (bp+4);
5321           /* Skip over open parens and white space.  Don't continue past
5322              '\0'. */
5323           while (*bp && notinname (*bp))
5324             bp++;
5325           get_tag (bp, NULL);
5326         }
5327       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5328         get_tag (bp, NULL);
5329     }
5330 }
5331
5332 \f
5333 /* Find tags in TeX and LaTeX input files.  */
5334
5335 /* TEX_toktab is a table of TeX control sequences that define tags.
5336  * Each entry records one such control sequence.
5337  *
5338  * Original code from who knows whom.
5339  * Ideas by:
5340  *   Stefan Monnier (2002)
5341  */
5342
5343 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5344
5345 /* Default set of control sequences to put into TEX_toktab.
5346    The value of environment var TEXTAGS is prepended to this.  */
5347 static const char *TEX_defenv = "\
5348 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5349 :part:appendix:entry:index:def\
5350 :newcommand:renewcommand:newenvironment:renewenvironment";
5351
5352 static void TEX_decode_env (const char *, const char *);
5353
5354 /*
5355  * TeX/LaTeX scanning loop.
5356  */
5357 static void
5358 TeX_commands (FILE *inf)
5359 {
5360   char *cp;
5361   linebuffer *key;
5362
5363   char TEX_esc = '\0';
5364   char TEX_opgrp, TEX_clgrp;
5365
5366   /* Initialize token table once from environment. */
5367   if (TEX_toktab == NULL)
5368     TEX_decode_env ("TEXTAGS", TEX_defenv);
5369
5370   LOOP_ON_INPUT_LINES (inf, lb, cp)
5371     {
5372       /* Look at each TEX keyword in line. */
5373       for (;;)
5374         {
5375           /* Look for a TEX escape. */
5376           while (true)
5377             {
5378               char c = *cp++;
5379               if (c == '\0' || c == '%')
5380                 goto tex_next_line;
5381
5382               /* Select either \ or ! as escape character, whichever comes
5383                  first outside a comment.  */
5384               if (!TEX_esc)
5385                 switch (c)
5386                   {
5387                   case '\\':
5388                     TEX_esc = c;
5389                     TEX_opgrp = '{';
5390                     TEX_clgrp = '}';
5391                     break;
5392
5393                   case '!':
5394                     TEX_esc = c;
5395                     TEX_opgrp = '<';
5396                     TEX_clgrp = '>';
5397                     break;
5398                   }
5399
5400               if (c == TEX_esc)
5401                 break;
5402             }
5403
5404           for (key = TEX_toktab; key->buffer != NULL; key++)
5405             if (strneq (cp, key->buffer, key->len))
5406               {
5407                 char *p;
5408                 int namelen, linelen;
5409                 bool opgrp = false;
5410
5411                 cp = skip_spaces (cp + key->len);
5412                 if (*cp == TEX_opgrp)
5413                   {
5414                     opgrp = true;
5415                     cp++;
5416                   }
5417                 for (p = cp;
5418                      (!c_isspace (*p) && *p != '#' &&
5419                       *p != TEX_opgrp && *p != TEX_clgrp);
5420                      p++)
5421                   continue;
5422                 namelen = p - cp;
5423                 linelen = lb.len;
5424                 if (!opgrp || *p == TEX_clgrp)
5425                   {
5426                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5427                       p++;
5428                     linelen = p - lb.buffer + 1;
5429                   }
5430                 make_tag (cp, namelen, true,
5431                           lb.buffer, linelen, lineno, linecharno);
5432                 goto tex_next_line; /* We only tag a line once */
5433               }
5434         }
5435     tex_next_line:
5436       ;
5437     }
5438 }
5439
5440 /* Read environment and prepend it to the default string.
5441    Build token table. */
5442 static void
5443 TEX_decode_env (const char *evarname, const char *defenv)
5444 {
5445   register const char *env, *p;
5446   int i, len;
5447
5448   /* Append default string to environment. */
5449   env = getenv (evarname);
5450   if (!env)
5451     env = defenv;
5452   else
5453     env = concat (env, defenv, "");
5454
5455   /* Allocate a token table */
5456   for (len = 1, p = env; (p = strchr (p, ':')); )
5457     if (*++p)
5458       len++;
5459   TEX_toktab = xnew (len, linebuffer);
5460
5461   /* Unpack environment string into token table. Be careful about */
5462   /* zero-length strings (leading ':', "::" and trailing ':') */
5463   for (i = 0; *env != '\0';)
5464     {
5465       p = strchr (env, ':');
5466       if (!p)                   /* End of environment string. */
5467         p = env + strlen (env);
5468       if (p - env > 0)
5469         {                       /* Only non-zero strings. */
5470           TEX_toktab[i].buffer = savenstr (env, p - env);
5471           TEX_toktab[i].len = p - env;
5472           i++;
5473         }
5474       if (*p)
5475         env = p + 1;
5476       else
5477         {
5478           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5479           TEX_toktab[i].len = 0;
5480           break;
5481         }
5482     }
5483 }
5484
5485 \f
5486 /* Texinfo support.  Dave Love, Mar. 2000.  */
5487 static void
5488 Texinfo_nodes (FILE *inf)
5489 {
5490   char *cp, *start;
5491   LOOP_ON_INPUT_LINES (inf, lb, cp)
5492     if (LOOKING_AT (cp, "@node"))
5493       {
5494         start = cp;
5495         while (*cp != '\0' && *cp != ',')
5496           cp++;
5497         make_tag (start, cp - start, true,
5498                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5499       }
5500 }
5501
5502 \f
5503 /*
5504  * HTML support.
5505  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5506  * Contents of <a name=xxx> are tags with name xxx.
5507  *
5508  * Francesco Potortì, 2002.
5509  */
5510 static void
5511 HTML_labels (FILE *inf)
5512 {
5513   bool getnext = false;         /* next text outside of HTML tags is a tag */
5514   bool skiptag = false;         /* skip to the end of the current HTML tag */
5515   bool intag = false;           /* inside an html tag, looking for ID= */
5516   bool inanchor = false;        /* when INTAG, is an anchor, look for NAME= */
5517   char *end;
5518
5519
5520   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5521
5522   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5523     for (;;)                    /* loop on the same line */
5524       {
5525         if (skiptag)            /* skip HTML tag */
5526           {
5527             while (*dbp != '\0' && *dbp != '>')
5528               dbp++;
5529             if (*dbp == '>')
5530               {
5531                 dbp += 1;
5532                 skiptag = false;
5533                 continue;       /* look on the same line */
5534               }
5535             break;              /* go to next line */
5536           }
5537
5538         else if (intag) /* look for "name=" or "id=" */
5539           {
5540             while (*dbp != '\0' && *dbp != '>'
5541                    && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5542               dbp++;
5543             if (*dbp == '\0')
5544               break;            /* go to next line */
5545             if (*dbp == '>')
5546               {
5547                 dbp += 1;
5548                 intag = false;
5549                 continue;       /* look on the same line */
5550               }
5551             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5552                 || LOOKING_AT_NOCASE (dbp, "id="))
5553               {
5554                 bool quoted = (dbp[0] == '"');
5555
5556                 if (quoted)
5557                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5558                     continue;
5559                 else
5560                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5561                     continue;
5562                 linebuffer_setlen (&token_name, end - dbp);
5563                 memcpy (token_name.buffer, dbp, end - dbp);
5564                 token_name.buffer[end - dbp] = '\0';
5565
5566                 dbp = end;
5567                 intag = false;  /* we found what we looked for */
5568                 skiptag = true; /* skip to the end of the tag */
5569                 getnext = true; /* then grab the text */
5570                 continue;       /* look on the same line */
5571               }
5572             dbp += 1;
5573           }
5574
5575         else if (getnext)       /* grab next tokens and tag them */
5576           {
5577             dbp = skip_spaces (dbp);
5578             if (*dbp == '\0')
5579               break;            /* go to next line */
5580             if (*dbp == '<')
5581               {
5582                 intag = true;
5583                 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5584                 continue;       /* look on the same line */
5585               }
5586
5587             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5588               continue;
5589             make_tag (token_name.buffer, token_name.len, true,
5590                       dbp, end - dbp, lineno, linecharno);
5591             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5592             getnext = false;
5593             break;              /* go to next line */
5594           }
5595
5596         else                    /* look for an interesting HTML tag */
5597           {
5598             while (*dbp != '\0' && *dbp != '<')
5599               dbp++;
5600             if (*dbp == '\0')
5601               break;            /* go to next line */
5602             intag = true;
5603             if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5604               {
5605                 inanchor = true;
5606                 continue;       /* look on the same line */
5607               }
5608             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5609                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5610                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5611                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5612               {
5613                 intag = false;
5614                 getnext = true;
5615                 continue;       /* look on the same line */
5616               }
5617             dbp += 1;
5618           }
5619       }
5620 }
5621
5622 \f
5623 /*
5624  * Prolog support
5625  *
5626  * Assumes that the predicate or rule starts at column 0.
5627  * Only the first clause of a predicate or rule is added.
5628  * Original code by Sunichirou Sugou (1989)
5629  * Rewritten by Anders Lindgren (1996)
5630  */
5631 static size_t prolog_pr (char *, char *);
5632 static void prolog_skip_comment (linebuffer *, FILE *);
5633 static size_t prolog_atom (char *, size_t);
5634
5635 static void
5636 Prolog_functions (FILE *inf)
5637 {
5638   char *cp, *last;
5639   size_t len;
5640   size_t allocated;
5641
5642   allocated = 0;
5643   len = 0;
5644   last = NULL;
5645
5646   LOOP_ON_INPUT_LINES (inf, lb, cp)
5647     {
5648       if (cp[0] == '\0')        /* Empty line */
5649         continue;
5650       else if (c_isspace (cp[0])) /* Not a predicate */
5651         continue;
5652       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5653         prolog_skip_comment (&lb, inf);
5654       else if ((len = prolog_pr (cp, last)) > 0)
5655         {
5656           /* Predicate or rule.  Store the function name so that we
5657              only generate a tag for the first clause.  */
5658           if (last == NULL)
5659             last = xnew (len + 1, char);
5660           else if (len + 1 > allocated)
5661             xrnew (last, len + 1, char);
5662           allocated = len + 1;
5663           memcpy (last, cp, len);
5664           last[len] = '\0';
5665         }
5666     }
5667   free (last);
5668 }
5669
5670
5671 static void
5672 prolog_skip_comment (linebuffer *plb, FILE *inf)
5673 {
5674   char *cp;
5675
5676   do
5677     {
5678       for (cp = plb->buffer; *cp != '\0'; cp++)
5679         if (cp[0] == '*' && cp[1] == '/')
5680           return;
5681       readline (plb, inf);
5682     }
5683   while (perhaps_more_input (inf));
5684 }
5685
5686 /*
5687  * A predicate or rule definition is added if it matches:
5688  *     <beginning of line><Prolog Atom><whitespace>(
5689  * or  <beginning of line><Prolog Atom><whitespace>:-
5690  *
5691  * It is added to the tags database if it doesn't match the
5692  * name of the previous clause header.
5693  *
5694  * Return the size of the name of the predicate or rule, or 0 if no
5695  * header was found.
5696  */
5697 static size_t
5698 prolog_pr (char *s, char *last)
5699
5700                                 /* Name of last clause. */
5701 {
5702   size_t pos;
5703   size_t len;
5704
5705   pos = prolog_atom (s, 0);
5706   if (! pos)
5707     return 0;
5708
5709   len = pos;
5710   pos = skip_spaces (s + pos) - s;
5711
5712   if ((s[pos] == '.'
5713        || (s[pos] == '(' && (pos += 1))
5714        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5715       && (last == NULL          /* save only the first clause */
5716           || len != strlen (last)
5717           || !strneq (s, last, len)))
5718         {
5719           make_tag (s, len, true, s, pos, lineno, linecharno);
5720           return len;
5721         }
5722   else
5723     return 0;
5724 }
5725
5726 /*
5727  * Consume a Prolog atom.
5728  * Return the number of bytes consumed, or 0 if there was an error.
5729  *
5730  * A prolog atom, in this context, could be one of:
5731  * - An alphanumeric sequence, starting with a lower case letter.
5732  * - A quoted arbitrary string. Single quotes can escape themselves.
5733  *   Backslash quotes everything.
5734  */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *const start = s + pos;
  const char *p = start;

  if (c_islower (*p) || *p == '_')
    {
      /* Unquoted atom: letters, digits and underscores.  */
      do
        p++;
      while (c_isalnum (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return 0;

  /* Quoted atom: scan for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return 0;

      case '\\':
        /* A backslash quotes the next character, which must exist.  */
        if (p[1] == '\0')
          return 0;
        p += 2;
        break;

      case '\'':
        /* Two quotes in a row stand for one quote inside the atom;
           a single one closes it and is counted in the result.  */
        if (p[1] != '\'')
          return p + 1 - start;
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
5782
5783 \f
5784 /*
5785  * Support for Erlang
5786  *
5787  * Generates tags for functions, defines, and records.
5788  * Assumes that Erlang functions start at column 0.
5789  * Original code by Anders Lindgren (1996)
5790  */
5791 static int erlang_func (char *, char *);
5792 static void erlang_attribute (char *);
5793 static int erlang_atom (char *);
5794
5795 static void
5796 Erlang_functions (FILE *inf)
5797 {
5798   char *cp, *last;
5799   int len;
5800   int allocated;
5801
5802   allocated = 0;
5803   len = 0;
5804   last = NULL;
5805
5806   LOOP_ON_INPUT_LINES (inf, lb, cp)
5807     {
5808       if (cp[0] == '\0')        /* Empty line */
5809         continue;
5810       else if (c_isspace (cp[0])) /* Not function nor attribute */
5811         continue;
5812       else if (cp[0] == '%')    /* comment */
5813         continue;
5814       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5815         continue;
5816       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5817         {
5818           erlang_attribute (cp);
5819           if (last != NULL)
5820             {
5821               free (last);
5822               last = NULL;
5823             }
5824         }
5825       else if ((len = erlang_func (cp, last)) > 0)
5826         {
5827           /*
5828            * Function.  Store the function name so that we only
5829            * generates a tag for the first clause.
5830            */
5831           if (last == NULL)
5832             last = xnew (len + 1, char);
5833           else if (len + 1 > allocated)
5834             xrnew (last, len + 1, char);
5835           allocated = len + 1;
5836           memcpy (last, cp, len);
5837           last[len] = '\0';
5838         }
5839     }
5840   free (last);
5841 }
5842
5843
5844 /*
5845  * A function definition is added if it matches:
5846  *     <beginning of line><Erlang Atom><whitespace>(
5847  *
5848  * It is added to the tags database if it doesn't match the
5849  * name of the previous clause header.
5850  *
5851  * Return the size of the name of the function, or 0 if no function
5852  * was found.
5853  */
5854 static int
5855 erlang_func (char *s, char *last)
5856
5857                                 /* Name of last clause. */
5858 {
5859   int pos;
5860   int len;
5861
5862   pos = erlang_atom (s);
5863   if (pos < 1)
5864     return 0;
5865
5866   len = pos;
5867   pos = skip_spaces (s + pos) - s;
5868
5869   /* Save only the first clause. */
5870   if (s[pos++] == '('
5871       && (last == NULL
5872           || len != (int)strlen (last)
5873           || !strneq (s, last, len)))
5874         {
5875           make_tag (s, len, true, s, pos, lineno, linecharno);
5876           return len;
5877         }
5878
5879   return 0;
5880 }
5881
5882
5883 /*
5884  * Handle attributes.  Currently, tags are generated for defines
5885  * and records.
5886  *
5887  * They are on the form:
5888  * -define(foo, bar).
5889  * -define(Foo(M, N), M+N).
5890  * -record(graph, {vtab = notable, cyclic = true}).
5891  */
5892 static void
5893 erlang_attribute (char *s)
5894 {
5895   char *cp = s;
5896
5897   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5898       && *cp++ == '(')
5899     {
5900       int len = erlang_atom (skip_spaces (cp));
5901       if (len > 0)
5902         make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5903     }
5904   return;
5905 }
5906
5907
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not closed before the end of S.
 */
static int
erlang_atom (char *s)
{
  int pos;

  if (s[0] == '\'')
    {
      /* Quoted atom: look for the closing quote.  */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              /* A backslash quotes the next character.  */
              pos++;
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      return pos + 1;           /* count the closing quote */
    }

  if (!(c_isalpha (s[0]) || s[0] == '_'))
    return 0;

  /* The atom is unquoted. */
  pos = 1;
  while (c_isalnum (s[pos]) || s[pos] == '_')
    pos++;
  return pos;
}
5935
5936 \f
5937 static char *scan_separators (char *);
5938 static void add_regex (char *, language *);
5939 static char *substitute (char *, char *, struct re_registers *);
5940
5941 /*
5942  * Take a string like "/blah/" and turn it into "blah", verifying
5943  * that the first and last characters are the same, and handling
5944  * quoted separator characters.  Actually, stops on the occurrence of
5945  * an unquoted separator.  Also process \t, \n, etc. and turn into
5946  * appropriate characters. Works in place.  Null terminates name string.
5947  * Returns pointer to terminating separator, or NULL for
5948  * unterminated regexps.
5949  */
static char *
scan_separators (char *name)
{
  const char sep = name[0];     /* the separator is the first character */
  char *dst = name;             /* where the next output byte goes */
  char *src = name + 1;         /* next input byte; always ahead of DST */
  bool escaped = false;         /* previous byte was an unquoted backslash */

  while (*src != '\0')
    {
      const char ch = *src;

      if (escaped)
        {
          escaped = false;
          switch (ch)
            {
            case 'a': *dst++ = '\007'; break;   /* BEL (bell)            */
            case 'b': *dst++ = '\b'; break;     /* BS (back space)       */
            case 'd': *dst++ = 0177; break;     /* DEL (delete)          */
            case 'e': *dst++ = 033; break;      /* ESC (escape)          */
            case 'f': *dst++ = '\f'; break;     /* FF (form feed)        */
            case 'n': *dst++ = '\n'; break;     /* NL (new line)         */
            case 'r': *dst++ = '\r'; break;     /* CR (carriage return)  */
            case 't': *dst++ = '\t'; break;     /* TAB (horizontal tab)  */
            case 'v': *dst++ = '\v'; break;     /* VT (vertical tab)     */
            default:
              /* A quoted separator loses its backslash; anything else
                 keeps it.  */
              if (ch != sep)
                *dst++ = '\\';
              *dst++ = ch;
              break;
            }
        }
      else if (ch == '\\')
        escaped = true;
      else if (ch == sep)
        break;                  /* unquoted separator: end of the string */
      else
        *dst++ = ch;

      src++;
    }

  /* Terminate copied string. */
  *dst = '\0';

  /* No closing separator signals an unterminated regexp.  */
  return *src == sep ? src : NULL;
}
5999
6000 /* Look at the argument of --regex or --no-regex and do the right
6001    thing.  Same for each line of a regexp file. */
6002 static void
6003 analyze_regex (char *regex_arg)
6004 {
6005   if (regex_arg == NULL)
6006     {
6007       free_regexps ();          /* --no-regex: remove existing regexps */
6008       return;
6009     }
6010
6011   /* A real --regexp option or a line in a regexp file. */
6012   switch (regex_arg[0])
6013     {
6014       /* Comments in regexp file or null arg to --regex. */
6015     case '\0':
6016     case ' ':
6017     case '\t':
6018       break;
6019
6020       /* Read a regex file.  This is recursive and may result in a
6021          loop, which will stop when the file descriptors are exhausted. */
6022     case '@':
6023       {
6024         FILE *regexfp;
6025         linebuffer regexbuf;
6026         char *regexfile = regex_arg + 1;
6027
6028         /* regexfile is a file containing regexps, one per line. */
6029         regexfp = fopen (regexfile, "r" FOPEN_BINARY);
6030         if (regexfp == NULL)
6031           pfatal (regexfile);
6032         linebuffer_init (&regexbuf);
6033         while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
6034           analyze_regex (regexbuf.buffer);
6035         free (regexbuf.buffer);
6036         if (fclose (regexfp) != 0)
6037           pfatal (regexfile);
6038       }
6039       break;
6040
6041       /* Regexp to be used for a specific language only. */
6042     case '{':
6043       {
6044         language *lang;
6045         char *lang_name = regex_arg + 1;
6046         char *cp;
6047
6048         for (cp = lang_name; *cp != '}'; cp++)
6049           if (*cp == '\0')
6050             {
6051               error ("unterminated language name in regex: %s", regex_arg);
6052               return;
6053             }
6054         *cp++ = '\0';
6055         lang = get_language_from_langname (lang_name);
6056         if (lang == NULL)
6057           return;
6058         add_regex (cp, lang);
6059       }
6060       break;
6061
6062       /* Regexp to be used for any language. */
6063     default:
6064       add_regex (regex_arg, NULL);
6065       break;
6066     }
6067 }
6068
6069 /* Separate the regexp pattern, compile it,
6070    and care for optional name and modifiers. */
6071 static void
6072 add_regex (char *regexp_pattern, language *lang)
6073 {
6074   static struct re_pattern_buffer zeropattern;
6075   char sep, *pat, *name, *modifiers;
6076   char empty = '\0';
6077   const char *err;
6078   struct re_pattern_buffer *patbuf;
6079   regexp *rp;
6080   bool
6081     force_explicit_name = true, /* do not use implicit tag names */
6082     ignore_case = false,        /* case is significant */
6083     multi_line = false,         /* matches are done one line at a time */
6084     single_line = false;        /* dot does not match newline */
6085
6086
6087   if (strlen (regexp_pattern) < 3)
6088     {
6089       error ("null regexp");
6090       return;
6091     }
6092   sep = regexp_pattern[0];
6093   name = scan_separators (regexp_pattern);
6094   if (name == NULL)
6095     {
6096       error ("%s: unterminated regexp", regexp_pattern);
6097       return;
6098     }
6099   if (name[1] == sep)
6100     {
6101       error ("null name for regexp \"%s\"", regexp_pattern);
6102       return;
6103     }
6104   modifiers = scan_separators (name);
6105   if (modifiers == NULL)        /* no terminating separator --> no name */
6106     {
6107       modifiers = name;
6108       name = &empty;
6109     }
6110   else
6111     modifiers += 1;             /* skip separator */
6112
6113   /* Parse regex modifiers. */
6114   for (; modifiers[0] != '\0'; modifiers++)
6115     switch (modifiers[0])
6116       {
6117       case 'N':
6118         if (modifiers == name)
6119           error ("forcing explicit tag name but no name, ignoring");
6120         force_explicit_name = true;
6121         break;
6122       case 'i':
6123         ignore_case = true;
6124         break;
6125       case 's':
6126         single_line = true;
6127         /* FALLTHRU */
6128       case 'm':
6129         multi_line = true;
6130         need_filebuf = true;
6131         break;
6132       default:
6133         error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
6134         break;
6135       }
6136
6137   patbuf = xnew (1, struct re_pattern_buffer);
6138   *patbuf = zeropattern;
6139   if (ignore_case)
6140     {
6141       static char lc_trans[UCHAR_MAX + 1];
6142       int i;
6143       for (i = 0; i < UCHAR_MAX + 1; i++)
6144         lc_trans[i] = c_tolower (i);
6145       patbuf->translate = lc_trans;     /* translation table to fold case  */
6146     }
6147
6148   if (multi_line)
6149     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6150   else
6151     pat = regexp_pattern;
6152
6153   if (single_line)
6154     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6155   else
6156     re_set_syntax (RE_SYNTAX_EMACS);
6157
6158   err = re_compile_pattern (pat, strlen (pat), patbuf);
6159   if (multi_line)
6160     free (pat);
6161   if (err != NULL)
6162     {
6163       error ("%s while compiling pattern", err);
6164       return;
6165     }
6166
6167   rp = p_head;
6168   p_head = xnew (1, regexp);
6169   p_head->pattern = savestr (regexp_pattern);
6170   p_head->p_next = rp;
6171   p_head->lang = lang;
6172   p_head->pat = patbuf;
6173   p_head->name = savestr (name);
6174   p_head->error_signaled = false;
6175   p_head->force_explicit_name = force_explicit_name;
6176   p_head->ignore_case = ignore_case;
6177   p_head->multi_line = multi_line;
6178 }
6179
6180 /*
6181  * Do the substitutions indicated by the regular expression and
6182  * arguments.
6183  */
6184 static char *
6185 substitute (char *in, char *out, struct re_registers *regs)
6186 {
6187   char *result, *t;
6188   int size, dig, diglen;
6189
6190   result = NULL;
6191   size = strlen (out);
6192
6193   /* Pass 1: figure out how much to allocate by finding all \N strings. */
6194   if (out[size - 1] == '\\')
6195     fatal ("pattern error in \"%s\"", out);
6196   for (t = strchr (out, '\\');
6197        t != NULL;
6198        t = strchr (t + 2, '\\'))
6199     if (c_isdigit (t[1]))
6200       {
6201         dig = t[1] - '0';
6202         diglen = regs->end[dig] - regs->start[dig];
6203         size += diglen - 2;
6204       }
6205     else
6206       size -= 1;
6207
6208   /* Allocate space and do the substitutions. */
6209   assert (size >= 0);
6210   result = xnew (size + 1, char);
6211
6212   for (t = result; *out != '\0'; out++)
6213     if (*out == '\\' && c_isdigit (*++out))
6214       {
6215         dig = *out - '0';
6216         diglen = regs->end[dig] - regs->start[dig];
6217         memcpy (t, in + regs->start[dig], diglen);
6218         t += diglen;
6219       }
6220     else
6221       *t++ = *out;
6222   *t = '\0';
6223
6224   assert (t <= result + size);
6225   assert (t - result == (int)strlen (result));
6226
6227   return result;
6228 }
6229
6230 /* Deallocate all regexps. */
6231 static void
6232 free_regexps (void)
6233 {
6234   regexp *rp;
6235   while (p_head != NULL)
6236     {
6237       rp = p_head->p_next;
6238       free (p_head->pattern);
6239       free (p_head->name);
6240       free (p_head);
6241       p_head = rp;
6242     }
6243   return;
6244 }
6245
6246 /*
6247  * Reads the whole file as a single string from `filebuf' and looks for
6248  * multi-line regular expressions, creating tags on matches.
6249  * readline already dealt with normal regexps.
6250  *
6251  * Idea by Ben Wing <ben@666.com> (2002).
6252  */
6253 static void
6254 regex_tag_multiline (void)
6255 {
6256   char *buffer = filebuf.buffer;
6257   regexp *rp;
6258   char *name;
6259
6260   for (rp = p_head; rp != NULL; rp = rp->p_next)
6261     {
6262       int match = 0;
6263
6264       if (!rp->multi_line)
6265         continue;               /* skip normal regexps */
6266
6267       /* Generic initializations before parsing file from memory. */
6268       lineno = 1;               /* reset global line number */
6269       charno = 0;               /* reset global char number */
6270       linecharno = 0;           /* reset global char number of line start */
6271
6272       /* Only use generic regexps or those for the current language. */
6273       if (rp->lang != NULL && rp->lang != curfdp->lang)
6274         continue;
6275
6276       while (match >= 0 && match < filebuf.len)
6277         {
6278           match = re_search (rp->pat, buffer, filebuf.len, charno,
6279                              filebuf.len - match, &rp->regs);
6280           switch (match)
6281             {
6282             case -2:
6283               /* Some error. */
6284               if (!rp->error_signaled)
6285                 {
6286                   error ("regexp stack overflow while matching \"%s\"",
6287                          rp->pattern);
6288                   rp->error_signaled = true;
6289                 }
6290               break;
6291             case -1:
6292               /* No match. */
6293               break;
6294             default:
6295               if (match == rp->regs.end[0])
6296                 {
6297                   if (!rp->error_signaled)
6298                     {
6299                       error ("regexp matches the empty string: \"%s\"",
6300                              rp->pattern);
6301                       rp->error_signaled = true;
6302                     }
6303                   match = -3;   /* exit from while loop */
6304                   break;
6305                 }
6306
6307               /* Match occurred.  Construct a tag. */
6308               while (charno < rp->regs.end[0])
6309                 if (buffer[charno++] == '\n')
6310                   lineno++, linecharno = charno;
6311               name = rp->name;
6312               if (name[0] == '\0')
6313                 name = NULL;
6314               else /* make a named tag */
6315                 name = substitute (buffer, rp->name, &rp->regs);
6316               if (rp->force_explicit_name)
6317                 /* Force explicit tag name, if a name is there. */
6318                 pfnote (name, true, buffer + linecharno,
6319                         charno - linecharno + 1, lineno, linecharno);
6320               else
6321                 make_tag (name, strlen (name), true, buffer + linecharno,
6322                           charno - linecharno + 1, lineno, linecharno);
6323               break;
6324             }
6325         }
6326     }
6327 }
6328
6329 \f
6330 static bool
6331 nocase_tail (const char *cp)
6332 {
6333   int len = 0;
6334
6335   while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6336     cp++, len++;
6337   if (*cp == '\0' && !intoken (dbp[len]))
6338     {
6339       dbp += len;
6340       return true;
6341     }
6342   return false;
6343 }
6344
6345 static void
6346 get_tag (register char *bp, char **namepp)
6347 {
6348   register char *cp = bp;
6349
6350   if (*bp != '\0')
6351     {
6352       /* Go till you get to white space or a syntactic break */
6353       for (cp = bp + 1; !notinname (*cp); cp++)
6354         continue;
6355       make_tag (bp, cp - bp, true,
6356                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6357     }
6358
6359   if (namepp != NULL)
6360     *namepp = savenstr (bp, cp - bp);
6361 }
6362
6363 /*
6364  * Read a line of text from `stream' into `lbp', excluding the
6365  * newline or CR-NL, if any.  Return the number of characters read from
6366  * `stream', which is the length of the line including the newline.
6367  *
6368  * On DOS or Windows we do not count the CR character, if any before the
6369  * NL, in the returned length; this mirrors the behavior of Emacs on those
6370  * platforms (for text files, it translates CR-NL to NL as it reads in the
6371  * file).
6372  *
6373  * If multi-line regular expressions are requested, each line read is
6374  * appended to `filebuf'.
6375  */
6376 static long
6377 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6378 {
6379   char *buffer = lbp->buffer;
6380   char *p = lbp->buffer;
6381   char *pend;
6382   int chars_deleted;
6383
6384   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6385
6386   for (;;)
6387     {
6388       register int c = getc (stream);
6389       if (p == pend)
6390         {
6391           /* We're at the end of linebuffer: expand it. */
6392           lbp->size *= 2;
6393           xrnew (buffer, lbp->size, char);
6394           p += buffer - lbp->buffer;
6395           pend = buffer + lbp->size;
6396           lbp->buffer = buffer;
6397         }
6398       if (c == EOF)
6399         {
6400           if (ferror (stream))
6401             perror (filename);
6402           *p = '\0';
6403           chars_deleted = 0;
6404           break;
6405         }
6406       if (c == '\n')
6407         {
6408           if (p > buffer && p[-1] == '\r')
6409             {
6410               p -= 1;
6411               chars_deleted = 2;
6412             }
6413           else
6414             {
6415               chars_deleted = 1;
6416             }
6417           *p = '\0';
6418           break;
6419         }
6420       *p++ = c;
6421     }
6422   lbp->len = p - buffer;
6423
6424   if (need_filebuf              /* we need filebuf for multi-line regexps */
6425       && chars_deleted > 0)     /* not at EOF */
6426     {
6427       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6428         {
6429           /* Expand filebuf. */
6430           filebuf.size *= 2;
6431           xrnew (filebuf.buffer, filebuf.size, char);
6432         }
6433       memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6434       filebuf.len += lbp->len;
6435       filebuf.buffer[filebuf.len++] = '\n';
6436       filebuf.buffer[filebuf.len] = '\0';
6437     }
6438
6439   return lbp->len + chars_deleted;
6440 }
6441
6442 /*
6443  * Like readline_internal, above, but in addition try to match the
6444  * input line against relevant regular expressions and manage #line
6445  * directives.
6446  */
6447 static void
6448 readline (linebuffer *lbp, FILE *stream)
6449 {
6450   long result;
6451
6452   linecharno = charno;          /* update global char number of line start */
6453   result = readline_internal (lbp, stream, infilename); /* read line */
6454   lineno += 1;                  /* increment global line number */
6455   charno += result;             /* increment global char number */
6456
6457   /* Honor #line directives. */
6458   if (!no_line_directive)
6459     {
6460       static bool discard_until_line_directive;
6461
6462       /* Check whether this is a #line directive. */
6463       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6464         {
6465           unsigned int lno;
6466           int start = 0;
6467
6468           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6469               && start > 0)     /* double quote character found */
6470             {
6471               char *endp = lbp->buffer + start;
6472
6473               while ((endp = strchr (endp, '"')) != NULL
6474                      && endp[-1] == '\\')
6475                 endp++;
6476               if (endp != NULL)
6477                 /* Ok, this is a real #line directive.  Let's deal with it. */
6478                 {
6479                   char *taggedabsname;  /* absolute name of original file */
6480                   char *taggedfname;    /* name of original file as given */
6481                   char *name;           /* temp var */
6482
6483                   discard_until_line_directive = false; /* found it */
6484                   name = lbp->buffer + start;
6485                   *endp = '\0';
6486                   canonicalize_filename (name);
6487                   taggedabsname = absolute_filename (name, tagfiledir);
6488                   if (filename_is_absolute (name)
6489                       || filename_is_absolute (curfdp->infname))
6490                     taggedfname = savestr (taggedabsname);
6491                   else
6492                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6493
6494                   if (streq (curfdp->taggedfname, taggedfname))
6495                     /* The #line directive is only a line number change.  We
6496                        deal with this afterwards. */
6497                     free (taggedfname);
6498                   else
6499                     /* The tags following this #line directive should be
6500                        attributed to taggedfname.  In order to do this, set
6501                        curfdp accordingly. */
6502                     {
6503                       fdesc *fdp; /* file description pointer */
6504
6505                       /* Go look for a file description already set up for the
6506                          file indicated in the #line directive.  If there is
6507                          one, use it from now until the next #line
6508                          directive. */
6509                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6510                         if (streq (fdp->infname, curfdp->infname)
6511                             && streq (fdp->taggedfname, taggedfname))
6512                           /* If we remove the second test above (after the &&)
6513                              then all entries pertaining to the same file are
6514                              coalesced in the tags file.  If we use it, then
6515                              entries pertaining to the same file but generated
6516                              from different files (via #line directives) will
6517                              go into separate sections in the tags file.  These
6518                              alternatives look equivalent.  The first one
6519                              destroys some apparently useless information. */
6520                           {
6521                             curfdp = fdp;
6522                             free (taggedfname);
6523                             break;
6524                           }
6525                       /* Else, if we already tagged the real file, skip all
6526                          input lines until the next #line directive. */
6527                       if (fdp == NULL) /* not found */
6528                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6529                           if (streq (fdp->infabsname, taggedabsname))
6530                             {
6531                               discard_until_line_directive = true;
6532                               free (taggedfname);
6533                               break;
6534                             }
6535                       /* Else create a new file description and use that from
6536                          now on, until the next #line directive. */
6537                       if (fdp == NULL) /* not found */
6538                         {
6539                           fdp = fdhead;
6540                           fdhead = xnew (1, fdesc);
6541                           *fdhead = *curfdp; /* copy curr. file description */
6542                           fdhead->next = fdp;
6543                           fdhead->infname = savestr (curfdp->infname);
6544                           fdhead->infabsname = savestr (curfdp->infabsname);
6545                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6546                           fdhead->taggedfname = taggedfname;
6547                           fdhead->usecharno = false;
6548                           fdhead->prop = NULL;
6549                           fdhead->written = false;
6550                           curfdp = fdhead;
6551                         }
6552                     }
6553                   free (taggedabsname);
6554                   lineno = lno - 1;
6555                   readline (lbp, stream);
6556                   return;
6557                 } /* if a real #line directive */
6558             } /* if #line is followed by a number */
6559         } /* if line begins with "#line " */
6560
6561       /* If we are here, no #line directive was found. */
6562       if (discard_until_line_directive)
6563         {
6564           if (result > 0)
6565             {
6566               /* Do a tail recursion on ourselves, thus discarding the contents
6567                  of the line buffer. */
6568               readline (lbp, stream);
6569               return;
6570             }
6571           /* End of file. */
6572           discard_until_line_directive = false;
6573           return;
6574         }
6575     } /* if #line directives should be considered */
6576
6577   {
6578     int match;
6579     regexp *rp;
6580     char *name;
6581
6582     /* Match against relevant regexps. */
6583     if (lbp->len > 0)
6584       for (rp = p_head; rp != NULL; rp = rp->p_next)
6585         {
6586           /* Only use generic regexps or those for the current language.
6587              Also do not use multiline regexps, which is the job of
6588              regex_tag_multiline. */
6589           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6590               || rp->multi_line)
6591             continue;
6592
6593           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6594           switch (match)
6595             {
6596             case -2:
6597               /* Some error. */
6598               if (!rp->error_signaled)
6599                 {
6600                   error ("regexp stack overflow while matching \"%s\"",
6601                          rp->pattern);
6602                   rp->error_signaled = true;
6603                 }
6604               break;
6605             case -1:
6606               /* No match. */
6607               break;
6608             case 0:
6609               /* Empty string matched. */
6610               if (!rp->error_signaled)
6611                 {
6612                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6613                   rp->error_signaled = true;
6614                 }
6615               break;
6616             default:
6617               /* Match occurred.  Construct a tag. */
6618               name = rp->name;
6619               if (name[0] == '\0')
6620                 name = NULL;
6621               else /* make a named tag */
6622                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6623               if (rp->force_explicit_name)
6624                 /* Force explicit tag name, if a name is there. */
6625                 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6626               else
6627                 make_tag (name, strlen (name), true,
6628                           lbp->buffer, match, lineno, linecharno);
6629               break;
6630             }
6631         }
6632   }
6633 }
6634
6635 \f
/*
 * Return a newly allocated copy of the NUL-terminated string CP.
 * The copy occupies strlen(cp)+1 bytes obtained through savenstr.
 */
static char *
savestr (const char *cp)
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6645
6646 /*
6647  * Return a pointer to a space of size LEN+1 allocated with xnew where
6648  * the string CP has been copied for at most the first LEN characters.
6649  */
6650 static char *
6651 savenstr (const char *cp, int len)
6652 {
6653   char *dp = xnew (len + 1, char);
6654   dp[len] = '\0';
6655   return memcpy (dp, cp, len);
6656 }
6657
/* Return a pointer to the first character at or after CP that is not
   a space.  The end of the string is not a space, so the scan stops
   there at the latest. */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
6666
/* Return a pointer to the first space at or after CP, or to the
   terminating NUL if no space follows. */
static char *
skip_non_spaces (char *cp)
{
  for (; *cp != '\0'; cp++)
    if (c_isspace (*cp))
      break;
  return cp;
}
6675
/* Return a pointer to the first character at or after CP that is not
   in the "name" class. */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so the scan stops there too */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
6685
/* Output a diagnostic with printf-style FORMAT and args through
   verror, then exit with EXIT_FAILURE.  Does not return. */
static void
fatal (char const *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);

  exit (EXIT_FAILURE);
}
6696
/* Report the error described by the current value of errno with
   perror, using S1 as the message prefix, then exit with
   EXIT_FAILURE.  Does not return. */
static void
pfatal (const char *s1)
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6703
6704 static void
6705 suggest_asking_for_help (void)
6706 {
6707   fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
6708            progname);
6709   exit (EXIT_FAILURE);
6710 }
6711
/* Output a diagnostic with printf-style FORMAT and args through
   verror.  Unlike fatal, return to the caller afterwards. */
static void
error (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);
}
6721
6722 static void
6723 verror (char const *format, va_list ap)
6724 {
6725   fprintf (stderr, "%s: ", progname);
6726   vfprintf (stderr, format, ap);
6727   fprintf (stderr, "\n");
6728 }
6729
6730 /* Return a newly-allocated string whose contents
6731    concatenate those of s1, s2, s3.  */
6732 static char *
6733 concat (const char *s1, const char *s2, const char *s3)
6734 {
6735   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6736   char *result = xnew (len1 + len2 + len3 + 1, char);
6737
6738   strcpy (result, s1);
6739   strcpy (result + len1, s2);
6740   strcpy (result + len1 + len2, s3);
6741
6742   return result;
6743 }
6744
6745 \f
6746 /* Does the same work as the system V getcwd, but does not need to
6747    guess the buffer size in advance. */
6748 static char *
6749 etags_getcwd (void)
6750 {
6751   int bufsize = 200;
6752   char *path = xnew (bufsize, char);
6753
6754   while (getcwd (path, bufsize) == NULL)
6755     {
6756       if (errno != ERANGE)
6757         pfatal ("getcwd");
6758       bufsize *= 2;
6759       free (path);
6760       path = xnew (bufsize, char);
6761     }
6762
6763   canonicalize_filename (path);
6764   return path;
6765 }
6766
6767 /* Return a newly allocated string containing a name of a temporary file.  */
6768 static char *
6769 etags_mktmp (void)
6770 {
6771   const char *tmpdir = getenv ("TMPDIR");
6772   const char *slash = "/";
6773
6774 #if MSDOS || defined (DOS_NT)
6775   if (!tmpdir)
6776     tmpdir = getenv ("TEMP");
6777   if (!tmpdir)
6778     tmpdir = getenv ("TMP");
6779   if (!tmpdir)
6780     tmpdir = ".";
6781   if (tmpdir[strlen (tmpdir) - 1] == '/'
6782       || tmpdir[strlen (tmpdir) - 1] == '\\')
6783     slash = "";
6784 #else
6785   if (!tmpdir)
6786     tmpdir = "/tmp";
6787   if (tmpdir[strlen (tmpdir) - 1] == '/')
6788     slash = "";
6789 #endif
6790
6791   char *templt = concat (tmpdir, slash, "etXXXXXX");
6792   int fd = mkostemp (templt, O_CLOEXEC);
6793   if (fd < 0 || close (fd) != 0)
6794     {
6795       int temp_errno = errno;
6796       free (templt);
6797       errno = temp_errno;
6798       templt = NULL;
6799     }
6800
6801 #if defined (DOS_NT)
6802   /* The file name will be used in shell redirection, so it needs to have
6803      DOS-style backslashes, or else the Windows shell will barf.  */
6804   char *p;
6805   for (p = templt; *p; p++)
6806     if (*p == '/')
6807       *p = '\\';
6808 #endif
6809
6810   return templt;
6811 }
6812
6813 /* Return a newly allocated string containing the file name of FILE
6814    relative to the absolute directory DIR (which should end with a slash). */
6815 static char *
6816 relative_filename (char *file, char *dir)
6817 {
6818   char *fp, *dp, *afn, *res;
6819   int i;
6820
6821   /* Find the common root of file and dir (with a trailing slash). */
6822   afn = absolute_filename (file, cwd);
6823   fp = afn;
6824   dp = dir;
6825   while (*fp++ == *dp++)
6826     continue;
6827   fp--, dp--;                   /* back to the first differing char */
6828 #ifdef DOS_NT
6829   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6830     return afn;
6831 #endif
6832   do                            /* look at the equal chars until '/' */
6833     fp--, dp--;
6834   while (*fp != '/');
6835
6836   /* Build a sequence of "../" strings for the resulting relative file name. */
6837   i = 0;
6838   while ((dp = strchr (dp + 1, '/')) != NULL)
6839     i += 1;
6840   res = xnew (3*i + strlen (fp + 1) + 1, char);
6841   char *z = res;
6842   while (i-- > 0)
6843     z = stpcpy (z, "../");
6844
6845   /* Add the file name relative to the common root of file and dir. */
6846   strcpy (z, fp + 1);
6847   free (afn);
6848
6849   return res;
6850 }
6851
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied, otherwise it is appended
   to DIR.  The "/dirname/.." and "/." components are then removed in
   place.  Should the result come out empty, "/" is returned instead. */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* SLASHP is at "/..": walk CP back to the place where
                 the preceding component starts, i.e. the closest
                 earlier position that looks like an absolute name. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Move what follows "/.." down onto CP.  slashp[2] is
                 '.', so strlen (slashp + 2) is the length of that
                 tail plus one, which covers its terminating NUL. */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              /* Look at the same position again: another "/.." or
                 "/." may now start there. */
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/.".  slashp[1] is '.', so strlen (slashp + 1)
                 is the length of the tail plus one, which covers its
                 terminating NUL. */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6914
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  If FILE contains no slash, the result is a
   copy of DIR.  FILE is modified temporarily but is restored before
   returning. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');
  char *res;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash, so that only the directory
     part is made absolute, then put the cut character back. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  res = absolute_filename (file, dir);
  last_slash[1] = saved;

  return res;
}
6934
/* Whether the argument string is an absolute file name, that is,
   whether it begins with a slash or, on DOS_NT, with a drive letter
   followed by ":/".  The argument string must have been canonicalized
   with canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
6946
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place: the result is never longer than the input. */
static void
canonicalize_filename (register char *fn)
{
  register char *src = fn;      /* next character to read */
  register char *dst = fn;      /* next position to write */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);

  /* Collapse multiple forward- and back-slashes into a single forward
     slash. */
  while (*src != '\0')
    {
      if (*src == '/' || *src == '\\')
        {
          *dst++ = '/';
          do
            src++;
          while (*src == '/' || *src == '\\');
        }
      else
        *dst++ = *src++;
    }

#else  /* !DOS_NT */

  /* Collapse multiple slashes into a single slash. */
  while (*src != '\0')
    {
      if (*src == '/')
        {
          *dst++ = '/';
          do
            src++;
          while (*src == '/');
        }
      else
        *dst++ = *src++;
    }

#endif  /* !DOS_NT */

  *dst = '\0';
}
6988
6989 \f
6990 /* Initialize a linebuffer for use. */
6991 static void
6992 linebuffer_init (linebuffer *lbp)
6993 {
6994   lbp->size = (DEBUG) ? 3 : 200;
6995   lbp->buffer = xnew (lbp->size, char);
6996   lbp->buffer[0] = '\0';
6997   lbp->len = 0;
6998 }
6999
7000 /* Set the minimum size of a string contained in a linebuffer. */
7001 static void
7002 linebuffer_setlen (linebuffer *lbp, int toksize)
7003 {
7004   while (lbp->size <= toksize)
7005     {
7006       lbp->size *= 2;
7007       xrnew (lbp->buffer, lbp->size, char);
7008     }
7009   lbp->len = toksize;
7010 }
7011
/* Like malloc, but a null result is reported through fatal with
   "virtual memory exhausted" instead of being returned. */
static void *
xmalloc (size_t size)
{
  void *mem = malloc (size);

  if (!mem)
    fatal ("virtual memory exhausted");
  return mem;
}
7021
/* Like realloc, but a null result is reported through fatal with
   "virtual memory exhausted" instead of being returned. */
static void *
xrealloc (void *ptr, size_t size)
{
  void *mem = realloc (ptr, size);

  if (!mem)
    fatal ("virtual memory exhausted");
  return mem;
}
7030
7031 /*
7032  * Local Variables:
7033  * indent-tabs-mode: t
7034  * tab-width: 8
7035  * fill-column: 79
7036  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
7037  * c-file-style: "gnu"
7038  * End:
7039  */
7040
7041 /* etags.c ends here */