code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2016 Free Software
  32 Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  * Francesco Potortì maintained and improved it for many years
  72    starting in 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
/* Version identification string for this program.
   NOTE(review): the "@(#)" prefix looks like the marker that the what(1)
   utility searches for -- confirm before changing the format.  */
char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  82
/* Normalize DEBUG to a boolean constant so that it is always defined and
   can be tested in ordinary expressions.  When debugging was not
   requested, NDEBUG is defined as well.  */
#ifdef DEBUG
#  undef DEBUG
#  define DEBUG true
#else
#  define DEBUG  false
#  define NDEBUG                /* disable assert */
#endif
  90
  91 #include <config.h>
  92
  93 #ifndef _GNU_SOURCE
  94 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  95 #endif
  96
/* WIN32_NATIVE is for XEmacs.
   MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
/* A WIN32_NATIVE build is handled as WINDOWSNT and never as MSDOS.  */
#ifdef WIN32_NATIVE
# undef MSDOS
# undef  WINDOWSNT
# define WINDOWSNT
#endif /* WIN32_NATIVE */

/* Normalize MSDOS to a boolean constant that is always defined.  */
#ifdef MSDOS
# undef MSDOS
# define MSDOS true
# include <sys/param.h>
#else
# define MSDOS false
#endif /* MSDOS */

/* WINDOWSNT implies DOS_NT.  MAXPATHLEN and O_CLOEXEC are mapped to the
   names _MAX_PATH and O_NOINHERIT, and HAVE_NTGUI is cleared.  */
#ifdef WINDOWSNT
# include <direct.h>
# define MAXPATHLEN _MAX_PATH
# undef HAVE_NTGUI
# undef  DOS_NT
# define DOS_NT
# define O_CLOEXEC O_NOINHERIT
#endif /* WINDOWSNT */
 121
 122 #include <limits.h>
 123 #include <unistd.h>
 124 #include <stdarg.h>
 125 #include <stdlib.h>
 126 #include <string.h>
 127 #include <sysstdio.h>
 128 #include <errno.h>
 129 #include <fcntl.h>
 130 #include <binary-io.h>
 131 #include <c-ctype.h>
 132 #include <c-strcase.h>
 133
 134 #include <assert.h>
/* With NDEBUG defined, force assert to expand to a no-op expression no
   matter what <assert.h> provided.  */
#ifdef NDEBUG
# undef  assert                 /* some systems have a buggy assert.h */
# define assert(x) ((void) 0)
#endif
 139
 140 #include <getopt.h>
 141 #include <regex.h>
 142
/* Define CTAGS to make the program "ctags" compatible with the usual one.
   Leave it undefined to make the program "etags", which makes emacs-style
   tag tables and tags typedefs, #defines and struct/union/enum by default.
   Either way, CTAGS ends up defined as a boolean constant, so it can be
   tested both with #if and in ordinary expressions.  */
#ifdef CTAGS
# undef  CTAGS
# define CTAGS true
#else
# define CTAGS false
#endif
 152
 153 static bool
 154 streq (char const *s, char const *t)
 155 {
 156   return strcmp (s, t) == 0;
 157 }
 158
 159 static bool
 160 strcaseeq (char const *s, char const *t)
 161 {
 162   return c_strcasecmp (s, t) == 0;
 163 }
 164
 165 static bool
 166 strneq (char const *s, char const *t, size_t n)
 167 {
 168   return strncmp (s, t, n) == 0;
 169 }
 170
 171 static bool
 172 strncaseeq (char const *s, char const *t, size_t n)
 173 {
 174   return c_strncasecmp (s, t, n) == 0;
 175 }
 176
 177 /* C is not in a name.  */
 178 static bool
 179 notinname (unsigned char c)
 180 {
 181   /* Look at make_tag before modifying!  */
 182   static bool const table[UCHAR_MAX + 1] = {
 183     ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
 184     ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
 185   };
 186   return table[c];
 187 }
 188
 189 /* C can start a token.  */
 190 static bool
 191 begtoken (unsigned char c)
 192 {
 193   static bool const table[UCHAR_MAX + 1] = {
 194     ['$']=1, ['@']=1,
 195     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 196     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 197     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 198     ['Y']=1, ['Z']=1,
 199     ['_']=1,
 200     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 201     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 202     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 203     ['y']=1, ['z']=1,
 204     ['~']=1
 205   };
 206   return table[c];
 207 }
 208
 209 /* C can be in the middle of a token.  */
 210 static bool
 211 intoken (unsigned char c)
 212 {
 213   static bool const table[UCHAR_MAX + 1] = {
 214     ['$']=1,
 215     ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
 216     ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
 217     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 218     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 219     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 220     ['Y']=1, ['Z']=1,
 221     ['_']=1,
 222     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 223     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 224     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 225     ['y']=1, ['z']=1
 226   };
 227   return table[c];
 228 }
 229
 230 /* C can end a token.  */
 231 static bool
 232 endtoken (unsigned char c)
 233 {
 234   static bool const table[UCHAR_MAX + 1] = {
 235     ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
 236     ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
 237     ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
 238     ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
 239     ['{']=1, ['|']=1, ['}']=1, ['~']=1
 240   };
 241   return table[c];
 242 }
 243
/*
 *      xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:    Type *xnew (int n, Type);
 *              void xrnew (OldPointer, int n, Type);
 *
 * xnew yields storage for N objects of type Type obtained from xmalloc.
 * xrnew passes OP to xrealloc to make room for N objects of type Type and
 * assigns the result back to OP, so OP must be an lvalue; note that OP is
 * evaluated twice.
 * NOTE(review): the product (n) * sizeof (Type) is not checked for
 * overflow in these macros.
 */
#define xnew(n, Type)      ((Type *) xmalloc ((n) * sizeof (Type)))
#define xrnew(op, n, Type) ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
 252
/* Type of a language parse function (see the FUNCTION member of
   `language'): it takes a FILE * and returns nothing.  */
typedef void Lang_function (FILE *);
 254
/* Pairs a compressed-file name suffix with the command that
   decompresses such a file.  */
typedef struct
{
  const char *suffix;           /* file name suffix for this compressor */
  const char *command;          /* takes one arg and decompresses to stdout */
} compressor;
 260
/* Description of one language: its name and help text, the function
   that parses it, and the suffixes, file names and interpreters
   associated with it.  */
typedef struct
{
  const char *name;             /* language name */
  const char *help;             /* detailed help for the language */
  Lang_function *function;      /* parse function */
  const char **suffixes;        /* name suffixes of this language's files */
  const char **filenames;       /* names of this language's files */
  const char **interpreters;    /* interpreters for this language */
  bool metasource;              /* source used to generate other sources */
} language;
 271
/* Description of one input file.  Descriptions are chained into a
   singly linked list through NEXT.  */
typedef struct fdesc
{
  struct fdesc *next;           /* for the linked list */
  char *infname;                /* uncompressed input file name */
  char *infabsname;             /* absolute uncompressed input file name */
  char *infabsdir;              /* absolute dir of input file */
  char *taggedfname;            /* file name to write in tagfile */
  language *lang;               /* language of file */
  char *prop;                   /* file properties to write in tagfile */
  bool usecharno;               /* etags tags shall contain char number */
  bool written;                 /* entry written in the tags file */
} fdesc;
 284
/* One tag.  Nodes are linked through LEFT and RIGHT into a binary tree
   and each one points back, via FDP, to the file it was found in.  */
typedef struct node_st
{                               /* sorting structure */
  struct node_st *left, *right; /* left and right sons */
  fdesc *fdp;                   /* description of file to which tag belongs */
  char *name;                   /* tag name */
  char *regex;                  /* search regexp */
  bool valid;                   /* write this tag on the tag file */
  bool is_func;                 /* function tag: use regexp in CTAGS mode */
  bool been_warned;             /* warning already given for duplicated tag */
  int lno;                      /* line number tag is on */
  long cno;                     /* character number line starts on */
} node;
 297
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;                    /* size of BUFFER */
  int len;                      /* length of the string held in BUFFER */
  char *buffer;                 /* the text itself */
} linebuffer;
 311
/* Used to support mixing of --lang and file names.  Each argument
   records what kind of thing it is (ARG_TYPE), the language associated
   with it, and the argument text itself.  */
typedef struct
{
  enum {
    at_language,                /* a language specification */
    at_regexp,                  /* a regular expression */
    at_filename,                /* a file name */
    at_stdin,                   /* read from stdin here */
    at_end                      /* stop parsing the list */
  } arg_type;                   /* argument type */
  language *lang;               /* language associated with the argument */
  char *what;                   /* the argument itself */
} argument;
 325
/* Structure defining a regular expression.  Regexps are chained into a
   singly linked list through P_NEXT; each one carries both the pattern
   text and its compiled form, plus per-regexp matching options.  */
typedef struct regexp
{
  struct regexp *p_next;        /* pointer to next in list */
  language *lang;               /* if set, use only for this language */
  char *pattern;                /* the regexp pattern */
  char *name;                   /* tag name */
  struct re_pattern_buffer *pat; /* the compiled pattern */
  struct re_registers regs;     /* re registers */
  bool error_signaled;          /* already signaled for this regexp */
  bool force_explicit_name;     /* do not allow implicit tag name */
  bool ignore_case;             /* ignore case when matching */
  bool multi_line;              /* do a multi-line match on the whole file */
} regexp;
 340
 341
/* Forward declarations of the language parse functions.
   Many compilers barf on this:
        Lang_function Ada_funcs;
   so let's write it this way.
   Note that C_entries takes an extra leading `int c_ext' argument and
   therefore does not have the Lang_function signature itself.  */
static void Ada_funcs (FILE *);
static void Asm_labels (FILE *);
static void C_entries (int c_ext, FILE *);
static void default_C_entries (FILE *);
static void plain_C_entries (FILE *);
static void Cjava_entries (FILE *);
static void Cobol_paragraphs (FILE *);
static void Cplusplus_entries (FILE *);
static void Cstar_entries (FILE *);
static void Erlang_functions (FILE *);
static void Forth_words (FILE *);
static void Fortran_functions (FILE *);
static void Go_functions (FILE *);
static void HTML_labels (FILE *);
static void Lisp_functions (FILE *);
static void Lua_functions (FILE *);
static void Makefile_targets (FILE *);
static void Pascal_functions (FILE *);
static void Perl_functions (FILE *);
static void PHP_functions (FILE *);
static void PS_functions (FILE *);
static void Prolog_functions (FILE *);
static void Python_functions (FILE *);
static void Ruby_functions (FILE *);
static void Scheme_functions (FILE *);
static void TeX_commands (FILE *);
static void Texinfo_nodes (FILE *);
static void Yacc_entries (FILE *);
static void just_read_file (FILE *);
 374
/* Forward declarations: language lookup and line-oriented input.  */
static language *get_language_from_langname (const char *);
static void readline (linebuffer *, FILE *);
static long readline_internal (linebuffer *, FILE *, char const *);
static bool nocase_tail (const char *);
static void get_tag (char *, char **);

/* Forward declarations: regexp handling, diagnostics and the tag tree.
   The error-reporting functions are checked as printf-style by the
   compiler; the _Noreturn ones never return to their caller.  */
static void analyze_regex (char *);
static void free_regexps (void);
static void regex_tag_multiline (void);
static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
static _Noreturn void suggest_asking_for_help (void);
static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
static _Noreturn void pfatal (const char *);
static void add_node (node *, node **);

/* Forward declarations: per-file processing and output of entries.  */
static void process_file_name (char *, language *);
static void process_file (FILE *, char *, language *);
static void find_entries (FILE *);
static void free_tree (node *);
static void free_fdesc (fdesc *);
static void pfnote (char *, bool, char *, int, int, long);
static void invalidate_nodes (fdesc *, node **);
static void put_entries (node *);

/* Forward declarations: string, file name, linebuffer and allocation
   utilities.  */
static char *concat (const char *, const char *, const char *);
static char *skip_spaces (char *);
static char *skip_non_spaces (char *);
static char *skip_name (char *);
static char *savenstr (const char *, int);
static char *savestr (const char *);
static char *etags_getcwd (void);
static char *relative_filename (char *, char *);
static char *absolute_filename (char *, char *);
static char *absolute_dirname (char *, char *);
static bool filename_is_absolute (char *f);
static void canonicalize_filename (char *);
static char *etags_mktmp (void);
static void linebuffer_init (linebuffer *);
static void linebuffer_setlen (linebuffer *, int);
static void *xmalloc (size_t);
static void *xrealloc (void *, size_t);
 417
 418 \f
/* Global program state: output destination, current input position,
   the tag tree and the shared line buffers.  */
static char searchar = '/';     /* use /.../ searches */

static char *tagfile;           /* output file */
static char *progname;          /* name this program was invoked with */
static char *cwd;               /* current working directory */
static char *tagfiledir;        /* directory of tagfile */
static FILE *tagf;              /* ioptr for tags file */
static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */

static fdesc *fdhead;           /* head of file description list */
static fdesc *curfdp;           /* current file description */
static char *infilename;        /* current input file name */
static int lineno;              /* line number of current line */
static long charno;             /* current character number */
static long linecharno;         /* charno of start of current line */
static char *dbp;               /* pointer to start of current tag */

/* NOTE(review): presumably a sentinel meaning "no valid character
   number" -- confirm against its users.  */
static const int invalidcharno = -1;

static node *nodehead;          /* the head of the binary tree of tags */
static node *last_node;         /* the last node created */

static linebuffer lb;           /* the current line */
static linebuffer filebuf;      /* a buffer containing the whole file */
static linebuffer token_name;   /* a buffer containing a tag name */
 444
/* Command-line option flags.  The flags declared `int' rather than
   `bool' are the ones whose address is stored in the `flag' member of a
   `longopts' entry, which requires an `int *'.  */
static bool append_to_tagfile;  /* -a: append to tags */
/* The next five default to true in C and derived languages.  */
static bool typedefs;           /* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
                                /* 0 struct/enum/union decls, and C++ */
                                /* member functions. */
static bool constantypedefs;    /* -d: create tags for C #define, enum */
                                /* constants and variables. */
                                /* -D: opposite of -d.  Default under ctags. */
static int globals;             /* create tags for global variables */
static int members;             /* create tags for C member variables */
static int declarations;        /* --declarations: tag them and extern in C&Co*/
static int no_line_directive;   /* ignore #line directives (undocumented) */
static int no_duplicates;       /* no duplicate tags for ctags (undocumented) */
static bool update;             /* -u: update tags */
static bool vgrind_style;       /* -v: create vgrind style index output */
static bool no_warnings;        /* -w: suppress warnings (undocumented) */
static bool cxref_style;        /* -x: create cxref style output */
static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
static bool ignoreindent;       /* -I: ignore indentation in C */
static int packages_only;       /* --packages-only: in Ada, only tag packages*/
static int class_qualify;       /* -Q: produce class-qualified tags in C++/Java */
 467
/* STDIN is defined in LynxOS system headers */
#ifdef STDIN
# undef STDIN
#endif

/* Value associated with the "parse-stdin" long option; it lies outside
   the range of a single character, so it cannot clash with the
   character codes used by the other options.  */
#define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
static bool parsing_stdin;      /* --parse-stdin used */

static regexp *p_head;          /* list of all regexps */
static bool need_filebuf;       /* some regexes are multi-line */
 478
/* Table of long options.  Entries with a NULL flag pointer carry a
   character code (or STDIN) as their value; entries with a non-NULL
   flag pointer name the int variable that receives the value given in
   the last column.  The table has a common part followed by a part that
   depends on whether the program is built as ctags or etags, and is
   terminated by an all-zero entry.

   Note that "class-qualify" stores 'Q' (not 1) into class_qualify, so
   that variable should only be tested for being non-zero.

   NOTE(review): "help" is listed twice, with 'h' and with 'H';
   presumably only the first entry can ever be selected by name --
   confirm whether the second one is still needed.  */
static struct option longopts[] =
{
  { "append",             no_argument,       NULL,               'a'   },
  { "packages-only",      no_argument,       &packages_only,     1     },
  { "c++",                no_argument,       NULL,               'C'   },
  { "declarations",       no_argument,       &declarations,      1     },
  { "no-line-directive",  no_argument,       &no_line_directive, 1     },
  { "no-duplicates",      no_argument,       &no_duplicates,     1     },
  { "help",               no_argument,       NULL,               'h'   },
  { "help",               no_argument,       NULL,               'H'   },
  { "ignore-indentation", no_argument,       NULL,               'I'   },
  { "language",           required_argument, NULL,               'l'   },
  { "members",            no_argument,       &members,           1     },
  { "no-members",         no_argument,       &members,           0     },
  { "output",             required_argument, NULL,               'o'   },
  { "class-qualify",      no_argument,       &class_qualify,     'Q'   },
  { "regex",              required_argument, NULL,               'r'   },
  { "no-regex",           no_argument,       NULL,               'R'   },
  { "ignore-case-regex",  required_argument, NULL,               'c'   },
  { "parse-stdin",        required_argument, NULL,               STDIN },
  { "version",            no_argument,       NULL,               'V'   },

#if CTAGS /* Ctags options */
  { "backward-search",    no_argument,       NULL,               'B'   },
  { "cxref",              no_argument,       NULL,               'x'   },
  { "defines",            no_argument,       NULL,               'd'   },
  { "globals",            no_argument,       &globals,           1     },
  { "typedefs",           no_argument,       NULL,               't'   },
  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  { "update",             no_argument,       NULL,               'u'   },
  { "vgrind",             no_argument,       NULL,               'v'   },
  { "no-warn",            no_argument,       NULL,               'w'   },

#else /* Etags options */
  { "no-defines",         no_argument,       NULL,               'D'   },
  { "no-globals",         no_argument,       &globals,           0     },
  { "include",            required_argument, NULL,               'i'   },
#endif
  { NULL }
};
 519
/* Known compressed-file suffixes and the command used to decompress
   each.  Both cases of "z" and "gz" are listed explicitly.  The table
   is terminated by an entry whose suffix is NULL.  */
static compressor compressors[] =
{
  { "z", "gzip -d -c"},
  { "Z", "gzip -d -c"},
  { "gz", "gzip -d -c"},
  { "GZ", "gzip -d -c"},
  { "bz2", "bzip2 -d -c" },
  { "xz", "xz -d -c" },
  { NULL }
};
 530
 531 /*
 532  * Language stuff.
 533  */
 534
 535 /* Ada code */
 536 static const char *Ada_suffixes [] =
 537   { "ads", "adb", "ada", NULL };
 538 static const char Ada_help [] =
 539 "In Ada code, functions, procedures, packages, tasks and types are\n\
 540 tags.  Use the '--packages-only' option to create tags for\n\
 541 packages only.\n\
 542 Ada tag names have suffixes indicating the type of entity:\n\
 543         Entity type:    Qualifier:\n\
 544         ------------    ----------\n\
 545         function        /f\n\
 546         procedure       /p\n\
 547         package spec    /s\n\
 548         package body    /b\n\
 549         type            /t\n\
 550         task            /k\n\
 551 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 552 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
 553 will just search for any tag 'bidule'.";
 554
 555 /* Assembly code */
 556 static const char *Asm_suffixes [] =
 557   { "a",        /* Unix assembler */
 558     "asm", /* Microcontroller assembly */
 559     "def", /* BSO/Tasking definition includes  */
 560     "inc", /* Microcontroller include files */
 561     "ins", /* Microcontroller include files */
 562     "s", "sa", /* Unix assembler */
 563     "S",   /* cpp-processed Unix assembler */
 564     "src", /* BSO/Tasking C compiler output */
 565     NULL
 566   };
 567 static const char Asm_help [] =
 568 "In assembler code, labels appearing at the beginning of a line,\n\
 569 followed by a colon, are tags.";
 570
 571
 572 /* Note that .c and .h can be considered C++, if the --c++ flag was
 573    given, or if the `class' or `template' keywords are met inside the file.
 574    That is why default_C_entries is called for these. */
 575 static const char *default_C_suffixes [] =
 576   { "c", "h", NULL };
 577 #if CTAGS                               /* C help for Ctags */
 578 static const char default_C_help [] =
 579 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 580 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
 581 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
 582 Use --globals to tag global variables.\n\
 583 You can tag function declarations and external variables by\n\
 584 using '--declarations', and struct members by using '--members'.";
 585 #else                                   /* C help for Etags */
 586 static const char default_C_help [] =
 587 "In C code, any C function or typedef is a tag, and so are\n\
 588 definitions of 'struct', 'union' and 'enum'.  '#define' macro\n\
 589 definitions and 'enum' constants are tags unless you specify\n\
 590 '--no-defines'.  Global variables are tags unless you specify\n\
 591 '--no-globals' and so are struct members unless you specify\n\
 592 '--no-members'.  Use of '--no-globals', '--no-defines' and\n\
 593 '--no-members' can make the tags table file much smaller.\n\
 594 You can tag function declarations and external variables by\n\
 595 using '--declarations'.";
 596 #endif  /* C help for Ctags and Etags */
 597
 598 static const char *Cplusplus_suffixes [] =
 599   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 600     "M",                        /* Objective C++ */
 601     "pdb",                      /* PostScript with C syntax */
 602     NULL };
 603 static const char Cplusplus_help [] =
 604 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 605 --help --lang=c --lang=c++ for full help.)\n\
 606 In addition to C tags, member functions are also recognized.  Member\n\
 607 variables are recognized unless you use the '--no-members' option.\n\
 608 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
 609 and 'CLASS::FUNCTION'.  'operator' definitions have tag names like\n\
 610 'operator+'.";
 611
 612 static const char *Cjava_suffixes [] =
 613   { "java", NULL };
 614 static char Cjava_help [] =
 615 "In Java code, all the tags constructs of C and C++ code are\n\
 616 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 617
 618
 619 static const char *Cobol_suffixes [] =
 620   { "COB", "cob", NULL };
 621 static char Cobol_help [] =
 622 "In Cobol code, tags are paragraph names; that is, any word\n\
 623 starting in column 8 and followed by a period.";
 624
 625 static const char *Cstar_suffixes [] =
 626   { "cs", "hs", NULL };
 627
 628 static const char *Erlang_suffixes [] =
 629   { "erl", "hrl", NULL };
 630 static const char Erlang_help [] =
 631 "In Erlang code, the tags are the functions, records and macros\n\
 632 defined in the file.";
 633
 634 const char *Forth_suffixes [] =
 635   { "fth", "tok", NULL };
 636 static const char Forth_help [] =
 637 "In Forth code, tags are words defined by ':',\n\
 638 constant, code, create, defer, value, variable, buffer:, field.";
 639
 640 static const char *Fortran_suffixes [] =
 641   { "F", "f", "f90", "for", NULL };
 642 static const char Fortran_help [] =
 643 "In Fortran code, functions, subroutines and block data are tags.";
 644
 645 static const char *Go_suffixes [] = {"go", NULL};
 646 static const char Go_help [] =
 647   "In Go code, functions, interfaces and packages are tags.";
 648
 649 static const char *HTML_suffixes [] =
 650   { "htm", "html", "shtml", NULL };
 651 static const char HTML_help [] =
 652 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
 653 'h3' headers.  Also, tags are 'name=' in anchors and all\n\
 654 occurrences of 'id='.";
 655
 656 static const char *Lisp_suffixes [] =
 657   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 658 static const char Lisp_help [] =
 659 "In Lisp code, any function defined with 'defun', any variable\n\
 660 defined with 'defvar' or 'defconst', and in general the first\n\
 661 argument of any expression that starts with '(def' in column zero\n\
 662 is a tag.\n\
 663 The '--declarations' option tags \"(defvar foo)\" constructs too.";
 664
 665 static const char *Lua_suffixes [] =
 666   { "lua", "LUA", NULL };
 667 static const char Lua_help [] =
 668 "In Lua scripts, all functions are tags.";
 669
 670 static const char *Makefile_filenames [] =
 671   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 672 static const char Makefile_help [] =
 673 "In makefiles, targets are tags; additionally, variables are tags\n\
 674 unless you specify '--no-globals'.";
 675
 676 static const char *Objc_suffixes [] =
 677   { "lm",                       /* Objective lex file */
 678     "m",                        /* Objective C file */
 679      NULL };
 680 static const char Objc_help [] =
 681 "In Objective C code, tags include Objective C definitions for classes,\n\
 682 class categories, methods and protocols.  Tags for variables and\n\
 683 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\n\
 684 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 685
 686 static const char *Pascal_suffixes [] =
 687   { "p", "pas", NULL };
 688 static const char Pascal_help [] =
 689 "In Pascal code, the tags are the functions and procedures defined\n\
 690 in the file.";
 691 /* " // this is for working around an Emacs highlighting bug... */
 692
 693 static const char *Perl_suffixes [] =
 694   { "pl", "pm", NULL };
 695 static const char *Perl_interpreters [] =
 696   { "perl", "@PERL@", NULL };
 697 static const char Perl_help [] =
 698 "In Perl code, the tags are the packages, subroutines and variables\n\
 699 defined by the 'package', 'sub', 'my' and 'local' keywords.  Use\n\
 700 '--globals' if you want to tag global variables.  Tags for\n\
 701 subroutines are named 'PACKAGE::SUB'.  The name for subroutines\n\
 702 defined in the default package is 'main::SUB'.";
 703
 704 static const char *PHP_suffixes [] =
 705   { "php", "php3", "php4", NULL };
 706 static const char PHP_help [] =
 707 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 708 the '--no-members' option, vars are tags too.";
 709
 710 static const char *plain_C_suffixes [] =
 711   { "pc",                       /* Pro*C file */
 712      NULL };
 713
 714 static const char *PS_suffixes [] =
 715   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 716 static const char PS_help [] =
 717 "In PostScript code, the tags are the functions.";
 718
 719 static const char *Prolog_suffixes [] =
 720   { "prolog", NULL };
 721 static const char Prolog_help [] =
 722 "In Prolog code, tags are predicates and rules at the beginning of\n\
 723 line.";
 724
 725 static const char *Python_suffixes [] =
 726   { "py", NULL };
 727 static const char Python_help [] =
 728 "In Python code, 'def' or 'class' at the beginning of a line\n\
 729 generate a tag.";
 730
 731 static const char *Ruby_suffixes [] =
 732   { "rb", "ru", "rbw", NULL };
 733 static const char *Ruby_filenames [] =
 734   { "Rakefile", "Thorfile", NULL };
 735 static const char Ruby_help [] =
 736   "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
 737 a line generate a tag.  Constants also generate a tag.";
 738
 739 /* Can't do the `SCM' or `scm' prefix with a version number. */
 740 static const char *Scheme_suffixes [] =
 741   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 742 static const char Scheme_help [] =
 743 "In Scheme code, tags include anything defined with 'def' or with a\n\
 744 construct whose name starts with 'def'.  They also include\n\
 745 variables set with 'set!' at top level in the file.";
 746
 747 static const char *TeX_suffixes [] =
 748   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 749 static const char TeX_help [] =
 750 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
 751 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
 752 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
 753 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
 754 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
 755 \n\
 756 Other commands can be specified by setting the environment variable\n\
 757 'TEXTAGS' to a colon-separated list like, for example,\n\
 758      TEXTAGS=\"mycommand:myothercommand\".";
 759
 760
 761 static const char *Texinfo_suffixes [] =
 762   { "texi", "texinfo", "txi", NULL };
 763 static const char Texinfo_help [] =
 764 "for texinfo files, lines starting with @node are tagged.";
 765
 766 static const char *Yacc_suffixes [] =
 767   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 768 static const char Yacc_help [] =
 769 "In Bison or Yacc input files, each rule defines as a tag the\n\
 770 nonterminal it constructs.  The portions of the file that contain\n\
 771 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 772 for full help).";
 773
 774 static const char auto_help [] =
 775 "'auto' is not a real language, it indicates to use\n\
 776 a default language for files base on file name suffix and file contents.";
 777
 778 static const char none_help [] =
 779 "'none' is not a real language, it indicates to only do\n\
 780 regexp processing on files.";
 781
 782 static const char no_lang_help [] =
 783 "No detailed help available for this language.";
 784
 785
 786 /*
 787  * Table of languages.
 788  *
 789  * It is ok for a given function to be listed under more than one
 790  * name.  I just didn't.
 791  */
 792
 793 static language lang_names [] =
 794 {
 795   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 796   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 797   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 798   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 799   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 800   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 801   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 802   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 803   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 804   { "go",        Go_help,        Go_functions,      Go_suffixes        },
 805   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 806   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 807   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 808   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 809   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 810   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 811   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 812   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 813   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 814   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 815   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 816   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 817   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 818   { "ruby",      Ruby_help,Ruby_functions,Ruby_suffixes,Ruby_filenames },
 819   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 820   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 821   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 822   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
 823   { "auto",      auto_help },                      /* default guessing scheme */
 824   { "none",      none_help,      just_read_file }, /* regexp matching only */
 825   { NULL }                /* end of list */
 826 };
 827
 828 \f
 829 static void
 830 print_language_names (void)
 831 {
 832   language *lang;
 833   const char **name, **ext;
 834
 835   puts ("\nThese are the currently supported languages, along with the\n\
 836 default file names and dot suffixes:");
 837   for (lang = lang_names; lang->name != NULL; lang++)
 838     {
 839       printf ("  %-*s", 10, lang->name);
 840       if (lang->filenames != NULL)
 841         for (name = lang->filenames; *name != NULL; name++)
 842           printf (" %s", *name);
 843       if (lang->suffixes != NULL)
 844         for (ext = lang->suffixes; *ext != NULL; ext++)
 845           printf (" .%s", *ext);
 846       puts ("");
 847     }
 848   puts ("where 'auto' means use default language for files based on file\n\
 849 name suffix, and 'none' means only do regexp processing on files.\n\
 850 If no language is specified and no matching suffix is found,\n\
 851 the first line of the file is read for a sharp-bang (#!) sequence\n\
 852 followed by the name of an interpreter.  If no such sequence is found,\n\
 853 Fortran is tried first; if no tags are found, C is tried next.\n\
 854 When parsing any C file, a \"class\" or \"template\" keyword\n\
 855 switches to C++.");
 856   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 857 \n\
 858 For detailed help on a given language use, for example,\n\
 859 etags --help --lang=ada.");
 860 }
 861
 862 #ifndef EMACS_NAME
 863 # define EMACS_NAME "standalone"
 864 #endif
 865 #ifndef VERSION
 866 # define VERSION "17.38.1.4"
 867 #endif
 868 static _Noreturn void
 869 print_version (void)
 870 {
 871   char emacs_copyright[] = COPYRIGHT;
 872
 873   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 874   puts (emacs_copyright);
 875   puts ("This program is distributed under the terms in ETAGS.README");
 876
 877   exit (EXIT_SUCCESS);
 878 }
 879
 880 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 881 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
 882 #endif
 883
 884 static _Noreturn void
 885 print_help (argument *argbuffer)
 886 {
 887   bool help_for_lang = false;
 888
 889   for (; argbuffer->arg_type != at_end; argbuffer++)
 890     if (argbuffer->arg_type == at_language)
 891       {
 892         if (help_for_lang)
 893           puts ("");
 894         puts (argbuffer->lang->help);
 895         help_for_lang = true;
 896       }
 897
 898   if (help_for_lang)
 899     exit (EXIT_SUCCESS);
 900
 901   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 902 \n\
 903 These are the options accepted by %s.\n", progname, progname);
 904   puts ("You may use unambiguous abbreviations for the long option names.");
 905   puts ("  A - as file name means read names from stdin (one per line).\n\
 906 Absolute names are stored in the output file as they are.\n\
 907 Relative ones are stored relative to the output file's directory.\n");
 908
 909   puts ("-a, --append\n\
 910         Append tag entries to existing tags file.");
 911
 912   puts ("--packages-only\n\
 913         For Ada files, only generate tags for packages.");
 914
 915   if (CTAGS)
 916     puts ("-B, --backward-search\n\
 917         Write the search commands for the tag entries using '?', the\n\
 918         backward-search command instead of '/', the forward-search command.");
 919
 920   /* This option is mostly obsolete, because etags can now automatically
 921      detect C++.  Retained for backward compatibility and for debugging and
 922      experimentation.  In principle, we could want to tag as C++ even
 923      before any "class" or "template" keyword.
 924   puts ("-C, --c++\n\
 925         Treat files whose name suffix defaults to C language as C++ files.");
 926   */
 927
 928   puts ("--declarations\n\
 929         In C and derived languages, create tags for function declarations,");
 930   if (CTAGS)
 931     puts ("\tand create tags for extern variables if --globals is used.");
 932   else
 933     puts
 934       ("\tand create tags for extern variables unless --no-globals is used.");
 935
 936   if (CTAGS)
 937     puts ("-d, --defines\n\
 938         Create tag entries for C #define constants and enum constants, too.");
 939   else
 940     puts ("-D, --no-defines\n\
 941         Don't create tag entries for C #define constants and enum constants.\n\
 942         This makes the tags file smaller.");
 943
 944   if (!CTAGS)
 945     puts ("-i FILE, --include=FILE\n\
 946         Include a note in tag file indicating that, when searching for\n\
 947         a tag, one should also consult the tags file FILE after\n\
 948         checking the current file.");
 949
 950   puts ("-l LANG, --language=LANG\n\
 951         Force the following files to be considered as written in the\n\
 952         named language up to the next --language=LANG option.");
 953
 954   if (CTAGS)
 955     puts ("--globals\n\
 956         Create tag entries for global variables in some languages.");
 957   else
 958     puts ("--no-globals\n\
 959         Do not create tag entries for global variables in some\n\
 960         languages.  This makes the tags file smaller.");
 961
 962   puts ("--no-line-directive\n\
 963         Ignore #line preprocessor directives in C and derived languages.");
 964
 965   if (CTAGS)
 966     puts ("--members\n\
 967         Create tag entries for members of structures in some languages.");
 968   else
 969     puts ("--no-members\n\
 970         Do not create tag entries for members of structures\n\
 971         in some languages.");
 972
 973   puts ("-Q, --class-qualify\n\
 974         Qualify tag names with their class name in C++, ObjC, and Java.\n\
 975         This produces tag names of the form \"class::member\" for C++,\n\
 976         \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
 977         For Objective C, this also produces class methods qualified with\n\
 978         their arguments, as in \"foo:bar:baz:more\".");
 979   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 980         Make a tag for each line matching a regular expression pattern\n\
 981         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 982         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 983         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 984         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 985   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 986         For example Tcl named tags can be created with:\n\
 987           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 988         MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
 989         'm' means to allow multi-line matches, 's' implies 'm' and\n\
 990         causes dot to match any character, including newline.");
 991
 992   puts ("-R, --no-regex\n\
 993         Don't create tags from regexps for the following files.");
 994
 995   puts ("-I, --ignore-indentation\n\
 996         In C and C++ do not assume that a closing brace in the first\n\
 997         column is the final brace of a function or structure definition.");
 998
 999   puts ("-o FILE, --output=FILE\n\
1000         Write the tags to FILE.");
1001
1002   puts ("--parse-stdin=NAME\n\
1003         Read from standard input and record tags as belonging to file NAME.");
1004
1005   if (CTAGS)
1006     {
1007       puts ("-t, --typedefs\n\
1008         Generate tag entries for C and Ada typedefs.");
1009       puts ("-T, --typedefs-and-c++\n\
1010         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1011         and C++ member functions.");
1012     }
1013
1014   if (CTAGS)
1015     puts ("-u, --update\n\
1016         Update the tag entries for the given files, leaving tag\n\
1017         entries for other files in place.  Currently, this is\n\
1018         implemented by deleting the existing entries for the given\n\
1019         files and then rewriting the new entries at the end of the\n\
1020         tags file.  It is often faster to simply rebuild the entire\n\
1021         tag file than to use this.");
1022
1023   if (CTAGS)
1024     {
1025       puts ("-v, --vgrind\n\
1026         Print on the standard output an index of items intended for\n\
1027         human consumption, similar to the output of vgrind.  The index\n\
1028         is sorted, and gives the page number of each item.");
1029
1030       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1031         puts ("-w, --no-duplicates\n\
1032         Do not create duplicate tag entries, for compatibility with\n\
1033         traditional ctags.");
1034
1035       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1036         puts ("-w, --no-warn\n\
1037         Suppress warning messages about duplicate tag entries.");
1038
1039       puts ("-x, --cxref\n\
1040         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1041         The output uses line numbers instead of page numbers, but\n\
1042         beyond that the differences are cosmetic; try both to see\n\
1043         which you like.");
1044     }
1045
1046   puts ("-V, --version\n\
1047         Print the version of the program.\n\
1048 -h, --help\n\
1049         Print this help message.\n\
1050         Followed by one or more '--language' options prints detailed\n\
1051         help about tag generation for the specified languages.");
1052
1053   print_language_names ();
1054
1055   puts ("");
1056   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1057
1058   exit (EXIT_SUCCESS);
1059 }
1060
1061 \f
1062 int
1063 main (int argc, char **argv)
1064 {
1065   int i;
1066   unsigned int nincluded_files;
1067   char **included_files;
1068   argument *argbuffer;
1069   int current_arg, file_count;
1070   linebuffer filename_lb;
1071   bool help_asked = false;
1072   ptrdiff_t len;
1073   char *optstring;
1074   int opt;
1075
1076   progname = argv[0];
1077   nincluded_files = 0;
1078   included_files = xnew (argc, char *);
1079   current_arg = 0;
1080   file_count = 0;
1081
1082   /* Allocate enough no matter what happens.  Overkill, but each one
1083      is small. */
1084   argbuffer = xnew (argc, argument);
1085
1086   /*
1087    * Always find typedefs and structure tags.
1088    * Also default to find macro constants, enum constants, struct
1089    * members and global variables.  Do it for both etags and ctags.
1090    */
1091   typedefs = typedefs_or_cplusplus = constantypedefs = true;
1092   globals = members = true;
1093
1094   /* When the optstring begins with a '-' getopt_long does not rearrange the
1095      non-options arguments to be at the end, but leaves them alone. */
1096   optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1097                       (CTAGS) ? "BxdtTuvw" : "Di:",
1098                       "");
1099
1100   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1101     switch (opt)
1102       {
1103       case 0:
1104         /* If getopt returns 0, then it has already processed a
1105            long-named option.  We should do nothing.  */
1106         break;
1107
1108       case 1:
1109         /* This means that a file name has been seen.  Record it. */
1110         argbuffer[current_arg].arg_type = at_filename;
1111         argbuffer[current_arg].what     = optarg;
1112         len = strlen (optarg);
1113         if (whatlen_max < len)
1114           whatlen_max = len;
1115         ++current_arg;
1116         ++file_count;
1117         break;
1118
1119       case STDIN:
1120         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1121         argbuffer[current_arg].arg_type = at_stdin;
1122         argbuffer[current_arg].what     = optarg;
1123         len = strlen (optarg);
1124         if (whatlen_max < len)
1125           whatlen_max = len;
1126         ++current_arg;
1127         ++file_count;
1128         if (parsing_stdin)
1129           fatal ("cannot parse standard input more than once");
1130         parsing_stdin = true;
1131         break;
1132
1133         /* Common options. */
1134       case 'a': append_to_tagfile = true;       break;
1135       case 'C': cplusplus = true;               break;
1136       case 'f':         /* for compatibility with old makefiles */
1137       case 'o':
1138         if (tagfile)
1139           {
1140             error ("-o option may only be given once.");
1141             suggest_asking_for_help ();
1142             /* NOTREACHED */
1143           }
1144         tagfile = optarg;
1145         break;
1146       case 'I':
1147       case 'S':         /* for backward compatibility */
1148         ignoreindent = true;
1149         break;
1150       case 'l':
1151         {
1152           language *lang = get_language_from_langname (optarg);
1153           if (lang != NULL)
1154             {
1155               argbuffer[current_arg].lang = lang;
1156               argbuffer[current_arg].arg_type = at_language;
1157               ++current_arg;
1158             }
1159         }
1160         break;
1161       case 'c':
1162         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1163         optarg = concat (optarg, "i", ""); /* memory leak here */
1164         /* FALLTHRU */
1165       case 'r':
1166         argbuffer[current_arg].arg_type = at_regexp;
1167         argbuffer[current_arg].what = optarg;
1168         len = strlen (optarg);
1169         if (whatlen_max < len)
1170           whatlen_max = len;
1171         ++current_arg;
1172         break;
1173       case 'R':
1174         argbuffer[current_arg].arg_type = at_regexp;
1175         argbuffer[current_arg].what = NULL;
1176         ++current_arg;
1177         break;
1178       case 'V':
1179         print_version ();
1180         break;
1181       case 'h':
1182       case 'H':
1183         help_asked = true;
1184         break;
1185       case 'Q':
1186         class_qualify = 1;
1187         break;
1188
1189         /* Etags options */
1190       case 'D': constantypedefs = false;                        break;
1191       case 'i': included_files[nincluded_files++] = optarg;     break;
1192
1193         /* Ctags options. */
1194       case 'B': searchar = '?';                                 break;
1195       case 'd': constantypedefs = true;                         break;
1196       case 't': typedefs = true;                                break;
1197       case 'T': typedefs = typedefs_or_cplusplus = true;        break;
1198       case 'u': update = true;                                  break;
1199       case 'v': vgrind_style = true;                      /*FALLTHRU*/
1200       case 'x': cxref_style = true;                             break;
1201       case 'w': no_warnings = true;                             break;
1202       default:
1203         suggest_asking_for_help ();
1204         /* NOTREACHED */
1205       }
1206
1207   /* No more options.  Store the rest of arguments. */
1208   for (; optind < argc; optind++)
1209     {
1210       argbuffer[current_arg].arg_type = at_filename;
1211       argbuffer[current_arg].what = argv[optind];
1212       len = strlen (argv[optind]);
1213       if (whatlen_max < len)
1214         whatlen_max = len;
1215       ++current_arg;
1216       ++file_count;
1217     }
1218
1219   argbuffer[current_arg].arg_type = at_end;
1220
1221   if (help_asked)
1222     print_help (argbuffer);
1223     /* NOTREACHED */
1224
1225   if (nincluded_files == 0 && file_count == 0)
1226     {
1227       error ("no input files specified.");
1228       suggest_asking_for_help ();
1229       /* NOTREACHED */
1230     }
1231
1232   if (tagfile == NULL)
1233     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1234   cwd = etags_getcwd ();        /* the current working directory */
1235   if (cwd[strlen (cwd) - 1] != '/')
1236     {
1237       char *oldcwd = cwd;
1238       cwd = concat (oldcwd, "/", "");
1239       free (oldcwd);
1240     }
1241
1242   /* Compute base directory for relative file names. */
1243   if (streq (tagfile, "-")
1244       || strneq (tagfile, "/dev/", 5))
1245     tagfiledir = cwd;            /* relative file names are relative to cwd */
1246   else
1247     {
1248       canonicalize_filename (tagfile);
1249       tagfiledir = absolute_dirname (tagfile, cwd);
1250     }
1251
1252   linebuffer_init (&lb);
1253   linebuffer_init (&filename_lb);
1254   linebuffer_init (&filebuf);
1255   linebuffer_init (&token_name);
1256
1257   if (!CTAGS)
1258     {
1259       if (streq (tagfile, "-"))
1260         {
1261           tagf = stdout;
1262           SET_BINARY (fileno (stdout));
1263         }
1264       else
1265         tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1266       if (tagf == NULL)
1267         pfatal (tagfile);
1268     }
1269
1270   /*
1271    * Loop through files finding functions.
1272    */
1273   for (i = 0; i < current_arg; i++)
1274     {
1275       static language *lang;    /* non-NULL if language is forced */
1276       char *this_file;
1277
1278       switch (argbuffer[i].arg_type)
1279         {
1280         case at_language:
1281           lang = argbuffer[i].lang;
1282           break;
1283         case at_regexp:
1284           analyze_regex (argbuffer[i].what);
1285           break;
1286         case at_filename:
1287               this_file = argbuffer[i].what;
1288               /* Input file named "-" means read file names from stdin
1289                  (one per line) and use them. */
1290               if (streq (this_file, "-"))
1291                 {
1292                   if (parsing_stdin)
1293                     fatal ("cannot parse standard input "
1294                            "AND read file names from it");
1295                   while (readline_internal (&filename_lb, stdin, "-") > 0)
1296                     process_file_name (filename_lb.buffer, lang);
1297                 }
1298               else
1299                 process_file_name (this_file, lang);
1300           break;
1301         case at_stdin:
1302           this_file = argbuffer[i].what;
1303           process_file (stdin, this_file, lang);
1304           break;
1305         default:
1306           error ("internal error: arg_type");
1307         }
1308     }
1309
1310   free_regexps ();
1311   free (lb.buffer);
1312   free (filebuf.buffer);
1313   free (token_name.buffer);
1314
1315   if (!CTAGS || cxref_style)
1316     {
1317       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1318       put_entries (nodehead);
1319       free_tree (nodehead);
1320       nodehead = NULL;
1321       if (!CTAGS)
1322         {
1323           fdesc *fdp;
1324
1325           /* Output file entries that have no tags. */
1326           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1327             if (!fdp->written)
1328               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1329
1330           while (nincluded_files-- > 0)
1331             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1332
1333           if (fclose (tagf) == EOF)
1334             pfatal (tagfile);
1335         }
1336
1337       exit (EXIT_SUCCESS);
1338     }
1339
1340   /* From here on, we are in (CTAGS && !cxref_style) */
1341   if (update)
1342     {
1343       char *cmd =
1344         xmalloc (strlen (tagfile) + whatlen_max +
1345                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1346       for (i = 0; i < current_arg; ++i)
1347         {
1348           switch (argbuffer[i].arg_type)
1349             {
1350             case at_filename:
1351             case at_stdin:
1352               break;
1353             default:
1354               continue;         /* the for loop */
1355             }
1356           char *z = stpcpy (cmd, "mv ");
1357           z = stpcpy (z, tagfile);
1358           z = stpcpy (z, " OTAGS;fgrep -v '\t");
1359           z = stpcpy (z, argbuffer[i].what);
1360           z = stpcpy (z, "\t' OTAGS >");
1361           z = stpcpy (z, tagfile);
1362           strcpy (z, ";rm OTAGS");
1363           if (system (cmd) != EXIT_SUCCESS)
1364             fatal ("failed to execute shell command");
1365         }
1366       free (cmd);
1367       append_to_tagfile = true;
1368     }
1369
1370   tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1371   if (tagf == NULL)
1372     pfatal (tagfile);
1373   put_entries (nodehead);       /* write all the tags (CTAGS) */
1374   free_tree (nodehead);
1375   nodehead = NULL;
1376   if (fclose (tagf) == EOF)
1377     pfatal (tagfile);
1378
1379   if (CTAGS)
1380     if (append_to_tagfile || update)
1381       {
1382         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1383         /* Maybe these should be used:
1384            setenv ("LC_COLLATE", "C", 1);
1385            setenv ("LC_ALL", "C", 1); */
1386         char *z = stpcpy (cmd, "sort -u -o ");
1387         z = stpcpy (z, tagfile);
1388         *z++ = ' ';
1389         strcpy (z, tagfile);
1390         exit (system (cmd));
1391       }
1392   return EXIT_SUCCESS;
1393 }
1394
1395
1396 /*
1397  * Return a compressor given the file name.  If EXTPTR is non-zero,
1398  * return a pointer into FILE where the compressor-specific
1399  * extension begins.  If no compressor is found, NULL is returned
1400  * and EXTPTR is not significant.
1401  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1402  */
1403 static compressor *
1404 get_compressor_from_suffix (char *file, char **extptr)
1405 {
1406   compressor *compr;
1407   char *slash, *suffix;
1408
1409   /* File has been processed by canonicalize_filename,
1410      so we don't need to consider backslashes on DOS_NT.  */
1411   slash = strrchr (file, '/');
1412   suffix = strrchr (file, '.');
1413   if (suffix == NULL || suffix < slash)
1414     return NULL;
1415   if (extptr != NULL)
1416     *extptr = suffix;
1417   suffix += 1;
1418   /* Let those poor souls who live with DOS 8+3 file name limits get
1419      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1420      Only the first do loop is run if not MSDOS */
1421   do
1422     {
1423       for (compr = compressors; compr->suffix != NULL; compr++)
1424         if (streq (compr->suffix, suffix))
1425           return compr;
1426       if (!MSDOS)
1427         break;                  /* do it only once: not really a loop */
1428       if (extptr != NULL)
1429         *extptr = ++suffix;
1430     } while (*suffix != '\0');
1431   return NULL;
1432 }
1433
1434
1435
1436 /*
1437  * Return a language given the name.
1438  */
1439 static language *
1440 get_language_from_langname (const char *name)
1441 {
1442   language *lang;
1443
1444   if (name == NULL)
1445     error ("empty language name");
1446   else
1447     {
1448       for (lang = lang_names; lang->name != NULL; lang++)
1449         if (streq (name, lang->name))
1450           return lang;
1451       error ("unknown language \"%s\"", name);
1452     }
1453
1454   return NULL;
1455 }
1456
1457
1458 /*
1459  * Return a language given the interpreter name.
1460  */
1461 static language *
1462 get_language_from_interpreter (char *interpreter)
1463 {
1464   language *lang;
1465   const char **iname;
1466
1467   if (interpreter == NULL)
1468     return NULL;
1469   for (lang = lang_names; lang->name != NULL; lang++)
1470     if (lang->interpreters != NULL)
1471       for (iname = lang->interpreters; *iname != NULL; iname++)
1472         if (streq (*iname, interpreter))
1473             return lang;
1474
1475   return NULL;
1476 }
1477
1478
1479
1480 /*
1481  * Return a language given the file name.
1482  */
1483 static language *
1484 get_language_from_filename (char *file, int case_sensitive)
1485 {
1486   language *lang;
1487   const char **name, **ext, *suffix;
1488
1489   /* Try whole file name first. */
1490   for (lang = lang_names; lang->name != NULL; lang++)
1491     if (lang->filenames != NULL)
1492       for (name = lang->filenames; *name != NULL; name++)
1493         if ((case_sensitive)
1494             ? streq (*name, file)
1495             : strcaseeq (*name, file))
1496           return lang;
1497
1498   /* If not found, try suffix after last dot. */
1499   suffix = strrchr (file, '.');
1500   if (suffix == NULL)
1501     return NULL;
1502   suffix += 1;
1503   for (lang = lang_names; lang->name != NULL; lang++)
1504     if (lang->suffixes != NULL)
1505       for (ext = lang->suffixes; *ext != NULL; ext++)
1506         if ((case_sensitive)
1507             ? streq (*ext, suffix)
1508             : strcaseeq (*ext, suffix))
1509           return lang;
1510   return NULL;
1511 }
1512
1513 \f
1514 /*
1515  * This routine is called on each file argument.
1516  */
1517 static void
1518 process_file_name (char *file, language *lang)
1519 {
1520   FILE *inf;
1521   fdesc *fdp;
1522   compressor *compr;
1523   char *compressed_name, *uncompressed_name;
1524   char *ext, *real_name, *tmp_name;
1525   int retval;
1526
1527   canonicalize_filename (file);
1528   if (streq (file, tagfile) && !streq (tagfile, "-"))
1529     {
1530       error ("skipping inclusion of %s in self.", file);
1531       return;
1532     }
1533   compr = get_compressor_from_suffix (file, &ext);
1534   if (compr)
1535     {
1536       compressed_name = file;
1537       uncompressed_name = savenstr (file, ext - file);
1538     }
1539   else
1540     {
1541       compressed_name = NULL;
1542       uncompressed_name = file;
1543     }
1544
1545   /* If the canonicalized uncompressed name
1546      has already been dealt with, skip it silently. */
1547   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1548     {
1549       assert (fdp->infname != NULL);
1550       if (streq (uncompressed_name, fdp->infname))
1551         goto cleanup;
1552     }
1553
1554   inf = fopen (file, "r" FOPEN_BINARY);
1555   if (inf)
1556     real_name = file;
1557   else
1558     {
1559       int file_errno = errno;
1560       if (compressed_name)
1561         {
1562           /* Try with the given suffix.  */
1563           inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1564           if (inf)
1565             real_name = uncompressed_name;
1566         }
1567       else
1568         {
1569           /* Try all possible suffixes.  */
1570           for (compr = compressors; compr->suffix != NULL; compr++)
1571             {
1572               compressed_name = concat (file, ".", compr->suffix);
1573               inf = fopen (compressed_name, "r" FOPEN_BINARY);
1574               if (inf)
1575                 {
1576                   real_name = compressed_name;
1577                   break;
1578                 }
1579               if (MSDOS)
1580                 {
1581                   char *suf = compressed_name + strlen (file);
1582                   size_t suflen = strlen (compr->suffix) + 1;
1583                   for ( ; suf[1]; suf++, suflen--)
1584                     {
1585                       memmove (suf, suf + 1, suflen);
1586                       inf = fopen (compressed_name, "r" FOPEN_BINARY);
1587                       if (inf)
1588                         {
1589                           real_name = compressed_name;
1590                           break;
1591                         }
1592                     }
1593                   if (inf)
1594                     break;
1595                 }
1596               free (compressed_name);
1597               compressed_name = NULL;
1598             }
1599         }
1600       if (! inf)
1601         {
1602           errno = file_errno;
1603           perror (file);
1604           goto cleanup;
1605         }
1606     }
1607
1608   if (real_name == compressed_name)
1609     {
1610       fclose (inf);
1611       tmp_name = etags_mktmp ();
1612       if (!tmp_name)
1613         inf = NULL;
1614       else
1615         {
1616 #if MSDOS || defined (DOS_NT)
1617           char *cmd1 = concat (compr->command, " \"", real_name);
1618           char *cmd = concat (cmd1, "\" > ", tmp_name);
1619 #else
1620           char *cmd1 = concat (compr->command, " '", real_name);
1621           char *cmd = concat (cmd1, "' > ", tmp_name);
1622 #endif
1623           free (cmd1);
1624           int tmp_errno;
1625           if (system (cmd) == -1)
1626             {
1627               inf = NULL;
1628               tmp_errno = EINVAL;
1629             }
1630           else
1631             {
1632               inf = fopen (tmp_name, "r" FOPEN_BINARY);
1633               tmp_errno = errno;
1634             }
1635           free (cmd);
1636           errno = tmp_errno;
1637         }
1638
1639       if (!inf)
1640         {
1641           perror (real_name);
1642           goto cleanup;
1643         }
1644     }
1645
1646   process_file (inf, uncompressed_name, lang);
1647
1648   retval = fclose (inf);
1649   if (real_name == compressed_name)
1650     {
1651       remove (tmp_name);
1652       free (tmp_name);
1653     }
1654   if (retval < 0)
1655     pfatal (file);
1656
1657  cleanup:
1658   if (compressed_name != file)
1659     free (compressed_name);
1660   if (uncompressed_name != file)
1661     free (uncompressed_name);
1662   last_node = NULL;
1663   curfdp = NULL;
1664   return;
1665 }
1666
1667 static void
1668 process_file (FILE *fh, char *fn, language *lang)
1669 {
1670   static const fdesc emptyfdesc;
1671   fdesc *fdp;
1672
1673   infilename = fn;
1674   /* Create a new input file description entry. */
1675   fdp = xnew (1, fdesc);
1676   *fdp = emptyfdesc;
1677   fdp->next = fdhead;
1678   fdp->infname = savestr (fn);
1679   fdp->lang = lang;
1680   fdp->infabsname = absolute_filename (fn, cwd);
1681   fdp->infabsdir = absolute_dirname (fn, cwd);
1682   if (filename_is_absolute (fn))
1683     {
1684       /* An absolute file name.  Canonicalize it. */
1685       fdp->taggedfname = absolute_filename (fn, NULL);
1686     }
1687   else
1688     {
1689       /* A file name relative to cwd.  Make it relative
1690          to the directory of the tags file. */
1691       fdp->taggedfname = relative_filename (fn, tagfiledir);
1692     }
1693   fdp->usecharno = true;        /* use char position when making tags */
1694   fdp->prop = NULL;
1695   fdp->written = false;         /* not written on tags file yet */
1696
1697   fdhead = fdp;
1698   curfdp = fdhead;              /* the current file description */
1699
1700   find_entries (fh);
1701
1702   /* If not Ctags, and if this is not metasource and if it contained no #line
1703      directives, we can write the tags and free all nodes pointing to
1704      curfdp. */
1705   if (!CTAGS
1706       && curfdp->usecharno      /* no #line directives in this file */
1707       && !curfdp->lang->metasource)
1708     {
1709       node *np, *prev;
1710
1711       /* Look for the head of the sublist relative to this file.  See add_node
1712          for the structure of the node tree. */
1713       prev = NULL;
1714       for (np = nodehead; np != NULL; prev = np, np = np->left)
1715         if (np->fdp == curfdp)
1716           break;
1717
1718       /* If we generated tags for this file, write and delete them. */
1719       if (np != NULL)
1720         {
1721           /* This is the head of the last sublist, if any.  The following
1722              instructions depend on this being true. */
1723           assert (np->left == NULL);
1724
1725           assert (fdhead == curfdp);
1726           assert (last_node->fdp == curfdp);
1727           put_entries (np);     /* write tags for file curfdp->taggedfname */
1728           free_tree (np);       /* remove the written nodes */
1729           if (prev == NULL)
1730             nodehead = NULL;    /* no nodes left */
1731           else
1732             prev->left = NULL;  /* delete the pointer to the sublist */
1733         }
1734     }
1735 }
1736
1737 static void
1738 reset_input (FILE *inf)
1739 {
1740   if (fseek (inf, 0, SEEK_SET) != 0)
1741     perror (infilename);
1742 }
1743
1744 /*
1745  * This routine opens the specified file and calls the function
1746  * which finds the function and type definitions.
1747  */
1748 static void
1749 find_entries (FILE *inf)
1750 {
1751   char *cp;
1752   language *lang = curfdp->lang;
1753   Lang_function *parser = NULL;
1754
1755   /* If user specified a language, use it. */
1756   if (lang != NULL && lang->function != NULL)
1757     {
1758       parser = lang->function;
1759     }
1760
1761   /* Else try to guess the language given the file name. */
1762   if (parser == NULL)
1763     {
1764       lang = get_language_from_filename (curfdp->infname, true);
1765       if (lang != NULL && lang->function != NULL)
1766         {
1767           curfdp->lang = lang;
1768           parser = lang->function;
1769         }
1770     }
1771
1772   /* Else look for sharp-bang as the first two characters. */
1773   if (parser == NULL
1774       && readline_internal (&lb, inf, infilename) > 0
1775       && lb.len >= 2
1776       && lb.buffer[0] == '#'
1777       && lb.buffer[1] == '!')
1778     {
1779       char *lp;
1780
1781       /* Set lp to point at the first char after the last slash in the
1782          line or, if no slashes, at the first nonblank.  Then set cp to
1783          the first successive blank and terminate the string. */
1784       lp = strrchr (lb.buffer+2, '/');
1785       if (lp != NULL)
1786         lp += 1;
1787       else
1788         lp = skip_spaces (lb.buffer + 2);
1789       cp = skip_non_spaces (lp);
1790       *cp = '\0';
1791
1792       if (strlen (lp) > 0)
1793         {
1794           lang = get_language_from_interpreter (lp);
1795           if (lang != NULL && lang->function != NULL)
1796             {
1797               curfdp->lang = lang;
1798               parser = lang->function;
1799             }
1800         }
1801     }
1802
1803   reset_input (inf);
1804
1805   /* Else try to guess the language given the case insensitive file name. */
1806   if (parser == NULL)
1807     {
1808       lang = get_language_from_filename (curfdp->infname, false);
1809       if (lang != NULL && lang->function != NULL)
1810         {
1811           curfdp->lang = lang;
1812           parser = lang->function;
1813         }
1814     }
1815
1816   /* Else try Fortran or C. */
1817   if (parser == NULL)
1818     {
1819       node *old_last_node = last_node;
1820
1821       curfdp->lang = get_language_from_langname ("fortran");
1822       find_entries (inf);
1823
1824       if (old_last_node == last_node)
1825         /* No Fortran entries found.  Try C. */
1826         {
1827           reset_input (inf);
1828           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1829           find_entries (inf);
1830         }
1831       return;
1832     }
1833
1834   if (!no_line_directive
1835       && curfdp->lang != NULL && curfdp->lang->metasource)
1836     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1837        file, or anyway we parsed a file that is automatically generated from
1838        this one.  If this is the case, the bingo.c file contained #line
1839        directives that generated tags pointing to this file.  Let's delete
1840        them all before parsing this file, which is the real source. */
1841     {
1842       fdesc **fdpp = &fdhead;
1843       while (*fdpp != NULL)
1844         if (*fdpp != curfdp
1845             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1846           /* We found one of those!  We must delete both the file description
1847              and all tags referring to it. */
1848           {
1849             fdesc *badfdp = *fdpp;
1850
1851             /* Delete the tags referring to badfdp->taggedfname
1852                that were obtained from badfdp->infname. */
1853             invalidate_nodes (badfdp, &nodehead);
1854
1855             *fdpp = badfdp->next; /* remove the bad description from the list */
1856             free_fdesc (badfdp);
1857           }
1858         else
1859           fdpp = &(*fdpp)->next; /* advance the list pointer */
1860     }
1861
1862   assert (parser != NULL);
1863
1864   /* Generic initializations before reading from file. */
1865   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1866
1867   /* Generic initializations before parsing file with readline. */
1868   lineno = 0;                  /* reset global line number */
1869   charno = 0;                  /* reset global char number */
1870   linecharno = 0;              /* reset global char number of line start */
1871
1872   parser (inf);
1873
1874   regex_tag_multiline ();
1875 }
1876
1877 \f
1878 /*
1879  * Check whether an implicitly named tag should be created,
1880  * then call `pfnote'.
1881  * NAME is a string that is internally copied by this function.
1882  *
1883  * TAGS format specification
1884  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1885  * The following is explained in some more detail in etc/ETAGS.EBNF.
1886  *
1887  * make_tag creates tags with "implicit tag names" (unnamed tags)
1888  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1889  *  1. NAME does not contain any of the characters in NONAM;
1890  *  2. LINESTART contains name as either a rightmost, or rightmost but
1891  *     one character, substring;
1892  *  3. the character, if any, immediately before NAME in LINESTART must
1893  *     be a character in NONAM;
1894  *  4. the character, if any, immediately after NAME in LINESTART must
1895  *     also be a character in NONAM.
1896  *
1897  * The implementation uses the notinname() macro, which recognizes the
1898  * characters stored in the string `nonam'.
1899  * etags.el needs to use the same characters that are in NONAM.
1900  */
1901 static void
1902 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1903           int namelen,          /* tag length */
1904           bool is_func,         /* tag is a function */
1905           char *linestart,      /* start of the line where tag is */
1906           int linelen,          /* length of the line where tag is */
1907           int lno,              /* line number */
1908           long int cno)         /* character number */
1909 {
1910   bool named = (name != NULL && namelen > 0);
1911   char *nname = NULL;
1912
1913   if (!CTAGS && named)          /* maybe set named to false */
1914     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1915        such that etags.el can guess a name from it. */
1916     {
1917       int i;
1918       register const char *cp = name;
1919
1920       for (i = 0; i < namelen; i++)
1921         if (notinname (*cp++))
1922           break;
1923       if (i == namelen)                         /* rule #1 */
1924         {
1925           cp = linestart + linelen - namelen;
1926           if (notinname (linestart[linelen-1]))
1927             cp -= 1;                            /* rule #4 */
1928           if (cp >= linestart                   /* rule #2 */
1929               && (cp == linestart
1930                   || notinname (cp[-1]))        /* rule #3 */
1931               && strneq (name, cp, namelen))    /* rule #2 */
1932             named = false;      /* use implicit tag name */
1933         }
1934     }
1935
1936   if (named)
1937     nname = savenstr (name, namelen);
1938
1939   pfnote (nname, is_func, linestart, linelen, lno, cno);
1940 }
1941
1942 /* Record a tag. */
1943 static void
1944 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1945         long int cno)
1946                                 /* tag name, or NULL if unnamed */
1947                                 /* tag is a function */
1948                                 /* start of the line where tag is */
1949                                 /* length of the line where tag is */
1950                                 /* line number */
1951                                 /* character number */
1952 {
1953   register node *np;
1954
1955   assert (name == NULL || name[0] != '\0');
1956   if (CTAGS && name == NULL)
1957     return;
1958
1959   np = xnew (1, node);
1960
1961   /* If ctags mode, change name "main" to M<thisfilename>. */
1962   if (CTAGS && !cxref_style && streq (name, "main"))
1963     {
1964       char *fp = strrchr (curfdp->taggedfname, '/');
1965       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1966       fp = strrchr (np->name, '.');
1967       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1968         fp[0] = '\0';
1969     }
1970   else
1971     np->name = name;
1972   np->valid = true;
1973   np->been_warned = false;
1974   np->fdp = curfdp;
1975   np->is_func = is_func;
1976   np->lno = lno;
1977   if (np->fdp->usecharno)
1978     /* Our char numbers are 0-base, because of C language tradition?
1979        ctags compatibility?  old versions compatibility?   I don't know.
1980        Anyway, since emacs's are 1-base we expect etags.el to take care
1981        of the difference.  If we wanted to have 1-based numbers, we would
1982        uncomment the +1 below. */
1983     np->cno = cno /* + 1 */ ;
1984   else
1985     np->cno = invalidcharno;
1986   np->left = np->right = NULL;
1987   if (CTAGS && !cxref_style)
1988     {
1989       if (strlen (linestart) < 50)
1990         np->regex = concat (linestart, "$", "");
1991       else
1992         np->regex = savenstr (linestart, 50);
1993     }
1994   else
1995     np->regex = savenstr (linestart, linelen);
1996
1997   add_node (np, &nodehead);
1998 }
1999
2000 /*
2001  * free_tree ()
2002  *      recurse on left children, iterate on right children.
2003  */
2004 static void
2005 free_tree (register node *np)
2006 {
2007   while (np)
2008     {
2009       register node *node_right = np->right;
2010       free_tree (np->left);
2011       free (np->name);
2012       free (np->regex);
2013       free (np);
2014       np = node_right;
2015     }
2016 }
2017
2018 /*
2019  * free_fdesc ()
2020  *      delete a file description
2021  */
2022 static void
2023 free_fdesc (register fdesc *fdp)
2024 {
2025   free (fdp->infname);
2026   free (fdp->infabsname);
2027   free (fdp->infabsdir);
2028   free (fdp->taggedfname);
2029   free (fdp->prop);
2030   free (fdp);
2031 }
2032
2033 /*
2034  * add_node ()
2035  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2036  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2037  *      balancing.
2038  *
2039  *      add_node is the only function allowed to add nodes, so it can
2040  *      maintain state.
2041  */
2042 static void
2043 add_node (node *np, node **cur_node_p)
2044 {
2045   register int dif;
2046   register node *cur_node = *cur_node_p;
2047
2048   if (cur_node == NULL)
2049     {
2050       *cur_node_p = np;
2051       last_node = np;
2052       return;
2053     }
2054
2055   if (!CTAGS)
2056     /* Etags Mode */
2057     {
2058       /* For each file name, tags are in a linked sublist on the right
2059          pointer.  The first tags of different files are a linked list
2060          on the left pointer.  last_node points to the end of the last
2061          used sublist. */
2062       if (last_node != NULL && last_node->fdp == np->fdp)
2063         {
2064           /* Let's use the same sublist as the last added node. */
2065           assert (last_node->right == NULL);
2066           last_node->right = np;
2067           last_node = np;
2068         }
2069       else if (cur_node->fdp == np->fdp)
2070         {
2071           /* Scanning the list we found the head of a sublist which is
2072              good for us.  Let's scan this sublist. */
2073           add_node (np, &cur_node->right);
2074         }
2075       else
2076         /* The head of this sublist is not good for us.  Let's try the
2077            next one. */
2078         add_node (np, &cur_node->left);
2079     } /* if ETAGS mode */
2080
2081   else
2082     {
2083       /* Ctags Mode */
2084       dif = strcmp (np->name, cur_node->name);
2085
2086       /*
2087        * If this tag name matches an existing one, then
2088        * do not add the node, but maybe print a warning.
2089        */
2090       if (no_duplicates && !dif)
2091         {
2092           if (np->fdp == cur_node->fdp)
2093             {
2094               if (!no_warnings)
2095                 {
2096                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2097                            np->fdp->infname, lineno, np->name);
2098                   fprintf (stderr, "Second entry ignored\n");
2099                 }
2100             }
2101           else if (!cur_node->been_warned && !no_warnings)
2102             {
2103               fprintf
2104                 (stderr,
2105                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2106                  np->fdp->infname, cur_node->fdp->infname, np->name);
2107               cur_node->been_warned = true;
2108             }
2109           return;
2110         }
2111
2112       /* Actually add the node */
2113       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2114     } /* if CTAGS mode */
2115 }
2116
2117 /*
2118  * invalidate_nodes ()
2119  *      Scan the node tree and invalidate all nodes pointing to the
2120  *      given file description (CTAGS case) or free them (ETAGS case).
2121  */
2122 static void
2123 invalidate_nodes (fdesc *badfdp, node **npp)
2124 {
2125   node *np = *npp;
2126
2127   if (np == NULL)
2128     return;
2129
2130   if (CTAGS)
2131     {
2132       if (np->left != NULL)
2133         invalidate_nodes (badfdp, &np->left);
2134       if (np->fdp == badfdp)
2135         np->valid = false;
2136       if (np->right != NULL)
2137         invalidate_nodes (badfdp, &np->right);
2138     }
2139   else
2140     {
2141       assert (np->fdp != NULL);
2142       if (np->fdp == badfdp)
2143         {
2144           *npp = np->left;      /* detach the sublist from the list */
2145           np->left = NULL;      /* isolate it */
2146           free_tree (np);       /* free it */
2147           invalidate_nodes (badfdp, npp);
2148         }
2149       else
2150         invalidate_nodes (badfdp, &np->left);
2151     }
2152 }
2153
2154 \f
2155 static int total_size_of_entries (node *);
2156 static int number_len (long) ATTRIBUTE_CONST;
2157
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1. */
static int
number_len (long int num)
{
  int digits = 1;

  for (num /= 10; num > 0; num /= 10)
    digits++;
  return digits;
}
2167
2168 /*
2169  * Return total number of characters that put_entries will output for
2170  * the nodes in the linked list at the right of the specified node.
2171  * This count is irrelevant with etags.el since emacs 19.34 at least,
2172  * but is still supplied for backward compatibility.
2173  */
2174 static int
2175 total_size_of_entries (register node *np)
2176 {
2177   register int total = 0;
2178
2179   for (; np != NULL; np = np->right)
2180     if (np->valid)
2181       {
2182         total += strlen (np->regex) + 1;                /* pat\177 */
2183         if (np->name != NULL)
2184           total += strlen (np->name) + 1;               /* name\001 */
2185         total += number_len ((long) np->lno) + 1;       /* lno, */
2186         if (np->cno != invalidcharno)                   /* cno */
2187           total += number_len (np->cno);
2188         total += 1;                                     /* newline */
2189       }
2190
2191   return total;
2192 }
2193
2194 static void
2195 put_entries (register node *np)
2196 {
2197   register char *sp;
2198   static fdesc *fdp = NULL;
2199
2200   if (np == NULL)
2201     return;
2202
2203   /* Output subentries that precede this one */
2204   if (CTAGS)
2205     put_entries (np->left);
2206
2207   /* Output this entry */
2208   if (np->valid)
2209     {
2210       if (!CTAGS)
2211         {
2212           /* Etags mode */
2213           if (fdp != np->fdp)
2214             {
2215               fdp = np->fdp;
2216               fprintf (tagf, "\f\n%s,%d\n",
2217                        fdp->taggedfname, total_size_of_entries (np));
2218               fdp->written = true;
2219             }
2220           fputs (np->regex, tagf);
2221           fputc ('\177', tagf);
2222           if (np->name != NULL)
2223             {
2224               fputs (np->name, tagf);
2225               fputc ('\001', tagf);
2226             }
2227           fprintf (tagf, "%d,", np->lno);
2228           if (np->cno != invalidcharno)
2229             fprintf (tagf, "%ld", np->cno);
2230           fputs ("\n", tagf);
2231         }
2232       else
2233         {
2234           /* Ctags mode */
2235           if (np->name == NULL)
2236             error ("internal error: NULL name in ctags mode.");
2237
2238           if (cxref_style)
2239             {
2240               if (vgrind_style)
2241                 fprintf (stdout, "%s %s %d\n",
2242                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2243               else
2244                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2245                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2246             }
2247           else
2248             {
2249               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2250
2251               if (np->is_func)
2252                 {               /* function or #define macro with args */
2253                   putc (searchar, tagf);
2254                   putc ('^', tagf);
2255
2256                   for (sp = np->regex; *sp; sp++)
2257                     {
2258                       if (*sp == '\\' || *sp == searchar)
2259                         putc ('\\', tagf);
2260                       putc (*sp, tagf);
2261                     }
2262                   putc (searchar, tagf);
2263                 }
2264               else
2265                 {               /* anything else; text pattern inadequate */
2266                   fprintf (tagf, "%d", np->lno);
2267                 }
2268               putc ('\n', tagf);
2269             }
2270         }
2271     } /* if this node contains a valid tag */
2272
2273   /* Output subentries that follow this one */
2274   put_entries (np->right);
2275   if (!CTAGS)
2276     put_entries (np->left);
2277 }
2278
2279 \f
/* C extensions.  These values are stored in the c_ext field of the
   keyword table entries and tested bitwise against the active dialect
   in C_symtype.  Note that C_STAR (0x3) and C_JAVA (0x5) both contain
   the C_PLPL bit (0x1); the keyword table masks that bit off with
   (C_JAVA & ~C_PLPL) for keywords meant for Java only. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2288
/*
 * The C symbol tables.
 *
 * Kinds of keyword known to the C-like parser; the keywords quoted
 * below are those mapped to each kind by the gperf word list.
 */
enum sym_type
{
  st_none,              /* not a known keyword */
  st_C_objprot,         /* @interface, @protocol */
  st_C_objimpl,         /* @implementation */
  st_C_objend,          /* @end */
  st_C_gnumacro,        /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,          /* if, for, while, switch, return, ... */
  st_C_attribute,       /* __attribute__, GTY */
  st_C_javastruct,      /* extends, implements */
  st_C_operator,        /* operator */
  st_C_class,           /* class */
  st_C_template,        /* template */
  st_C_struct,          /* struct, union, namespace, domain, interface */
  st_C_extern,          /* extern */
  st_C_enum,            /* enum */
  st_C_define,          /* define, undef */
  st_C_typedef          /* typedef */
};
2303
2304 /* Feed stuff between (but not including) %[ and %] lines to:
2305      gperf -m 5
2306 %[
2307 %compare-strncmp
2308 %enum
2309 %struct-type
2310 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2311 %%
2312 if,             0,                      st_C_ignore
2313 for,            0,                      st_C_ignore
2314 while,          0,                      st_C_ignore
2315 switch,         0,                      st_C_ignore
2316 return,         0,                      st_C_ignore
2317 __attribute__,  0,                      st_C_attribute
2318 GTY,            0,                      st_C_attribute
2319 @interface,     0,                      st_C_objprot
2320 @protocol,      0,                      st_C_objprot
2321 @implementation,0,                      st_C_objimpl
2322 @end,           0,                      st_C_objend
2323 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2324 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2325 friend,         C_PLPL,                 st_C_ignore
2326 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2327 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2328 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2329 class,          0,                      st_C_class
2330 namespace,      C_PLPL,                 st_C_struct
2331 domain,         C_STAR,                 st_C_struct
2332 union,          0,                      st_C_struct
2333 struct,         0,                      st_C_struct
2334 extern,         0,                      st_C_extern
2335 enum,           0,                      st_C_enum
2336 typedef,        0,                      st_C_typedef
2337 define,         0,                      st_C_define
2338 undef,          0,                      st_C_define
2339 operator,       C_PLPL,                 st_C_operator
2340 template,       0,                      st_C_template
2341 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2342 DEFUN,          0,                      st_C_gnumacro
2343 SYSCALL,        0,                      st_C_gnumacro
2344 ENTRY,          0,                      st_C_gnumacro
2345 PSEUDO,         0,                      st_C_gnumacro
2346 # These are defined inside C functions, so currently they are not met.
2347 # EXFUN used in glibc, DEFVAR_* in emacs.
2348 #EXFUN,         0,                      st_C_gnumacro
2349 #DEFVAR_,       0,                      st_C_gnumacro
2350 %]
2351 and replace lines between %< and %> with its output, then:
2352  - remove the #if characterset check
2353  - make in_word_set static and not inline. */
2354 /*%<*/
2355 /* C code produced by gperf version 3.0.1 */
2356 /* Command-line: gperf -m 5  */
2357 /* Computed positions: -k'2-3' */
2358
2359 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2360 /* maximum key range = 33, duplicates = 0 */
2361
/* Perfect hash of the keyword STR of length LEN, as generated by
   gperf.  The value is LEN plus the table weight of the second
   character and, unless LEN is 2, of the third one too.  STR must
   therefore hold at least two characters. */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  /* Every length but 2 also takes the third character into account. */
  if (len != 2)
    hval += asso_values[(unsigned char) str[2]];
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2407
2408 static struct C_stab_entry *
2409 in_word_set (register const char *str, register unsigned int len)
2410 {
2411   enum
2412     {
2413       TOTAL_KEYWORDS = 33,
2414       MIN_WORD_LENGTH = 2,
2415       MAX_WORD_LENGTH = 15,
2416       MIN_HASH_VALUE = 2,
2417       MAX_HASH_VALUE = 34
2418     };
2419
2420   static struct C_stab_entry wordlist[] =
2421     {
2422       {""}, {""},
2423       {"if",            0,                      st_C_ignore},
2424       {"GTY",           0,                      st_C_attribute},
2425       {"@end",          0,                      st_C_objend},
2426       {"union",         0,                      st_C_struct},
2427       {"define",                0,                      st_C_define},
2428       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2429       {"template",      0,                      st_C_template},
2430       {"operator",      C_PLPL,                 st_C_operator},
2431       {"@interface",    0,                      st_C_objprot},
2432       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2433       {"friend",                C_PLPL,                 st_C_ignore},
2434       {"typedef",       0,                      st_C_typedef},
2435       {"return",                0,                      st_C_ignore},
2436       {"@implementation",0,                     st_C_objimpl},
2437       {"@protocol",     0,                      st_C_objprot},
2438       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2439       {"extern",                0,                      st_C_extern},
2440       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2441       {"struct",                0,                      st_C_struct},
2442       {"domain",                C_STAR,                 st_C_struct},
2443       {"switch",                0,                      st_C_ignore},
2444       {"enum",          0,                      st_C_enum},
2445       {"for",           0,                      st_C_ignore},
2446       {"namespace",     C_PLPL,                 st_C_struct},
2447       {"class",         0,                      st_C_class},
2448       {"while",         0,                      st_C_ignore},
2449       {"undef",         0,                      st_C_define},
2450       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2451       {"__attribute__", 0,                      st_C_attribute},
2452       {"SYSCALL",       0,                      st_C_gnumacro},
2453       {"ENTRY",         0,                      st_C_gnumacro},
2454       {"PSEUDO",                0,                      st_C_gnumacro},
2455       {"DEFUN",         0,                      st_C_gnumacro}
2456     };
2457
2458   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2459     {
2460       int key = hash (str, len);
2461
2462       if (key <= MAX_HASH_VALUE && key >= 0)
2463         {
2464           const char *s = wordlist[key].name;
2465
2466           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2467             return &wordlist[key];
2468         }
2469     }
2470   return 0;
2471 }
2472 /*%>*/
2473
2474 static enum sym_type
2475 C_symtype (char *str, int len, int c_ext)
2476 {
2477   register struct C_stab_entry *se = in_word_set (str, len);
2478
2479   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2480     return st_none;
2481   return se->type;
2482 }
2483
2484 \f
2485 /*
2486  * Ignoring __attribute__ ((list))
 *
 * consider_token raises this flag when it meets a token typed
 * st_C_attribute; while the flag is raised, C_entries does not
 * submit tokens to consider_token.
2487  */
2488 static bool inattribute;        /* looking at an __attribute__ construct */
2489
2490 /*
2491  * C functions and variables are recognized using a simple
2492  * finite automaton.  fvdef is its state variable.
 *
 * Transitions visible in consider_token: a token typed st_C_gnumacro
 * outside a preprocessor line selects fdefunkey, and the next plain
 * token at brace level 0 selects fdefunname; a token typed
 * st_C_operator, or a name ending in "::operator", selects foperator;
 * an ordinary candidate name selects fvnameseen; a token typed
 * st_C_ignore, or asm/__asm__ met in the fvnone state, selects vignore.
2493  */
2494 static enum
2495 {
2496   fvnone,                       /* nothing seen */
2497   fdefunkey,                    /* GNU macro keyword (e.g. Emacs DEFUN) seen */
2498   fdefunname,                   /* name following the GNU macro keyword seen */
2499   foperator,                    /* func: operator keyword seen (cplpl) */
2500   fvnameseen,                   /* function or variable name seen */
2501   fstartlist,                   /* func: just after open parenthesis */
2502   finlist,                      /* func: in parameter list */
2503   flistseen,                    /* func: after parameter list */
2504   fignore,                      /* func: before open brace */
2505   vignore                       /* var-like: ignore until ';' */
2506 } fvdef;
2507
2508 static bool fvextern;           /* func or var: extern keyword seen */
2509
2510 /*
2511  * typedefs are recognized using a simple finite automaton.
2512  * typdef is its state variable.
 *
 * consider_token enters tkeyseen only when the typedefs option is set,
 * moves on to ttypeseen for a plain token or a class/struct/enum
 * keyword, and at tend returns true for any token that is not a
 * class/struct/enum keyword.
2513  */
2514 static enum
2515 {
2516   tnone,                        /* nothing seen */
2517   tkeyseen,                     /* typedef keyword seen */
2518   ttypeseen,                    /* defined type seen */
2519   tinbody,                      /* inside typedef body */
2520   tend,                         /* just before typedef tag */
2521   tignore                       /* junk after typedef tag */
2522 } typdef;
2523
2524 /*
2525  * struct-like structures (enum, struct and union) are recognized
2526  * using another simple finite automaton.  `structdef' is its state
2527  * variable.
 *
 * In consider_token a class/struct/enum keyword may select skeyseen,
 * a following token then selects stagseen, and a token typed
 * st_C_javastruct met at stagseen selects scolonseen.
2528  */
2529 static enum
2530 {
2531   snone,                        /* nothing seen yet,
2532                                    or in struct body if bracelev > 0 */
2533   skeyseen,                     /* struct-like keyword seen */
2534   stagseen,                     /* struct-like tag seen */
2535   scolonseen                    /* colon seen after struct-like tag */
2536 } structdef;
2537
2538 /*
2539  * When objdef is different from onone, objtag is the name of the class.
 *
 * consider_token stores the result of savenstr here in the
 * oimplementation and oprotocol states.  The previous value is not
 * freed at that point (see the leak note in consider_token's oignore
 * case).
2540  */
2541 static const char *objtag = "<uninited>";
2542
2543 /*
2544  * Yet another little state machine to deal with preprocessor lines.
 *
 * C_entries selects dsharpseen.  consider_token then moves to
 * ddefineseen for a token typed st_C_define and to dignorerest for any
 * other token, and from ddefineseen to dignorerest once the token after
 * the keyword has been handled.  The CNL macro resets the state to
 * dnone.
2545  */
2546 static enum
2547 {
2548   dnone,                        /* nothing seen */
2549   dsharpseen,                   /* '#' seen as first char on line */
2550   ddefineseen,                  /* '#' and 'define' seen */
2551   dignorerest                   /* ignore rest of line */
2552 } definedef;
2553
2554 /*
2555  * State machine for Objective C protocols and implementations.
2556  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 *
 * consider_token records the class name in objtag when it leaves the
 * oprotocol and oimplementation states, and goes back to onone from
 * oignore on a token typed st_C_objend.
2557  */
2558 static enum
2559 {
2560   onone,                        /* nothing seen */
2561   oprotocol,                    /* @interface or @protocol seen */
2562   oimplementation,              /* @implementation seen */
2563   otagseen,                     /* class name seen */
2564   oparenseen,                   /* parenthesis before category seen */
2565   ocatseen,                     /* category name seen */
2566   oinbody,                      /* in @implementation body */
2567   omethodsign,                  /* in @implementation body, after +/- */
2568   omethodtag,                   /* after method name */
2569   omethodcolon,                 /* after method colon */
2570   omethodparm,                  /* after method parameter */
2571   oignore                       /* wait for @end */
2572 } objdef;
2573
2574
2575 /*
2576  * Use this structure to keep info about the token read, and how it
2577  * should be tagged.  Used by the make_C_tag function to build a tag.
2578  */
2579 static struct tok
2580 {
2581   char *line;                   /* string containing the token */
2582   int offset;                   /* where the token starts in LINE */
2583   int length;                   /* token length */
2584   /*
2585     The previous members can be used to pass strings around for generic
2586     purposes.  The following ones specifically refer to creating tags.  In this
2587     case the token contained here is the pattern that will be used to create a
2588     tag.
2589   */
2590   bool valid;                   /* true when a tag may be made from this
                                   token: make_C_tag creates a regular tag
                                   only while this is set, and clears it
                                   afterwards.  The token should be
                                   invalidated whenever a state machine is
                                   reset prematurely */
2593   bool named;                   /* create a named tag */
2594   int lineno;                   /* source line number of tag */
2595   long linepos;                 /* source char number of tag */
2596 } token;                        /* latest token read */
2597
2598 /*
2599  * Variables and functions for dealing with nested structures.
2600  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2601  */
2602 static void pushclass_above (int, char *, int);
2603 static void popclass_above (int);
2604 static void write_classname (linebuffer *, const char *qualifier);
2605
/* cname and bracelev are parallel arrays indexed by nesting level.
   pushclass_above doubles both when they are full and stores a NULL
   name when it is given a NULL string; write_classname skips NULL
   names.  */
2606 static struct {
2607   char **cname;                 /* nested class names (entries may be NULL) */
2608   int *bracelev;                /* nested class brace level */
2609   int nl;                       /* class nesting level (elements used) */
2610   int size;                     /* length of the array */
2611 } cstack;                       /* stack for nested declaration tags */
2612 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2613 #define nestlev         (cstack.nl)
2614 /* After struct keyword or in struct body, not inside a nested function. */
/* Expands to an expression that reads a variable named bracelev, which
   must be in scope wherever the macro is used.  */
2615 #define instruct        (structdef == snone && nestlev > 0                      \
2616                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2617
2618 static void
2619 pushclass_above (int bracelev, char *str, int len)
2620 {
2621   int nl;
2622
2623   popclass_above (bracelev);
2624   nl = cstack.nl;
2625   if (nl >= cstack.size)
2626     {
2627       int size = cstack.size *= 2;
2628       xrnew (cstack.cname, size, char *);
2629       xrnew (cstack.bracelev, size, int);
2630     }
2631   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2632   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2633   cstack.bracelev[nl] = bracelev;
2634   cstack.nl = nl + 1;
2635 }
2636
2637 static void
2638 popclass_above (int bracelev)
2639 {
2640   int nl;
2641
2642   for (nl = cstack.nl - 1;
2643        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2644        nl--)
2645     {
2646       free (cstack.cname[nl]);
2647       cstack.nl = nl;
2648     }
2649 }
2650
2651 static void
2652 write_classname (linebuffer *cn, const char *qualifier)
2653 {
2654   int i, len;
2655   int qlen = strlen (qualifier);
2656
2657   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2658     {
2659       len = 0;
2660       cn->len = 0;
2661       cn->buffer[0] = '\0';
2662     }
2663   else
2664     {
2665       len = strlen (cstack.cname[0]);
2666       linebuffer_setlen (cn, len);
2667       strcpy (cn->buffer, cstack.cname[0]);
2668     }
2669   for (i = 1; i < cstack.nl; i++)
2670     {
2671       char *s = cstack.cname[i];
2672       if (s == NULL)
2673         continue;
2674       linebuffer_setlen (cn, len + qlen + strlen (s));
2675       len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2676     }
2677 }
2678
2679 \f
2680 static bool consider_token (char *, int, int, int *, int, int, bool *);
2681 static void make_C_tag (bool);
2682
2683 /*
2684  * consider_token ()
2685  *      checks to see if the current token is at the start of a
2686  *      function or variable, or corresponds to a typedef, or
2687  *      is a struct/union/enum tag, or #define, or an enum constant.
2688  *
2689  *      *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2690  *      with args.  C_EXTP points to which language we are looking at.
2691  *
 *      Returns true when the caller should go on and treat the token
 *      as a tag candidate, false when the token is to be skipped.  The
 *      state variables listed below may be advanced in either case.
 *
 *      STR and LEN delimit the token, C is the first character after
 *      it, BRACELEV and PARLEV are the current brace and parenthesis
 *      nesting levels.  *C_EXTP may gain C_PLPL and lose C_AUTO when a
 *      C++ construct is recognized.
 *
2692  * Globals
2693  *      fvdef                   IN OUT
2694  *      structdef               IN OUT
2695  *      definedef               IN OUT
2696  *      typdef                  IN OUT
2697  *      objdef                  IN OUT
 *      fvextern                OUT
 *      inattribute             OUT
 *      objtag                  OUT
 *      token_name              OUT (Objective C method names)
2698  */
2699
2700 static bool
2701 consider_token (char *str, int len, int c, int *c_extp,
2702                 int bracelev, int parlev, bool *is_func_or_var)
2703                                 /* IN: token pointer */
2704                                 /* IN: token length */
2705                                 /* IN: first char after the token */
2706                                 /* IN, OUT: C extensions mask */
2707                                 /* IN: brace level */
2708                                 /* IN: parenthesis level */
2709                                 /* OUT: function or variable found */
2710 {
2711   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2712      structtype is the type of the preceding struct-like keyword, and
2713      structbracelev is the brace level where it has been seen. */
2714   static enum sym_type structtype;
2715   static int structbracelev;
  /* Keyword class of the current token; assigned anew on every call.  */
2716   static enum sym_type toktype;
2717
2718
2719   toktype = C_symtype (str, len, *c_extp);
2720
2721   /*
2722    * Skip __attribute__
2723    */
2724   if (toktype == st_C_attribute)
2725     {
2726       inattribute = true;
2727       return false;
2728      }
2729
2730    /*
2731     * Advance the definedef state machine.
2732     */
2733    switch (definedef)
2734      {
2735      case dnone:
2736        /* We're not on a preprocessor line. */
2737        if (toktype == st_C_gnumacro)
2738          {
2739            fvdef = fdefunkey;
2740            return false;
2741          }
2742        break;
2743      case dsharpseen:
       /* First token after '#': only a token typed st_C_define keeps
          the preprocessor line interesting.  */
2744        if (toktype == st_C_define)
2745          {
2746            definedef = ddefineseen;
2747          }
2748        else
2749          {
2750            definedef = dignorerest;
2751          }
2752        return false;
2753      case ddefineseen:
2754        /*
2755         * Make a tag for any macro, unless it is a constant
2756         * and constantypedefs is false.
2757         */
2758        definedef = dignorerest;
       /* A '(' right after the name marks a macro with arguments.  */
2759        *is_func_or_var = (c == '(');
2760        if (!*is_func_or_var && !constantypedefs)
2761          return false;
2762        else
2763          return true;
2764      case dignorerest:
2765        return false;
2766      default:
       /* No break follows: if error () returns, control simply leaves
          the switch here, this being its last label.  */
2767        error ("internal error: definedef value.");
2768      }
2769
2770    /*
2771     * Now typedefs
2772     */
2773    switch (typdef)
2774      {
2775      case tnone:
2776        if (toktype == st_C_typedef)
2777          {
           /* The keyword is consumed even when typedefs are not being
              tagged; only the state change depends on the option.  */
2778            if (typedefs)
2779              typdef = tkeyseen;
2780            fvextern = false;
2781            fvdef = fvnone;
2782            return false;
2783          }
2784        break;
2785      case tkeyseen:
2786        switch (toktype)
2787          {
2788          case st_none:
2789          case st_C_class:
2790          case st_C_struct:
2791          case st_C_enum:
2792            typdef = ttypeseen;
2793            break;
2794          default:
2795            break;
2796          }
2797        break;
2798      case ttypeseen:
2799        if (structdef == snone && fvdef == fvnone)
2800          {
2801            fvdef = fvnameseen;
2802            return true;
2803          }
2804        break;
2805      case tend:
2806        switch (toktype)
2807          {
2808          case st_C_class:
2809          case st_C_struct:
2810          case st_C_enum:
2811            return false;
2812          default:
2813            return true;
2814          }
2815      default:
2816        break;
2817      }
2818
   /* Struct-like keywords and the keywords that introduce them.  */
2819    switch (toktype)
2820      {
2821      case st_C_javastruct:
2822        if (structdef == stagseen)
2823          structdef = scolonseen;
2824        return false;
2825      case st_C_template:
2826      case st_C_class:
2827        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2828            && bracelev == 0
2829            && definedef == dnone && structdef == snone
2830            && typdef == tnone && fvdef == fvnone)
2831          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2832        if (toktype == st_C_template)
2833          break;
2834        /* FALLTHRU */
2835      case st_C_struct:
2836      case st_C_enum:
2837        if (parlev == 0
2838            && fvdef != vignore
2839            && (typdef == tkeyseen
2840                || (typedefs_or_cplusplus && structdef == snone)))
2841          {
2842            structdef = skeyseen;
2843            structtype = toktype;
2844            structbracelev = bracelev;
2845            if (fvdef == fvnameseen)
2846              fvdef = fvnone;
2847          }
2848        return false;
2849      default:
2850        break;
2851      }
2852
   /* The token right after a struct-like keyword is the tag.  */
2853    if (structdef == skeyseen)
2854      {
2855        structdef = stagseen;
2856        return true;
2857      }
2858
2859    if (typdef != tnone)
2860      definedef = dnone;
2861
2862    /* Detect Objective C constructs. */
2863    switch (objdef)
2864      {
2865      case onone:
2866        switch (toktype)
2867          {
2868          case st_C_objprot:
2869            objdef = oprotocol;
2870            return false;
2871          case st_C_objimpl:
2872            objdef = oimplementation;
2873            return false;
2874          default:
2875            break;
2876          }
2877        break;
2878      case oimplementation:
2879        /* Save the class tag for functions or variables defined inside. */
2880        objtag = savenstr (str, len);
2881        objdef = oinbody;
2882        return false;
2883      case oprotocol:
2884        /* Save the class tag for categories. */
2885        objtag = savenstr (str, len);
2886        objdef = otagseen;
2887        *is_func_or_var = true;
2888        return true;
2889      case oparenseen:
2890        objdef = ocatseen;
2891        *is_func_or_var = true;
2892        return true;
2893      case oinbody:
2894        break;
2895      case omethodsign:
2896        if (parlev == 0)
2897          {
           /* First part of the method name: start token_name afresh
              with a copy of this token.  */
2898            fvdef = fvnone;
2899            objdef = omethodtag;
2900            linebuffer_setlen (&token_name, len);
2901            memcpy (token_name.buffer, str, len);
2902            token_name.buffer[len] = '\0';
2903            return true;
2904          }
2905        return false;
2906      case omethodcolon:
2907        if (parlev == 0)
2908          objdef = omethodparm;
2909        return false;
2910      case omethodparm:
2911        if (parlev == 0)
2912          {
2913            objdef = omethodtag;
           /* With class_qualify, append this token to the method name
              accumulated so far in token_name.  */
2914            if (class_qualify)
2915              {
2916                int oldlen = token_name.len;
2917                fvdef = fvnone;
2918                linebuffer_setlen (&token_name, oldlen + len);
2919                memcpy (token_name.buffer + oldlen, str, len);
2920                token_name.buffer[oldlen + len] = '\0';
2921              }
2922            return true;
2923          }
2924        return false;
2925      case oignore:
2926        if (toktype == st_C_objend)
2927          {
2928            /* Memory leakage here: the string pointed by objtag is
2929               never released, because many tests would be needed to
2930               avoid breaking on incorrect input code.  The amount of
2931               memory leaked here is the sum of the lengths of the
2932               class tags.
2933            free (objtag); */
2934            objdef = onone;
2935          }
2936        return false;
2937      default:
2938        break;
2939      }
2940
2941    /* A function, variable or enum constant? */
2942    switch (toktype)
2943      {
2944      case st_C_extern:
2945        fvextern = true;
2946        switch  (fvdef)
2947          {
2948          case finlist:
2949          case flistseen:
2950          case fignore:
2951          case vignore:
2952            break;
2953          default:
2954            fvdef = fvnone;
2955          }
2956        return false;
2957      case st_C_ignore:
2958        fvextern = false;
2959        fvdef = vignore;
2960        return false;
2961      case st_C_operator:
2962        fvdef = foperator;
2963        *is_func_or_var = true;
2964        return true;
2965      case st_none:
2966        if (constantypedefs
2967            && structdef == snone
2968            && structtype == st_C_enum && bracelev > structbracelev
2969            /* Don't tag tokens in expressions that assign values to enum
2970               constants.  */
2971            && fvdef != vignore)
2972          return true;           /* enum constant */
2973        switch (fvdef)
2974          {
2975          case fdefunkey:
2976            if (bracelev > 0)
2977              break;
2978            fvdef = fdefunname;  /* GNU macro */
2979            *is_func_or_var = true;
2980            return true;
2981          case fvnone:
2982            switch (typdef)
2983              {
2984              case ttypeseen:
2985                return false;
2986              case tnone:
2987                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2988                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2989                  {
2990                    fvdef = vignore;
2991                    return false;
2992                  }
2993                break;
2994              default:
2995                break;
2996              }
2997           /* FALLTHRU */
2998           case fvnameseen:
2999           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3000             {
3001               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3002                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3003               fvdef = foperator;
3004               *is_func_or_var = true;
3005               return true;
3006             }
          /* Inside braces, only names seen directly in a struct body
             (see the instruct macro) are candidates.  */
3007           if (bracelev > 0 && !instruct)
3008             break;
3009           fvdef = fvnameseen;   /* function or variable */
3010           *is_func_or_var = true;
3011           return true;
3012          default:
3013            break;
3014         }
3015       break;
3016      default:
3017        break;
3018     }
3019
3020   return false;
3021 }
3022
3023 \f
3024 /*
3025  * C_entries often keeps pointers to tokens or lines which are older than
3026  * the line currently read.  By keeping two line buffers, and switching
3027  * them at end of line, it is possible to use those pointers.
3028  */
3029 static struct
3030 {
3031   long linepos;                 /* value of charno saved just before the
                                   line was read (CNL_SAVE_DEFINEDEF) */
3032   linebuffer lb;                /* the line itself, filled by readline */
3033 } lbs[2];
3034
/* The macros below are written for use inside C_entries: they refer by
   name to its locals curndx, newndx, c_ext, lp, quotednl, inf and
   savetoken, and to charno, token and definedef.  */
3035 #define current_lb_is_new (newndx == curndx)
3036 #define switch_line_buffers() (curndx = 1 - curndx)
3037
/* The line buffer, and its saved position, selected by curndx or
   newndx.  */
3038 #define curlb (lbs[curndx].lb)
3039 #define newlb (lbs[newndx].lb)
3040 #define curlinepos (lbs[curndx].linepos)
3041 #define newlinepos (lbs[newndx].linepos)
3042
/* Language tests on the c_ext mask.  */
3043 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3044 #define cplpl (c_ext & C_PLPL)
3045 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3046
/* Read the next input line into the current line buffer and restart
   scanning at its first character: records charno as the line's
   position, clears quotednl and makes the current buffer the new one.
   definedef is left untouched.  */
3047 #define CNL_SAVE_DEFINEDEF()                                            \
3048 do {                                                                    \
3049   curlinepos = charno;                                                  \
3050   readline (&curlb, inf);                                               \
3051   lp = curlb.buffer;                                                    \
3052   quotednl = false;                                                     \
3053   newndx = curndx;                                                      \
3054 } while (0)
3055
/* Like CNL_SAVE_DEFINEDEF, but also restores token from savetoken when
   the latter is valid (invalidating savetoken) and resets definedef to
   dnone.  */
3056 #define CNL()                                                           \
3057 do {                                                                    \
3058   CNL_SAVE_DEFINEDEF ();                                                \
3059   if (savetoken.valid)                                                  \
3060     {                                                                   \
3061       token = savetoken;                                                \
3062       savetoken.valid = false;                                          \
3063     }                                                                   \
3064   definedef = dnone;                                                    \
3065 } while (0)
3066
3067
3068 static void
3069 make_C_tag (bool isfun)
3070 {
3071   /* This function is never called when token.valid is false, but
3072      we must protect against invalid input or internal errors. */
3073   if (token.valid)
3074     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3075               token.offset+token.length+1, token.lineno, token.linepos);
3076   else if (DEBUG)
3077     {                             /* this branch is optimized away if !DEBUG */
3078       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3079                 token_name.len + 17, isfun, token.line,
3080                 token.offset+token.length+1, token.lineno, token.linepos);
3081       error ("INVALID TOKEN");
3082     }
3083
3084   token.valid = false;
3085 }
3086
/*
 * Return true while reading from INF may still yield data, that is,
 * while neither the end-of-file nor the error indicator of the stream
 * is set.  A null stream can never yield data, so it reports false
 * instead of being handed to feof/ferror.
 */
static bool
perhaps_more_input (FILE *inf)
{
  if (inf == NULL)
    return false;

  return !(feof (inf) || ferror (inf));
}
3092
3093
3094 /*
3095  * C_entries ()
3096  *      This routine finds functions, variables, typedefs,
3097  *      #define's, enum constants and struct/union/enum definitions in
3098  *      C syntax and adds them to the list.
3099  */
3100 static void
3101 C_entries (int c_ext, FILE *inf)
3102                                 /* extension of C */
3103                                 /* input file */
3104 {
3105   register char c;              /* latest char read; '\0' for end of line */
3106   register char *lp;            /* pointer one beyond the character `c' */
3107   int curndx, newndx;           /* indices for current and new lb */
3108   register int tokoff;          /* offset in line of start of current token */
3109   register int toklen;          /* length of current token */
3110   const char *qualifier;        /* string used to qualify names */
3111   int qlen;                     /* length of qualifier */
3112   int bracelev;                 /* current brace level */
3113   int bracketlev;               /* current bracket level */
3114   int parlev;                   /* current parenthesis level */
3115   int attrparlev;               /* __attribute__ parenthesis level */
3116   int templatelev;              /* current template level */
3117   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3118   bool incomm, inquote, inchar, quotednl, midtoken;
3119   bool yacc_rules;              /* in the rules part of a yacc file */
3120   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3121
3122
3123   linebuffer_init (&lbs[0].lb);
3124   linebuffer_init (&lbs[1].lb);
3125   if (cstack.size == 0)
3126     {
3127       cstack.size = (DEBUG) ? 1 : 4;
3128       cstack.nl = 0;
3129       cstack.cname = xnew (cstack.size, char *);
3130       cstack.bracelev = xnew (cstack.size, int);
3131     }
3132
3133   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3134   curndx = newndx = 0;
3135   lp = curlb.buffer;
3136   *lp = 0;
3137
3138   fvdef = fvnone; fvextern = false; typdef = tnone;
3139   structdef = snone; definedef = dnone; objdef = onone;
3140   yacc_rules = false;
3141   midtoken = inquote = inchar = incomm = quotednl = false;
3142   token.valid = savetoken.valid = false;
3143   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3144   if (cjava)
3145     { qualifier = "."; qlen = 1; }
3146   else
3147     { qualifier = "::"; qlen = 2; }
3148
3149
3150   while (perhaps_more_input (inf))
3151     {
3152       c = *lp++;
3153       if (c == '\\')
3154         {
3155           /* If we are at the end of the line, the next character is a
3156              '\0'; do not skip it, because it is what tells us
3157              to read the next line.  */
3158           if (*lp == '\0')
3159             {
3160               quotednl = true;
3161               continue;
3162             }
3163           lp++;
3164           c = ' ';
3165         }
3166       else if (incomm)
3167         {
3168           switch (c)
3169             {
3170             case '*':
3171               if (*lp == '/')
3172                 {
3173                   c = *lp++;
3174                   incomm = false;
3175                 }
3176               break;
3177             case '\0':
3178               /* Newlines inside comments do not end macro definitions in
3179                  traditional cpp. */
3180               CNL_SAVE_DEFINEDEF ();
3181               break;
3182             }
3183           continue;
3184         }
3185       else if (inquote)
3186         {
3187           switch (c)
3188             {
3189             case '"':
3190               inquote = false;
3191               break;
3192             case '\0':
3193               /* Newlines inside strings do not end macro definitions
3194                  in traditional cpp, even though compilers don't
3195                  usually accept them. */
3196               CNL_SAVE_DEFINEDEF ();
3197               break;
3198             }
3199           continue;
3200         }
3201       else if (inchar)
3202         {
3203           switch (c)
3204             {
3205             case '\0':
3206               /* Hmmm, something went wrong. */
3207               CNL ();
3208               /* FALLTHRU */
3209             case '\'':
3210               inchar = false;
3211               break;
3212             }
3213           continue;
3214         }
3215       else switch (c)
3216         {
3217         case '"':
3218           inquote = true;
3219           if (bracketlev > 0)
3220             continue;
3221           if (inattribute)
3222             break;
3223           switch (fvdef)
3224             {
3225             case fdefunkey:
3226             case fstartlist:
3227             case finlist:
3228             case fignore:
3229             case vignore:
3230               break;
3231             default:
3232               fvextern = false;
3233               fvdef = fvnone;
3234             }
3235           continue;
3236         case '\'':
3237           inchar = true;
3238           if (bracketlev > 0)
3239             continue;
3240           if (inattribute)
3241             break;
3242           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3243             {
3244               fvextern = false;
3245               fvdef = fvnone;
3246             }
3247           continue;
3248         case '/':
3249           if (*lp == '*')
3250             {
3251               incomm = true;
3252               lp++;
3253               c = ' ';
3254               if (bracketlev > 0)
3255                 continue;
3256             }
3257           else if (/* cplpl && */ *lp == '/')
3258             {
3259               c = '\0';
3260             }
3261           break;
3262         case '%':
3263           if ((c_ext & YACC) && *lp == '%')
3264             {
3265               /* Entering or exiting rules section in yacc file. */
3266               lp++;
3267               definedef = dnone; fvdef = fvnone; fvextern = false;
3268               typdef = tnone; structdef = snone;
3269               midtoken = inquote = inchar = incomm = quotednl = false;
3270               bracelev = 0;
3271               yacc_rules = !yacc_rules;
3272               continue;
3273             }
3274           else
3275             break;
3276         case '#':
3277           if (definedef == dnone)
3278             {
3279               char *cp;
3280               bool cpptoken = true;
3281
3282               /* Look back on this line.  If all blanks, or nonblanks
3283                  followed by an end of comment, this is a preprocessor
3284                  token. */
3285               for (cp = newlb.buffer; cp < lp-1; cp++)
3286                 if (!c_isspace (*cp))
3287                   {
3288                     if (*cp == '*' && cp[1] == '/')
3289                       {
3290                         cp++;
3291                         cpptoken = true;
3292                       }
3293                     else
3294                       cpptoken = false;
3295                   }
3296               if (cpptoken)
3297                 {
3298                   definedef = dsharpseen;
3299                   /* This is needed for tagging enum values: when there are
3300                      preprocessor conditionals inside the enum, we need to
3301                      reset the value of fvdef so that the next enum value is
3302                      tagged even though the one before it did not end in a
3303                      comma.  */
3304                   if (fvdef == vignore && instruct && parlev == 0)
3305                     {
3306                       if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3307                         fvdef = fvnone;
3308                     }
3309                 }
3310             } /* if (definedef == dnone) */
3311           continue;
3312         case '[':
3313           bracketlev++;
3314           continue;
3315         default:
3316           if (bracketlev > 0)
3317             {
3318               if (c == ']')
3319                 --bracketlev;
3320               else if (c == '\0')
3321                 CNL_SAVE_DEFINEDEF ();
3322               continue;
3323             }
3324           break;
3325         } /* switch (c) */
3326
3327
3328       /* Consider token only if some involved conditions are satisfied. */
3329       if (typdef != tignore
3330           && definedef != dignorerest
3331           && fvdef != finlist
3332           && templatelev == 0
3333           && (definedef != dnone
3334               || structdef != scolonseen)
3335           && !inattribute)
3336         {
3337           if (midtoken)
3338             {
3339               if (endtoken (c))
3340                 {
3341                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3342                     /* This handles :: in the middle,
3343                        but not at the beginning of an identifier.
3344                        Also, space-separated :: is not recognized. */
3345                     {
3346                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3347                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3348                       lp += 2;
3349                       toklen += 2;
3350                       c = lp[-1];
3351                       goto still_in_token;
3352                     }
3353                   else
3354                     {
3355                       bool funorvar = false;
3356
3357                       if (yacc_rules
3358                           || consider_token (newlb.buffer + tokoff, toklen, c,
3359                                              &c_ext, bracelev, parlev,
3360                                              &funorvar))
3361                         {
3362                           if (fvdef == foperator)
3363                             {
3364                               char *oldlp = lp;
3365                               lp = skip_spaces (lp-1);
3366                               if (*lp != '\0')
3367                                 lp += 1;
3368                               while (*lp != '\0'
3369                                      && !c_isspace (*lp) && *lp != '(')
3370                                 lp += 1;
3371                               c = *lp++;
3372                               toklen += lp - oldlp;
3373                             }
3374                           token.named = false;
3375                           if (!plainc
3376                               && nestlev > 0 && definedef == dnone)
3377                             /* in struct body */
3378                             {
3379                               if (class_qualify)
3380                                 {
3381                                   int len;
3382                                   write_classname (&token_name, qualifier);
3383                                   len = token_name.len;
3384                                   linebuffer_setlen (&token_name,
3385                                                      len + qlen + toklen);
3386                                   sprintf (token_name.buffer + len, "%s%.*s",
3387                                            qualifier, toklen,
3388                                            newlb.buffer + tokoff);
3389                                 }
3390                               else
3391                                 {
3392                                   linebuffer_setlen (&token_name, toklen);
3393                                   sprintf (token_name.buffer, "%.*s",
3394                                            toklen, newlb.buffer + tokoff);
3395                                 }
3396                               token.named = true;
3397                             }
3398                           else if (objdef == ocatseen)
3399                             /* Objective C category */
3400                             {
3401                               if (class_qualify)
3402                                 {
3403                                   int len = strlen (objtag) + 2 + toklen;
3404                                   linebuffer_setlen (&token_name, len);
3405                                   sprintf (token_name.buffer, "%s(%.*s)",
3406                                            objtag, toklen,
3407                                            newlb.buffer + tokoff);
3408                                 }
3409                               else
3410                                 {
3411                                   linebuffer_setlen (&token_name, toklen);
3412                                   sprintf (token_name.buffer, "%.*s",
3413                                            toklen, newlb.buffer + tokoff);
3414                                 }
3415                               token.named = true;
3416                             }
3417                           else if (objdef == omethodtag
3418                                    || objdef == omethodparm)
3419                             /* Objective C method */
3420                             {
3421                               token.named = true;
3422                             }
3423                           else if (fvdef == fdefunname)
3424                             /* GNU DEFUN and similar macros */
3425                             {
3426                               bool defun = (newlb.buffer[tokoff] == 'F');
3427                               int off = tokoff;
3428                               int len = toklen;
3429
3430                               /* Rewrite the tag so that emacs lisp DEFUNs
3431                                  can be found by their elisp name */
3432                               if (defun)
3433                                 {
3434                                   off += 1;
3435                                   len -= 1;
3436                                 }
3437                               linebuffer_setlen (&token_name, len);
3438                               memcpy (token_name.buffer,
3439                                       newlb.buffer + off, len);
3440                               token_name.buffer[len] = '\0';
3441                               if (defun)
3442                                 while (--len >= 0)
3443                                   if (token_name.buffer[len] == '_')
3444                                     token_name.buffer[len] = '-';
3445                               token.named = defun;
3446                             }
3447                           else
3448                             {
3449                               linebuffer_setlen (&token_name, toklen);
3450                               memcpy (token_name.buffer,
3451                                       newlb.buffer + tokoff, toklen);
3452                               token_name.buffer[toklen] = '\0';
3453                               /* Name macros and members. */
3454                               token.named = (structdef == stagseen
3455                                              || typdef == ttypeseen
3456                                              || typdef == tend
3457                                              || (funorvar
3458                                                  && definedef == dignorerest)
3459                                              || (funorvar
3460                                                  && definedef == dnone
3461                                                  && structdef == snone
3462                                                  && bracelev > 0));
3463                             }
3464                           token.lineno = lineno;
3465                           token.offset = tokoff;
3466                           token.length = toklen;
3467                           token.line = newlb.buffer;
3468                           token.linepos = newlinepos;
3469                           token.valid = true;
3470
3471                           if (definedef == dnone
3472                               && (fvdef == fvnameseen
3473                                   || fvdef == foperator
3474                                   || structdef == stagseen
3475                                   || typdef == tend
3476                                   || typdef == ttypeseen
3477                                   || objdef != onone))
3478                             {
3479                               if (current_lb_is_new)
3480                                 switch_line_buffers ();
3481                             }
3482                           else if (definedef != dnone
3483                                    || fvdef == fdefunname
3484                                    || instruct)
3485                             make_C_tag (funorvar);
3486                         }
3487                       else /* not yacc and consider_token failed */
3488                         {
3489                           if (inattribute && fvdef == fignore)
3490                             {
3491                               /* We have just met __attribute__ after a
3492                                  function parameter list: do not tag the
3493                                  function again. */
3494                               fvdef = fvnone;
3495                             }
3496                         }
3497                       midtoken = false;
3498                     }
3499                 } /* if (endtoken (c)) */
3500               else if (intoken (c))
3501                 still_in_token:
3502                 {
3503                   toklen++;
3504                   continue;
3505                 }
3506             } /* if (midtoken) */
3507           else if (begtoken (c))
3508             {
3509               switch (definedef)
3510                 {
3511                 case dnone:
3512                   switch (fvdef)
3513                     {
3514                     case fstartlist:
3515                       /* This prevents tagging fb in
3516                          void (__attribute__((noreturn)) *fb) (void);
3517                          Fixing this is not easy and not very important. */
3518                       fvdef = finlist;
3519                       continue;
3520                     case flistseen:
3521                       if (plainc || declarations)
3522                         {
3523                           make_C_tag (true); /* a function */
3524                           fvdef = fignore;
3525                         }
3526                       break;
3527                     default:
3528                       break;
3529                     }
3530                   if (structdef == stagseen && !cjava)
3531                     {
3532                       popclass_above (bracelev);
3533                       structdef = snone;
3534                     }
3535                   break;
3536                 case dsharpseen:
3537                   savetoken = token;
3538                   break;
3539                 default:
3540                   break;
3541                 }
3542               if (!yacc_rules || lp == newlb.buffer + 1)
3543                 {
3544                   tokoff = lp - 1 - newlb.buffer;
3545                   toklen = 1;
3546                   midtoken = true;
3547                 }
3548               continue;
3549             } /* if (begtoken) */
3550         } /* if must look at token */
3551
3552
3553       /* Detect end of line, colon, comma, semicolon and various braces
3554          after having handled a token.*/
3555       switch (c)
3556         {
3557         case ':':
3558           if (inattribute)
3559             break;
3560           if (yacc_rules && token.offset == 0 && token.valid)
3561             {
3562               make_C_tag (false); /* a yacc function */
3563               break;
3564             }
3565           if (definedef != dnone)
3566             break;
3567           switch (objdef)
3568             {
3569             case otagseen:
3570               objdef = oignore;
3571               make_C_tag (true); /* an Objective C class */
3572               break;
3573             case omethodtag:
3574             case omethodparm:
3575               objdef = omethodcolon;
3576               if (class_qualify)
3577                 {
3578                   int toklen = token_name.len;
3579                   linebuffer_setlen (&token_name, toklen + 1);
3580                   strcpy (token_name.buffer + toklen, ":");
3581                 }
3582               break;
3583             default:
3584               break;
3585             }
3586           if (structdef == stagseen)
3587             {
3588               structdef = scolonseen;
3589               break;
3590             }
3591           /* Should be useless, but may be work as a safety net. */
3592           if (cplpl && fvdef == flistseen)
3593             {
3594               make_C_tag (true); /* a function */
3595               fvdef = fignore;
3596               break;
3597             }
3598           break;
3599         case ';':
3600           if (definedef != dnone || inattribute)
3601             break;
3602           switch (typdef)
3603             {
3604             case tend:
3605             case ttypeseen:
3606               make_C_tag (false); /* a typedef */
3607               typdef = tnone;
3608               fvdef = fvnone;
3609               break;
3610             case tnone:
3611             case tinbody:
3612             case tignore:
3613               switch (fvdef)
3614                 {
3615                 case fignore:
3616                   if (typdef == tignore || cplpl)
3617                     fvdef = fvnone;
3618                   break;
3619                 case fvnameseen:
3620                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3621                       || (members && instruct))
3622                     make_C_tag (false); /* a variable */
3623                   fvextern = false;
3624                   fvdef = fvnone;
3625                   token.valid = false;
3626                   break;
3627                 case flistseen:
3628                   if ((declarations
3629                        && (cplpl || !instruct)
3630                        && (typdef == tnone || (typdef != tignore && instruct)))
3631                       || (members
3632                           && plainc && instruct))
3633                     make_C_tag (true);  /* a function */
3634                   /* FALLTHRU */
3635                 default:
3636                   fvextern = false;
3637                   fvdef = fvnone;
3638                   if (declarations
3639                        && cplpl && structdef == stagseen)
3640                     make_C_tag (false); /* forward declaration */
3641                   else
3642                     token.valid = false;
3643                 } /* switch (fvdef) */
3644               /* FALLTHRU */
3645             default:
3646               if (!instruct)
3647                 typdef = tnone;
3648             }
3649           if (structdef == stagseen)
3650             structdef = snone;
3651           break;
3652         case ',':
3653           if (definedef != dnone || inattribute)
3654             break;
3655           switch (objdef)
3656             {
3657             case omethodtag:
3658             case omethodparm:
3659               make_C_tag (true); /* an Objective C method */
3660               objdef = oinbody;
3661               break;
3662             default:
3663               break;
3664             }
3665           switch (fvdef)
3666             {
3667             case fdefunkey:
3668             case foperator:
3669             case fstartlist:
3670             case finlist:
3671             case fignore:
3672               break;
3673             case vignore:
3674               if (instruct && parlev == 0)
3675                 fvdef = fvnone;
3676               break;
3677             case fdefunname:
3678               fvdef = fignore;
3679               break;
3680             case fvnameseen:
3681               if (parlev == 0
3682                   && ((globals
3683                        && bracelev == 0
3684                        && templatelev == 0
3685                        && (!fvextern || declarations))
3686                       || (members && instruct)))
3687                   make_C_tag (false); /* a variable */
3688               break;
3689             case flistseen:
3690               if ((declarations && typdef == tnone && !instruct)
3691                   || (members && typdef != tignore && instruct))
3692                 {
3693                   make_C_tag (true); /* a function */
3694                   fvdef = fvnameseen;
3695                 }
3696               else if (!declarations)
3697                 fvdef = fvnone;
3698               token.valid = false;
3699               break;
3700             default:
3701               fvdef = fvnone;
3702             }
3703           if (structdef == stagseen)
3704             structdef = snone;
3705           break;
3706         case ']':
3707           if (definedef != dnone || inattribute)
3708             break;
3709           if (structdef == stagseen)
3710             structdef = snone;
3711           switch (typdef)
3712             {
3713             case ttypeseen:
3714             case tend:
3715               typdef = tignore;
3716               make_C_tag (false);       /* a typedef */
3717               break;
3718             case tnone:
3719             case tinbody:
3720               switch (fvdef)
3721                 {
3722                 case foperator:
3723                 case finlist:
3724                 case fignore:
3725                 case vignore:
3726                   break;
3727                 case fvnameseen:
3728                   if ((members && bracelev == 1)
3729                       || (globals && bracelev == 0
3730                           && (!fvextern || declarations)))
3731                     make_C_tag (false); /* a variable */
3732                   /* FALLTHRU */
3733                 default:
3734                   fvdef = fvnone;
3735                 }
3736               break;
3737             default:
3738               break;
3739             }
3740           break;
3741         case '(':
3742           if (inattribute)
3743             {
3744               attrparlev++;
3745               break;
3746             }
3747           if (definedef != dnone)
3748             break;
3749           if (objdef == otagseen && parlev == 0)
3750             objdef = oparenseen;
3751           switch (fvdef)
3752             {
3753             case fvnameseen:
3754               if (typdef == ttypeseen
3755                   && *lp != '*'
3756                   && !instruct)
3757                 {
3758                   /* This handles constructs like:
3759                      typedef void OperatorFun (int fun); */
3760                   make_C_tag (false);
3761                   typdef = tignore;
3762                   fvdef = fignore;
3763                   break;
3764                 }
3765               /* FALLTHRU */
3766             case foperator:
3767               fvdef = fstartlist;
3768               break;
3769             case flistseen:
3770               fvdef = finlist;
3771               break;
3772             default:
3773               break;
3774             }
3775           parlev++;
3776           break;
3777         case ')':
3778           if (inattribute)
3779             {
3780               if (--attrparlev == 0)
3781                 inattribute = false;
3782               break;
3783             }
3784           if (definedef != dnone)
3785             break;
3786           if (objdef == ocatseen && parlev == 1)
3787             {
3788               make_C_tag (true); /* an Objective C category */
3789               objdef = oignore;
3790             }
3791           if (--parlev == 0)
3792             {
3793               switch (fvdef)
3794                 {
3795                 case fstartlist:
3796                 case finlist:
3797                   fvdef = flistseen;
3798                   break;
3799                 default:
3800                   break;
3801                 }
3802               if (!instruct
3803                   && (typdef == tend
3804                       || typdef == ttypeseen))
3805                 {
3806                   typdef = tignore;
3807                   make_C_tag (false); /* a typedef */
3808                 }
3809             }
3810           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3811             parlev = 0;
3812           break;
3813         case '{':
3814           if (definedef != dnone)
3815             break;
3816           if (typdef == ttypeseen)
3817             {
3818               /* Whenever typdef is set to tinbody (currently only
3819                  here), typdefbracelev should be set to bracelev. */
3820               typdef = tinbody;
3821               typdefbracelev = bracelev;
3822             }
3823           switch (fvdef)
3824             {
3825             case flistseen:
3826               if (cplpl && !class_qualify)
3827                 {
3828                   /* Remove class and namespace qualifiers from the token,
3829                      leaving only the method/member name.  */
3830                   char *cc, *uqname = token_name.buffer;
3831                   char *tok_end = token_name.buffer + token_name.len;
3832
3833                   for (cc = token_name.buffer; cc < tok_end; cc++)
3834                     {
3835                       if (*cc == ':' && cc[1] == ':')
3836                         {
3837                           uqname = cc + 2;
3838                           cc++;
3839                         }
3840                     }
3841                   if (uqname > token_name.buffer)
3842                     {
3843                       int uqlen = strlen (uqname);
3844                       linebuffer_setlen (&token_name, uqlen);
3845                       memmove (token_name.buffer, uqname, uqlen + 1);
3846                     }
3847                 }
3848               make_C_tag (true);    /* a function */
3849               /* FALLTHRU */
3850             case fignore:
3851               fvdef = fvnone;
3852               break;
3853             case fvnone:
3854               switch (objdef)
3855                 {
3856                 case otagseen:
3857                   make_C_tag (true); /* an Objective C class */
3858                   objdef = oignore;
3859                   break;
3860                 case omethodtag:
3861                 case omethodparm:
3862                   make_C_tag (true); /* an Objective C method */
3863                   objdef = oinbody;
3864                   break;
3865                 default:
3866                   /* Neutralize `extern "C" {' grot. */
3867                   if (bracelev == 0 && structdef == snone && nestlev == 0
3868                       && typdef == tnone)
3869                     bracelev = -1;
3870                 }
3871               break;
3872             default:
3873               break;
3874             }
3875           switch (structdef)
3876             {
3877             case skeyseen:         /* unnamed struct */
3878               pushclass_above (bracelev, NULL, 0);
3879               structdef = snone;
3880               break;
3881             case stagseen:         /* named struct or enum */
3882             case scolonseen:       /* a class */
3883               pushclass_above (bracelev,token.line+token.offset, token.length);
3884               structdef = snone;
3885               make_C_tag (false);  /* a struct or enum */
3886               break;
3887             default:
3888               break;
3889             }
3890           bracelev += 1;
3891           break;
3892         case '*':
3893           if (definedef != dnone)
3894             break;
3895           if (fvdef == fstartlist)
3896             {
3897               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3898               token.valid = false;
3899             }
3900           break;
3901         case '}':
3902           if (definedef != dnone)
3903             break;
3904           bracelev -= 1;
3905           if (!ignoreindent && lp == newlb.buffer + 1)
3906             {
3907               if (bracelev != 0)
3908                 token.valid = false; /* unexpected value, token unreliable */
3909               bracelev = 0;     /* reset brace level if first column */
3910               parlev = 0;       /* also reset paren level, just in case... */
3911             }
3912           else if (bracelev < 0)
3913             {
3914               token.valid = false; /* something gone amiss, token unreliable */
3915               bracelev = 0;
3916             }
3917           if (bracelev == 0 && fvdef == vignore)
3918             fvdef = fvnone;             /* end of function */
3919           popclass_above (bracelev);
3920           structdef = snone;
3921           /* Only if typdef == tinbody is typdefbracelev significant. */
3922           if (typdef == tinbody && bracelev <= typdefbracelev)
3923             {
3924               assert (bracelev == typdefbracelev);
3925               typdef = tend;
3926             }
3927           break;
3928         case '=':
3929           if (definedef != dnone)
3930             break;
3931           switch (fvdef)
3932             {
3933             case foperator:
3934             case finlist:
3935             case fignore:
3936             case vignore:
3937               break;
3938             case fvnameseen:
3939               if ((members && bracelev == 1)
3940                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3941                 make_C_tag (false); /* a variable */
3942               /* FALLTHRU */
3943             default:
3944               fvdef = vignore;
3945             }
3946           break;
3947         case '<':
3948           if (cplpl
3949               && (structdef == stagseen || fvdef == fvnameseen))
3950             {
3951               templatelev++;
3952               break;
3953             }
3954           goto resetfvdef;
3955         case '>':
3956           if (templatelev > 0)
3957             {
3958               templatelev--;
3959               break;
3960             }
3961           goto resetfvdef;
3962         case '+':
3963         case '-':
3964           if (objdef == oinbody && bracelev == 0)
3965             {
3966               objdef = omethodsign;
3967               break;
3968             }
3969           /* FALLTHRU */
3970         resetfvdef:
3971         case '#': case '~': case '&': case '%': case '/':
3972         case '|': case '^': case '!': case '.': case '?':
3973           if (definedef != dnone)
3974             break;
3975           /* These surely cannot follow a function tag in C. */
3976           switch (fvdef)
3977             {
3978             case foperator:
3979             case finlist:
3980             case fignore:
3981             case vignore:
3982               break;
3983             default:
3984               fvdef = fvnone;
3985             }
3986           break;
3987         case '\0':
3988           if (objdef == otagseen)
3989             {
3990               make_C_tag (true); /* an Objective C class */
3991               objdef = oignore;
3992             }
3993           /* If a macro spans multiple lines don't reset its state. */
3994           if (quotednl)
3995             CNL_SAVE_DEFINEDEF ();
3996           else
3997             CNL ();
3998           break;
3999         } /* switch (c) */
4000
4001     } /* while not eof */
4002
4003   free (lbs[0].lb.buffer);
4004   free (lbs[1].lb.buffer);
4005 }
4006
4007 /*
4008  * Process either a C++ file or a C file depending on the setting
4009  * of a global flag.
4010  */
4011 static void
4012 default_C_entries (FILE *inf)
4013 {
4014   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4015 }
4016
/* Handler that always parses INF as plain C: C_entries is called with
   none of the C_* dialect flags set.  */
static void
plain_C_entries (FILE *inf)
{
  int const dialect = 0;        /* no dialect flag: plain C */

  C_entries (dialect, inf);
}
4023
4024 /* Always do C++. */
4025 static void
4026 Cplusplus_entries (FILE *inf)
4027 {
4028   C_entries (C_PLPL, inf);
4029 }
4030
4031 /* Always do Java. */
4032 static void
4033 Cjava_entries (FILE *inf)
4034 {
4035   C_entries (C_JAVA, inf);
4036 }
4037
4038 /* Always do C*. */
4039 static void
4040 Cstar_entries (FILE *inf)
4041 {
4042   C_entries (C_STAR, inf);
4043 }
4044
4045 /* Always do Yacc. */
4046 static void
4047 Yacc_entries (FILE *inf)
4048 {
4049   C_entries (YACC, inf);
4050 }
4051
4052 \f
4053 /* Useful macros. */
/* Loop header that iterates over the lines of an input file.
   While perhaps_more_input (INF_) holds, the next line is read into
   the line buffer LBUF_ with readline, and CURSOR_ is set to the start
   of that buffer before the loop body runs.  Must be followed by the
   statement (or braced block) that forms the loop body.  */
#define LOOP_ON_INPUT_LINES(inf_, lbuf_, cursor_)                       \
  while (perhaps_more_input (inf_)                                      \
         && (readline (&(lbuf_), inf_),                                 \
             (cursor_) = (lbuf_).buffer,                                \
             true))
4059
/* True if the text at PTR_ starts with the keyword LIT_ and the
   character right after it satisfies notinname (i.e. the keyword ends
   there).  On success PTR_ is advanced past the keyword and past any
   following spaces; on failure PTR_ is left untouched.
   LIT_ must be a string literal: the `"" LIT_' concatenation inside
   the assert is a syntax error for anything else.  PTR_ is evaluated
   more than once and must be an lvalue.  */
#define LOOKING_AT(ptr_, lit_)                                          \
  ((assert ("" lit_), true)                                             \
   && strneq ((ptr_), lit_, sizeof (lit_)-1)                            \
   && notinname ((ptr_)[sizeof (lit_)-1])                               \
   && ((ptr_) = skip_spaces ((ptr_)+sizeof (lit_)-1)))
4065
/* Case-insensitive relative of LOOKING_AT.  True if the text at PTR_
   starts with the keyword LIT_, compared with strncaseeq.  Unlike
   LOOKING_AT it does not check what follows the keyword with
   notinname, and it does not skip spaces: on success PTR_ is advanced
   just past the keyword and nothing more.
   LIT_ must be a string literal (enforced by the `"" LIT_'
   concatenation); PTR_ must be an lvalue.  */
#define LOOKING_AT_NOCASE(ptr_, lit_)                                   \
  ((assert ("" lit_), true)                                             \
   && strncaseeq ((ptr_), lit_, sizeof (lit_)-1)                        \
   && ((ptr_) += sizeof (lit_)-1))
4071
4072 /*
4073  * Read a file, but do no processing.  This is used to do regexp
4074  * matching on files that have no language defined.
4075  */
4076 static void
4077 just_read_file (FILE *inf)
4078 {
4079   while (perhaps_more_input (inf))
4080     readline (&lb, inf);
4081 }
4082
4083 \f
4084 /* Fortran parsing */
4085
4086 static void F_takeprec (void);
4087 static void F_getit (FILE *);
4088
4089 static void
4090 F_takeprec (void)
4091 {
4092   dbp = skip_spaces (dbp);
4093   if (*dbp != '*')
4094     return;
4095   dbp++;
4096   dbp = skip_spaces (dbp);
4097   if (strneq (dbp, "(*)", 3))
4098     {
4099       dbp += 3;
4100       return;
4101     }
4102   if (!c_isdigit (*dbp))
4103     {
4104       --dbp;                    /* force failure */
4105       return;
4106     }
4107   do
4108     dbp++;
4109   while (c_isdigit (*dbp));
4110 }
4111
4112 static void
4113 F_getit (FILE *inf)
4114 {
4115   register char *cp;
4116
4117   dbp = skip_spaces (dbp);
4118   if (*dbp == '\0')
4119     {
4120       readline (&lb, inf);
4121       dbp = lb.buffer;
4122       if (dbp[5] != '&')
4123         return;
4124       dbp += 6;
4125       dbp = skip_spaces (dbp);
4126     }
4127   if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4128     return;
4129   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4130     continue;
4131   make_tag (dbp, cp-dbp, true,
4132             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4133 }
4134
4135
4136 static void
4137 Fortran_functions (FILE *inf)
4138 {
4139   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4140     {
4141       if (*dbp == '%')
4142         dbp++;                  /* Ratfor escape to fortran */
4143       dbp = skip_spaces (dbp);
4144       if (*dbp == '\0')
4145         continue;
4146
4147       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4148         dbp = skip_spaces (dbp);
4149
4150       if (LOOKING_AT_NOCASE (dbp, "pure"))
4151         dbp = skip_spaces (dbp);
4152
4153       if (LOOKING_AT_NOCASE (dbp, "elemental"))
4154         dbp = skip_spaces (dbp);
4155
4156       switch (c_tolower (*dbp))
4157         {
4158         case 'i':
4159           if (nocase_tail ("integer"))
4160             F_takeprec ();
4161           break;
4162         case 'r':
4163           if (nocase_tail ("real"))
4164             F_takeprec ();
4165           break;
4166         case 'l':
4167           if (nocase_tail ("logical"))
4168             F_takeprec ();
4169           break;
4170         case 'c':
4171           if (nocase_tail ("complex") || nocase_tail ("character"))
4172             F_takeprec ();
4173           break;
4174         case 'd':
4175           if (nocase_tail ("double"))
4176             {
4177               dbp = skip_spaces (dbp);
4178               if (*dbp == '\0')
4179                 continue;
4180               if (nocase_tail ("precision"))
4181                 break;
4182               continue;
4183             }
4184           break;
4185         }
4186       dbp = skip_spaces (dbp);
4187       if (*dbp == '\0')
4188         continue;
4189       switch (c_tolower (*dbp))
4190         {
4191         case 'f':
4192           if (nocase_tail ("function"))
4193             F_getit (inf);
4194           continue;
4195         case 's':
4196           if (nocase_tail ("subroutine"))
4197             F_getit (inf);
4198           continue;
4199         case 'e':
4200           if (nocase_tail ("entry"))
4201             F_getit (inf);
4202           continue;
4203         case 'b':
4204           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4205             {
4206               dbp = skip_spaces (dbp);
4207               if (*dbp == '\0') /* assume un-named */
4208                 make_tag ("blockdata", 9, true,
4209                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4210               else
4211                 F_getit (inf);  /* look for name */
4212             }
4213           continue;
4214         }
4215     }
4216 }
4217
4218 \f
4219 /*
4220  * Go language support
4221  * Original code by Xi Lu <lx@shellcodes.org> (2016)
4222  */
4223 static void
4224 Go_functions(FILE *inf)
4225 {
4226   char *cp, *name;
4227
4228   LOOP_ON_INPUT_LINES(inf, lb, cp)
4229     {
4230       cp = skip_spaces (cp);
4231
4232       if (LOOKING_AT (cp, "package"))
4233         {
4234           name = cp;
4235           while (!notinname (*cp) && *cp != '\0')
4236             cp++;
4237           make_tag (name, cp - name, false, lb.buffer,
4238                     cp - lb.buffer + 1, lineno, linecharno);
4239         }
4240       else if (LOOKING_AT (cp, "func"))
4241         {
4242           /* Go implementation of interface, such as:
4243              func (n *Integer) Add(m Integer) ...
4244              skip `(n *Integer)` part.
4245           */
4246           if (*cp == '(')
4247             {
4248               while (*cp != ')')
4249                 cp++;
4250               cp = skip_spaces (cp+1);
4251             }
4252
4253           if (*cp)
4254             {
4255               name = cp;
4256
4257               while (!notinname (*cp))
4258                 cp++;
4259
4260               make_tag (name, cp - name, true, lb.buffer,
4261                         cp - lb.buffer + 1, lineno, linecharno);
4262             }
4263         }
4264       else if (members && LOOKING_AT (cp, "type"))
4265         {
4266           name = cp;
4267
4268           /* Ignore the likes of the following:
4269              type (
4270                     A
4271              )
4272            */
4273           if (*cp == '(')
4274             return;
4275
4276           while (!notinname (*cp) && *cp != '\0')
4277             cp++;
4278
4279           make_tag (name, cp - name, false, lb.buffer,
4280                     cp - lb.buffer + 1, lineno, linecharno);
4281         }
4282     }
4283 }
4284
4285 \f
4286 /*
4287  * Ada parsing
4288  * Original code by
4289  * Philippe Waroquiers (1998)
4290  */
4291
4292 /* Once we are positioned after an "interesting" keyword, let's get
4293    the real tag value necessary. */
4294 static void
4295 Ada_getit (FILE *inf, const char *name_qualifier)
4296 {
4297   register char *cp;
4298   char *name;
4299   char c;
4300
4301   while (perhaps_more_input (inf))
4302     {
4303       dbp = skip_spaces (dbp);
4304       if (*dbp == '\0'
4305           || (dbp[0] == '-' && dbp[1] == '-'))
4306         {
4307           readline (&lb, inf);
4308           dbp = lb.buffer;
4309         }
4310       switch (c_tolower (*dbp))
4311         {
4312         case 'b':
4313           if (nocase_tail ("body"))
4314             {
4315               /* Skipping body of   procedure body   or   package body or ....
4316                  resetting qualifier to body instead of spec. */
4317               name_qualifier = "/b";
4318               continue;
4319             }
4320           break;
4321         case 't':
4322           /* Skipping type of   task type   or   protected type ... */
4323           if (nocase_tail ("type"))
4324             continue;
4325           break;
4326         }
4327       if (*dbp == '"')
4328         {
4329           dbp += 1;
4330           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4331             continue;
4332         }
4333       else
4334         {
4335           dbp = skip_spaces (dbp);
4336           for (cp = dbp;
4337                c_isalnum (*cp) || *cp == '_' || *cp == '.';
4338                cp++)
4339             continue;
4340           if (cp == dbp)
4341             return;
4342         }
4343       c = *cp;
4344       *cp = '\0';
4345       name = concat (dbp, name_qualifier, "");
4346       *cp = c;
4347       make_tag (name, strlen (name), true,
4348                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4349       free (name);
4350       if (c == '"')
4351         dbp = cp + 1;
4352       return;
4353     }
4354 }
4355
4356 static void
4357 Ada_funcs (FILE *inf)
4358 {
4359   bool inquote = false;
4360   bool skip_till_semicolumn = false;
4361
4362   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4363     {
4364       while (*dbp != '\0')
4365         {
4366           /* Skip a string i.e. "abcd". */
4367           if (inquote || (*dbp == '"'))
4368             {
4369               dbp = strchr (dbp + !inquote, '"');
4370               if (dbp != NULL)
4371                 {
4372                   inquote = false;
4373                   dbp += 1;
4374                   continue;     /* advance char */
4375                 }
4376               else
4377                 {
4378                   inquote = true;
4379                   break;        /* advance line */
4380                 }
4381             }
4382
4383           /* Skip comments. */
4384           if (dbp[0] == '-' && dbp[1] == '-')
4385             break;              /* advance line */
4386
4387           /* Skip character enclosed in single quote i.e. 'a'
4388              and skip single quote starting an attribute i.e. 'Image. */
4389           if (*dbp == '\'')
4390             {
4391               dbp++ ;
4392               if (*dbp != '\0')
4393                 dbp++;
4394               continue;
4395             }
4396
4397           if (skip_till_semicolumn)
4398             {
4399               if (*dbp == ';')
4400                 skip_till_semicolumn = false;
4401               dbp++;
4402               continue;         /* advance char */
4403             }
4404
4405           /* Search for beginning of a token.  */
4406           if (!begtoken (*dbp))
4407             {
4408               dbp++;
4409               continue;         /* advance char */
4410             }
4411
4412           /* We are at the beginning of a token. */
4413           switch (c_tolower (*dbp))
4414             {
4415             case 'f':
4416               if (!packages_only && nocase_tail ("function"))
4417                 Ada_getit (inf, "/f");
4418               else
4419                 break;          /* from switch */
4420               continue;         /* advance char */
4421             case 'p':
4422               if (!packages_only && nocase_tail ("procedure"))
4423                 Ada_getit (inf, "/p");
4424               else if (nocase_tail ("package"))
4425                 Ada_getit (inf, "/s");
4426               else if (nocase_tail ("protected")) /* protected type */
4427                 Ada_getit (inf, "/t");
4428               else
4429                 break;          /* from switch */
4430               continue;         /* advance char */
4431
4432             case 'u':
4433               if (typedefs && !packages_only && nocase_tail ("use"))
4434                 {
4435                   /* when tagging types, avoid tagging  use type Pack.Typename;
4436                      for this, we will skip everything till a ; */
4437                   skip_till_semicolumn = true;
4438                   continue;     /* advance char */
4439                 }
4440
4441             case 't':
4442               if (!packages_only && nocase_tail ("task"))
4443                 Ada_getit (inf, "/k");
4444               else if (typedefs && !packages_only && nocase_tail ("type"))
4445                 {
4446                   Ada_getit (inf, "/t");
4447                   while (*dbp != '\0')
4448                     dbp += 1;
4449                 }
4450               else
4451                 break;          /* from switch */
4452               continue;         /* advance char */
4453             }
4454
4455           /* Look for the end of the token. */
4456           while (!endtoken (*dbp))
4457             dbp++;
4458
4459         } /* advance char */
4460     } /* advance line */
4461 }
4462
4463 \f
4464 /*
4465  * Unix and microcontroller assembly tag handling
4466  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4467  * Idea by Bob Weiner, Motorola Inc. (1994)
4468  */
4469 static void
4470 Asm_labels (FILE *inf)
4471 {
4472   register char *cp;
4473
4474   LOOP_ON_INPUT_LINES (inf, lb, cp)
4475     {
4476       /* If first char is alphabetic or one of [_.$], test for colon
4477          following identifier. */
4478       if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4479         {
4480           /* Read past label. */
4481           cp++;
4482           while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4483             cp++;
4484           if (*cp == ':' || c_isspace (*cp))
4485             /* Found end of label, so copy it and add it to the table. */
4486             make_tag (lb.buffer, cp - lb.buffer, true,
4487                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4488         }
4489     }
4490 }
4491
4492 \f
4493 /*
4494  * Perl support
4495  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4496  *                 /^use constant[ \t\n]+[^ \t\n{=,;]+/
4497  * Perl variable names: /^(my|local).../
4498  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4499  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4500  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4501  */
4502 static void
4503 Perl_functions (FILE *inf)
4504 {
4505   char *package = savestr ("main"); /* current package name */
4506   register char *cp;
4507
4508   LOOP_ON_INPUT_LINES (inf, lb, cp)
4509     {
4510       cp = skip_spaces (cp);
4511
4512       if (LOOKING_AT (cp, "package"))
4513         {
4514           free (package);
4515           get_tag (cp, &package);
4516         }
4517       else if (LOOKING_AT (cp, "sub"))
4518         {
4519           char *pos, *sp;
4520
4521         subr:
4522           sp = cp;
4523           while (!notinname (*cp))
4524             cp++;
4525           if (cp == sp)
4526             continue;           /* nothing found */
4527           pos = strchr (sp, ':');
4528           if (pos && pos < cp && pos[1] == ':')
4529             /* The name is already qualified. */
4530             make_tag (sp, cp - sp, true,
4531                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4532           else
4533             /* Qualify it. */
4534             {
4535               char savechar, *name;
4536
4537               savechar = *cp;
4538               *cp = '\0';
4539               name = concat (package, "::", sp);
4540               *cp = savechar;
4541               make_tag (name, strlen (name), true,
4542                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4543               free (name);
4544             }
4545         }
4546       else if (LOOKING_AT (cp, "use constant")
4547                || LOOKING_AT (cp, "use constant::defer"))
4548         {
4549           /* For hash style multi-constant like
4550                 use constant { FOO => 123,
4551                                BAR => 456 };
4552              only the first FOO is picked up.  Parsing across the value
4553              expressions would be difficult in general, due to possible nested
4554              hashes, here-documents, etc.  */
4555           if (*cp == '{')
4556             cp = skip_spaces (cp+1);
4557           goto subr;
4558         }
4559       else if (globals) /* only if we are tagging global vars */
4560         {
4561           /* Skip a qualifier, if any. */
4562           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4563           /* After "my" or "local", but before any following paren or space. */
4564           char *varstart = cp;
4565
4566           if (qual              /* should this be removed?  If yes, how? */
4567               && (*cp == '$' || *cp == '@' || *cp == '%'))
4568             {
4569               varstart += 1;
4570               do
4571                 cp++;
4572               while (c_isalnum (*cp) || *cp == '_');
4573             }
4574           else if (qual)
4575             {
4576               /* Should be examining a variable list at this point;
4577                  could insist on seeing an open parenthesis. */
4578               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4579                 cp++;
4580             }
4581           else
4582             continue;
4583
4584           make_tag (varstart, cp - varstart, false,
4585                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4586         }
4587     }
4588   free (package);
4589 }
4590
4591
4592 /*
4593  * Python support
4594  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4595  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4596  * More ideas by seb bacon <seb@jamkit.com> (2002)
4597  */
4598 static void
4599 Python_functions (FILE *inf)
4600 {
4601   register char *cp;
4602
4603   LOOP_ON_INPUT_LINES (inf, lb, cp)
4604     {
4605       cp = skip_spaces (cp);
4606       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4607         {
4608           char *name = cp;
4609           while (!notinname (*cp) && *cp != ':')
4610             cp++;
4611           make_tag (name, cp - name, true,
4612                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4613         }
4614     }
4615 }
4616
4617 /*
4618  * Ruby support
4619  * Original code by Xi Lu <lx@shellcodes.org> (2015)
4620  */
4621 static void
4622 Ruby_functions (FILE *inf)
4623 {
4624   char *cp = NULL;
4625
4626   LOOP_ON_INPUT_LINES (inf, lb, cp)
4627     {
4628       bool is_class = false;
4629       bool is_method = false;
4630       char *name;
4631
4632       cp = skip_spaces (cp);
4633       if (c_isalpha (*cp) && c_isupper (*cp)) /* constants */
4634         {
4635           char *bp, *colon = NULL;
4636
4637           name = cp;
4638
4639           for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
4640             {
4641               if (*cp == ':')
4642                 colon = cp;
4643             }
4644           if (cp > name + 1)
4645             {
4646               bp = skip_spaces (cp);
4647               if (*bp == '=' && !(bp[1] == '=' || bp[1] == '>'))
4648                 {
4649                   if (colon && !c_isspace (colon[1]))
4650                     name = colon + 1;
4651                   make_tag (name, cp - name, false,
4652                             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4653                 }
4654             }
4655         }
4656       else if ((is_method = LOOKING_AT (cp, "def")) /* module/class/method */
4657                || (is_class = LOOKING_AT (cp, "class"))
4658                || LOOKING_AT (cp, "module"))
4659         {
4660           const char self_name[] = "self.";
4661           const size_t self_size1 = sizeof (self_name) - 1;
4662
4663           name = cp;
4664
4665          /* Ruby method names can end in a '='.  Also, operator overloading can
4666             define operators whose names include '='.  */
4667           while (!notinname (*cp) || *cp == '=')
4668             cp++;
4669
4670           /* Remove "self." from the method name.  */
4671           if (cp - name > self_size1
4672               && strneq (name, self_name, self_size1))
4673             name += self_size1;
4674
4675           /* Remove the class/module qualifiers from method names.  */
4676           if (is_method)
4677             {
4678               char *q;
4679
4680               for (q = name; q < cp && *q != '.'; q++)
4681                 ;
4682               if (q < cp - 1)   /* punt if we see just "FOO." */
4683                 name = q + 1;
4684             }
4685
4686           /* Don't tag singleton classes.  */
4687           if (is_class && strneq (name, "<<", 2) && cp == name + 2)
4688             continue;
4689
4690           make_tag (name, cp - name, true,
4691                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4692         }
4693       else
4694         {
4695           /* Tag accessors and aliases.  */
4696           while (*cp && *cp != '#')
4697             {
4698               bool reader = false, writer = false, alias = false;
4699
4700               if (LOOKING_AT (cp, "attr_reader"))
4701                 reader = true;
4702               else if (LOOKING_AT (cp, "attr_writer"))
4703                 writer = true;
4704               else if (LOOKING_AT (cp, "attr_accessor"))
4705                 {
4706                   reader = true;
4707                   writer = true;
4708                 }
4709               else if (LOOKING_AT (cp, "alias_method"))
4710                 alias = true;
4711               if (reader || writer || alias)
4712                 {
4713                   do {
4714                     char *np = cp;
4715
4716                     cp = skip_name (cp);
4717                     if (reader)
4718                       make_tag (np, cp - np, true,
4719                                 lb.buffer, cp - lb.buffer + 1,
4720                                 lineno, linecharno);
4721                     if (writer)
4722                       {
4723                         size_t name_len = cp - np + 1;
4724                         char *wr_name = xnew (name_len + 1, char);
4725
4726                         memcpy (wr_name, np, name_len - 1);
4727                         memcpy (wr_name + name_len - 1, "=", 2);
4728                         pfnote (wr_name, true, lb.buffer, cp - lb.buffer + 1,
4729                                 lineno, linecharno);
4730                       }
4731                     if (alias)
4732                       {
4733                         make_tag (np, cp - np, true,
4734                                   lb.buffer, cp - lb.buffer + 1,
4735                                   lineno, linecharno);
4736                         while (*cp && *cp != '#' && *cp != ';')
4737                           cp++;
4738                       }
4739                   } while (*cp == ','
4740                            && (cp = skip_spaces (cp + 1), *cp && *cp != '#'));
4741                 }
4742               cp = skip_name (cp);
4743               while (*cp && *cp != '#' && notinname (*cp))
4744                 cp++;
4745             }
4746         }
4747     }
4748 }
4749
4750 \f
4751 /*
4752  * PHP support
4753  * Look for:
4754  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4755  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4756  *  - /^[ \t]*define\(\"[^\"]+/
4757  * Only with --members:
4758  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4759  * Idea by Diez B. Roggisch (2001)
4760  */
4761 static void
4762 PHP_functions (FILE *inf)
4763 {
4764   char *cp, *name;
4765   bool search_identifier = false;
4766
4767   LOOP_ON_INPUT_LINES (inf, lb, cp)
4768     {
4769       cp = skip_spaces (cp);
4770       name = cp;
4771       if (search_identifier
4772           && *cp != '\0')
4773         {
4774           while (!notinname (*cp))
4775             cp++;
4776           make_tag (name, cp - name, true,
4777                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4778           search_identifier = false;
4779         }
4780       else if (LOOKING_AT (cp, "function"))
4781         {
4782           if (*cp == '&')
4783             cp = skip_spaces (cp+1);
4784           if (*cp != '\0')
4785             {
4786               name = cp;
4787               while (!notinname (*cp))
4788                 cp++;
4789               make_tag (name, cp - name, true,
4790                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4791             }
4792           else
4793             search_identifier = true;
4794         }
4795       else if (LOOKING_AT (cp, "class"))
4796         {
4797           if (*cp != '\0')
4798             {
4799               name = cp;
4800               while (*cp != '\0' && !c_isspace (*cp))
4801                 cp++;
4802               make_tag (name, cp - name, false,
4803                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4804             }
4805           else
4806             search_identifier = true;
4807         }
4808       else if (strneq (cp, "define", 6)
4809                && (cp = skip_spaces (cp+6))
4810                && *cp++ == '('
4811                && (*cp == '"' || *cp == '\''))
4812         {
4813           char quote = *cp++;
4814           name = cp;
4815           while (*cp != quote && *cp != '\0')
4816             cp++;
4817           make_tag (name, cp - name, false,
4818                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4819         }
4820       else if (members
4821                && LOOKING_AT (cp, "var")
4822                && *cp == '$')
4823         {
4824           name = cp;
4825           while (!notinname (*cp))
4826             cp++;
4827           make_tag (name, cp - name, false,
4828                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4829         }
4830     }
4831 }
4832
4833 \f
4834 /*
4835  * Cobol tag functions
4836  * We could look for anything that could be a paragraph name.
4837  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4838  * Idea by Corny de Souza (1993)
4839  */
4840 static void
4841 Cobol_paragraphs (FILE *inf)
4842 {
4843   register char *bp, *ep;
4844
4845   LOOP_ON_INPUT_LINES (inf, lb, bp)
4846     {
4847       if (lb.len < 9)
4848         continue;
4849       bp += 8;
4850
4851       /* If eoln, compiler option or comment ignore whole line. */
4852       if (bp[-1] != ' ' || !c_isalnum (bp[0]))
4853         continue;
4854
4855       for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
4856         continue;
4857       if (*ep++ == '.')
4858         make_tag (bp, ep - bp, true,
4859                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4860     }
4861 }
4862
4863 \f
4864 /*
4865  * Makefile support
4866  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4867  */
4868 static void
4869 Makefile_targets (FILE *inf)
4870 {
4871   register char *bp;
4872
4873   LOOP_ON_INPUT_LINES (inf, lb, bp)
4874     {
4875       if (*bp == '\t' || *bp == '#')
4876         continue;
4877       while (*bp != '\0' && *bp != '=' && *bp != ':')
4878         bp++;
4879       if (*bp == ':' || (globals && *bp == '='))
4880         {
4881           /* We should detect if there is more than one tag, but we do not.
4882              We just skip initial and final spaces. */
4883           char * namestart = skip_spaces (lb.buffer);
4884           while (--bp > namestart)
4885             if (!notinname (*bp))
4886               break;
4887           make_tag (namestart, bp - namestart + 1, true,
4888                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4889         }
4890     }
4891 }
4892
4893 \f
4894 /*
4895  * Pascal parsing
4896  * Original code by Mosur K. Mohan (1989)
4897  *
4898  *  Locates tags for procedures & functions.  Doesn't do any type- or
4899  *  var-definitions.  It does look for the keyword "extern" or
4900  *  "forward" immediately following the procedure statement; if found,
4901  *  the tag is skipped.
4902  */
4903 static void
4904 Pascal_functions (FILE *inf)
4905 {
4906   linebuffer tline;             /* mostly copied from C_entries */
4907   long save_lcno;
4908   int save_lineno, namelen, taglen;
4909   char c, *name;
4910
4911   bool                          /* each of these flags is true if: */
4912     incomment,                  /* point is inside a comment */
4913     inquote,                    /* point is inside '..' string */
4914     get_tagname,                /* point is after PROCEDURE/FUNCTION
4915                                    keyword, so next item = potential tag */
4916     found_tag,                  /* point is after a potential tag */
4917     inparms,                    /* point is within parameter-list */
4918     verify_tag;                 /* point has passed the parm-list, so the
4919                                    next token will determine whether this
4920                                    is a FORWARD/EXTERN to be ignored, or
4921                                    whether it is a real tag */
4922
4923   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4924   name = NULL;                  /* keep compiler quiet */
4925   dbp = lb.buffer;
4926   *dbp = '\0';
4927   linebuffer_init (&tline);
4928
4929   incomment = inquote = false;
4930   found_tag = false;            /* have a proc name; check if extern */
4931   get_tagname = false;          /* found "procedure" keyword         */
4932   inparms = false;              /* found '(' after "proc"            */
4933   verify_tag = false;           /* check if "extern" is ahead        */
4934
4935
4936   while (perhaps_more_input (inf)) /* long main loop to get next char */
4937     {
4938       c = *dbp++;
4939       if (c == '\0')            /* if end of line */
4940         {
4941           readline (&lb, inf);
4942           dbp = lb.buffer;
4943           if (*dbp == '\0')
4944             continue;
4945           if (!((found_tag && verify_tag)
4946                 || get_tagname))
4947             c = *dbp++;         /* only if don't need *dbp pointing
4948                                    to the beginning of the name of
4949                                    the procedure or function */
4950         }
4951       if (incomment)
4952         {
4953           if (c == '}')         /* within { } comments */
4954             incomment = false;
4955           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4956             {
4957               dbp++;
4958               incomment = false;
4959             }
4960           continue;
4961         }
4962       else if (inquote)
4963         {
4964           if (c == '\'')
4965             inquote = false;
4966           continue;
4967         }
4968       else
4969         switch (c)
4970           {
4971           case '\'':
4972             inquote = true;     /* found first quote */
4973             continue;
4974           case '{':             /* found open { comment */
4975             incomment = true;
4976             continue;
4977           case '(':
4978             if (*dbp == '*')    /* found open (* comment */
4979               {
4980                 incomment = true;
4981                 dbp++;
4982               }
4983             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4984               inparms = true;
4985             continue;
4986           case ')':             /* end of parms list */
4987             if (inparms)
4988               inparms = false;
4989             continue;
4990           case ';':
4991             if (found_tag && !inparms) /* end of proc or fn stmt */
4992               {
4993                 verify_tag = true;
4994                 break;
4995               }
4996             continue;
4997           }
4998       if (found_tag && verify_tag && (*dbp != ' '))
4999         {
5000           /* Check if this is an "extern" declaration. */
5001           if (*dbp == '\0')
5002             continue;
5003           if (c_tolower (*dbp) == 'e')
5004             {
5005               if (nocase_tail ("extern")) /* superfluous, really! */
5006                 {
5007                   found_tag = false;
5008                   verify_tag = false;
5009                 }
5010             }
5011           else if (c_tolower (*dbp) == 'f')
5012             {
5013               if (nocase_tail ("forward")) /* check for forward reference */
5014                 {
5015                   found_tag = false;
5016                   verify_tag = false;
5017                 }
5018             }
5019           if (found_tag && verify_tag) /* not external proc, so make tag */
5020             {
5021               found_tag = false;
5022               verify_tag = false;
5023               make_tag (name, namelen, true,
5024                         tline.buffer, taglen, save_lineno, save_lcno);
5025               continue;
5026             }
5027         }
5028       if (get_tagname)          /* grab name of proc or fn */
5029         {
5030           char *cp;
5031
5032           if (*dbp == '\0')
5033             continue;
5034
5035           /* Find block name. */
5036           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
5037             continue;
5038
5039           /* Save all values for later tagging. */
5040           linebuffer_setlen (&tline, lb.len);
5041           strcpy (tline.buffer, lb.buffer);
5042           save_lineno = lineno;
5043           save_lcno = linecharno;
5044           name = tline.buffer + (dbp - lb.buffer);
5045           namelen = cp - dbp;
5046           taglen = cp - lb.buffer + 1;
5047
5048           dbp = cp;             /* set dbp to e-o-token */
5049           get_tagname = false;
5050           found_tag = true;
5051           continue;
5052
5053           /* And proceed to check for "extern". */
5054         }
5055       else if (!incomment && !inquote && !found_tag)
5056         {
5057           /* Check for proc/fn keywords. */
5058           switch (c_tolower (c))
5059             {
5060             case 'p':
5061               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
5062                 get_tagname = true;
5063               continue;
5064             case 'f':
5065               if (nocase_tail ("unction"))
5066                 get_tagname = true;
5067               continue;
5068             }
5069         }
5070     } /* while not eof */
5071
5072   free (tline.buffer);
5073 }
5074
5075 \f
5076 /*
5077  * Lisp tag functions
5078  *  look for (def or (DEF, quote or QUOTE
5079  */
5080
5081 static void L_getit (void);
5082
5083 static void
5084 L_getit (void)
5085 {
5086   if (*dbp == '\'')             /* Skip prefix quote */
5087     dbp++;
5088   else if (*dbp == '(')
5089   {
5090     dbp++;
5091     /* Try to skip "(quote " */
5092     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
5093       /* Ok, then skip "(" before name in (defstruct (foo)) */
5094       dbp = skip_spaces (dbp);
5095   }
5096   get_tag (dbp, NULL);
5097 }
5098
5099 static void
5100 Lisp_functions (FILE *inf)
5101 {
5102   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5103     {
5104       if (dbp[0] != '(')
5105         continue;
5106
5107       /* "(defvar foo)" is a declaration rather than a definition.  */
5108       if (! declarations)
5109         {
5110           char *p = dbp + 1;
5111           if (LOOKING_AT (p, "defvar"))
5112             {
5113               p = skip_name (p); /* past var name */
5114               p = skip_spaces (p);
5115               if (*p == ')')
5116                 continue;
5117             }
5118         }
5119
5120       if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
5121         dbp += 3;
5122
5123       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5124         {
5125           dbp = skip_non_spaces (dbp);
5126           dbp = skip_spaces (dbp);
5127           L_getit ();
5128         }
5129       else
5130         {
5131           /* Check for (foo::defmumble name-defined ... */
5132           do
5133             dbp++;
5134           while (!notinname (*dbp) && *dbp != ':');
5135           if (*dbp == ':')
5136             {
5137               do
5138                 dbp++;
5139               while (*dbp == ':');
5140
5141               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5142                 {
5143                   dbp = skip_non_spaces (dbp);
5144                   dbp = skip_spaces (dbp);
5145                   L_getit ();
5146                 }
5147             }
5148         }
5149     }
5150 }
5151
5152 \f
5153 /*
5154  * Lua script language parsing
5155  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5156  *
5157  *  "function" and "local function" are tags if they start at column 1.
5158  */
5159 static void
5160 Lua_functions (FILE *inf)
5161 {
5162   register char *bp;
5163
5164   LOOP_ON_INPUT_LINES (inf, lb, bp)
5165     {
5166       bp = skip_spaces (bp);
5167       if (bp[0] != 'f' && bp[0] != 'l')
5168         continue;
5169
5170       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5171
5172       if (LOOKING_AT (bp, "function"))
5173         {
5174           char *tag_name, *tp_dot, *tp_colon;
5175
5176           get_tag (bp, &tag_name);
5177           /* If the tag ends with ".foo" or ":foo", make an additional tag for
5178              "foo".  */
5179           tp_dot = strrchr (tag_name, '.');
5180           tp_colon = strrchr (tag_name, ':');
5181           if (tp_dot || tp_colon)
5182             {
5183               char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
5184               int len_add = p - tag_name + 1;
5185
5186               get_tag (bp + len_add, NULL);
5187             }
5188         }
5189     }
5190 }
5191
5192 \f
5193 /*
5194  * PostScript tags
5195  * Just look for lines where the first character is '/'
5196  * Also look at "defineps" for PSWrap
5197  * Ideas by:
5198  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
5199  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5200  */
5201 static void
5202 PS_functions (FILE *inf)
5203 {
5204   register char *bp, *ep;
5205
5206   LOOP_ON_INPUT_LINES (inf, lb, bp)
5207     {
5208       if (bp[0] == '/')
5209         {
5210           for (ep = bp+1;
5211                *ep != '\0' && *ep != ' ' && *ep != '{';
5212                ep++)
5213             continue;
5214           make_tag (bp, ep - bp, true,
5215                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5216         }
5217       else if (LOOKING_AT (bp, "defineps"))
5218         get_tag (bp, NULL);
5219     }
5220 }
5221
5222 \f
5223 /*
5224  * Forth tags
5225  * Ignore anything after \ followed by space or in ( )
5226  * Look for words defined by :
5227  * Look for constant, code, create, defer, value, and variable
5228  * OBP extensions:  Look for buffer:, field,
5229  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5230  */
5231 static void
5232 Forth_words (FILE *inf)
5233 {
5234   register char *bp;
5235
5236   LOOP_ON_INPUT_LINES (inf, lb, bp)
5237     while ((bp = skip_spaces (bp))[0] != '\0')
5238       if (bp[0] == '\\' && c_isspace (bp[1]))
5239         break;                  /* read next line */
5240       else if (bp[0] == '(' && c_isspace (bp[1]))
5241         do                      /* skip to ) or eol */
5242           bp++;
5243         while (*bp != ')' && *bp != '\0');
5244       else if ((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5245                || LOOKING_AT_NOCASE (bp, "constant")
5246                || LOOKING_AT_NOCASE (bp, "code")
5247                || LOOKING_AT_NOCASE (bp, "create")
5248                || LOOKING_AT_NOCASE (bp, "defer")
5249                || LOOKING_AT_NOCASE (bp, "value")
5250                || LOOKING_AT_NOCASE (bp, "variable")
5251                || LOOKING_AT_NOCASE (bp, "buffer:")
5252                || LOOKING_AT_NOCASE (bp, "field"))
5253         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5254       else
5255         bp = skip_non_spaces (bp);
5256 }
5257
5258 \f
5259 /*
5260  * Scheme tag functions
5261  * look for (def... xyzzy
5262  *          (def... (xyzzy
5263  *          (def ... ((...(xyzzy ....
5264  *          (set! xyzzy
5265  * Original code by Ken Haase (1985?)
5266  */
5267 static void
5268 Scheme_functions (FILE *inf)
5269 {
5270   register char *bp;
5271
5272   LOOP_ON_INPUT_LINES (inf, lb, bp)
5273     {
5274       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5275         {
5276           bp = skip_non_spaces (bp+4);
5277           /* Skip over open parens and white space.  Don't continue past
5278              '\0'. */
5279           while (*bp && notinname (*bp))
5280             bp++;
5281           get_tag (bp, NULL);
5282         }
5283       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5284         get_tag (bp, NULL);
5285     }
5286 }
5287
5288 \f
5289 /* Find tags in TeX and LaTeX input files.  */
5290
5291 /* TEX_toktab is a table of TeX control sequences that define tags.
5292  * Each entry records one such control sequence.
5293  *
5294  * Original code from who knows whom.
5295  * Ideas by:
5296  *   Stefan Monnier (2002)
5297  */
5298
5299 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5300
/* Built-in list of control sequences to put into TEX_toktab, written
   as names each preceded by a colon.
   The value of environment var TEXTAGS is prepended to this.  */
static const char *TEX_defenv =
  ":chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem"
  ":part:appendix:entry:index:def"
  ":newcommand:renewcommand:newenvironment:renewenvironment";
5307
5308 static void TEX_decode_env (const char *, const char *);
5309
5310 /*
5311  * TeX/LaTeX scanning loop.
5312  */
5313 static void
5314 TeX_commands (FILE *inf)
5315 {
5316   char *cp;
5317   linebuffer *key;
5318
5319   char TEX_esc = '\0';
5320   char TEX_opgrp, TEX_clgrp;
5321
5322   /* Initialize token table once from environment. */
5323   if (TEX_toktab == NULL)
5324     TEX_decode_env ("TEXTAGS", TEX_defenv);
5325
5326   LOOP_ON_INPUT_LINES (inf, lb, cp)
5327     {
5328       /* Look at each TEX keyword in line. */
5329       for (;;)
5330         {
5331           /* Look for a TEX escape. */
5332           while (true)
5333             {
5334               char c = *cp++;
5335               if (c == '\0' || c == '%')
5336                 goto tex_next_line;
5337
5338               /* Select either \ or ! as escape character, whichever comes
5339                  first outside a comment.  */
5340               if (!TEX_esc)
5341                 switch (c)
5342                   {
5343                   case '\\':
5344                     TEX_esc = c;
5345                     TEX_opgrp = '{';
5346                     TEX_clgrp = '}';
5347                     break;
5348
5349                   case '!':
5350                     TEX_esc = c;
5351                     TEX_opgrp = '<';
5352                     TEX_clgrp = '>';
5353                     break;
5354                   }
5355
5356               if (c == TEX_esc)
5357                 break;
5358             }
5359
5360           for (key = TEX_toktab; key->buffer != NULL; key++)
5361             if (strneq (cp, key->buffer, key->len))
5362               {
5363                 char *p;
5364                 int namelen, linelen;
5365                 bool opgrp = false;
5366
5367                 cp = skip_spaces (cp + key->len);
5368                 if (*cp == TEX_opgrp)
5369                   {
5370                     opgrp = true;
5371                     cp++;
5372                   }
5373                 for (p = cp;
5374                      (!c_isspace (*p) && *p != '#' &&
5375                       *p != TEX_opgrp && *p != TEX_clgrp);
5376                      p++)
5377                   continue;
5378                 namelen = p - cp;
5379                 linelen = lb.len;
5380                 if (!opgrp || *p == TEX_clgrp)
5381                   {
5382                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5383                       p++;
5384                     linelen = p - lb.buffer + 1;
5385                   }
5386                 make_tag (cp, namelen, true,
5387                           lb.buffer, linelen, lineno, linecharno);
5388                 goto tex_next_line; /* We only tag a line once */
5389               }
5390         }
5391     tex_next_line:
5392       ;
5393     }
5394 }
5395
5396 /* Read environment and prepend it to the default string.
5397    Build token table. */
5398 static void
5399 TEX_decode_env (const char *evarname, const char *defenv)
5400 {
5401   register const char *env, *p;
5402   int i, len;
5403
5404   /* Append default string to environment. */
5405   env = getenv (evarname);
5406   if (!env)
5407     env = defenv;
5408   else
5409     env = concat (env, defenv, "");
5410
5411   /* Allocate a token table */
5412   for (len = 1, p = env; (p = strchr (p, ':')); )
5413     if (*++p)
5414       len++;
5415   TEX_toktab = xnew (len, linebuffer);
5416
5417   /* Unpack environment string into token table. Be careful about */
5418   /* zero-length strings (leading ':', "::" and trailing ':') */
5419   for (i = 0; *env != '\0';)
5420     {
5421       p = strchr (env, ':');
5422       if (!p)                   /* End of environment string. */
5423         p = env + strlen (env);
5424       if (p - env > 0)
5425         {                       /* Only non-zero strings. */
5426           TEX_toktab[i].buffer = savenstr (env, p - env);
5427           TEX_toktab[i].len = p - env;
5428           i++;
5429         }
5430       if (*p)
5431         env = p + 1;
5432       else
5433         {
5434           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5435           TEX_toktab[i].len = 0;
5436           break;
5437         }
5438     }
5439 }
5440
5441 \f
5442 /* Texinfo support.  Dave Love, Mar. 2000.  */
5443 static void
5444 Texinfo_nodes (FILE *inf)
5445 {
5446   char *cp, *start;
5447   LOOP_ON_INPUT_LINES (inf, lb, cp)
5448     if (LOOKING_AT (cp, "@node"))
5449       {
5450         start = cp;
5451         while (*cp != '\0' && *cp != ',')
5452           cp++;
5453         make_tag (start, cp - start, true,
5454                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5455       }
5456 }
5457
5458 \f
5459 /*
5460  * HTML support.
5461  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5462  * Contents of <a name=xxx> are tags with name xxx.
5463  *
5464  * Francesco Potortì, 2002.
5465  */
5466 static void
5467 HTML_labels (FILE *inf)
5468 {
5469   bool getnext = false;         /* next text outside of HTML tags is a tag */
5470   bool skiptag = false;         /* skip to the end of the current HTML tag */
5471   bool intag = false;           /* inside an html tag, looking for ID= */
5472   bool inanchor = false;        /* when INTAG, is an anchor, look for NAME= */
5473   char *end;
5474
5475
5476   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5477
5478   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5479     for (;;)                    /* loop on the same line */
5480       {
5481         if (skiptag)            /* skip HTML tag */
5482           {
5483             while (*dbp != '\0' && *dbp != '>')
5484               dbp++;
5485             if (*dbp == '>')
5486               {
5487                 dbp += 1;
5488                 skiptag = false;
5489                 continue;       /* look on the same line */
5490               }
5491             break;              /* go to next line */
5492           }
5493
5494         else if (intag) /* look for "name=" or "id=" */
5495           {
5496             while (*dbp != '\0' && *dbp != '>'
5497                    && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5498               dbp++;
5499             if (*dbp == '\0')
5500               break;            /* go to next line */
5501             if (*dbp == '>')
5502               {
5503                 dbp += 1;
5504                 intag = false;
5505                 continue;       /* look on the same line */
5506               }
5507             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5508                 || LOOKING_AT_NOCASE (dbp, "id="))
5509               {
5510                 bool quoted = (dbp[0] == '"');
5511
5512                 if (quoted)
5513                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5514                     continue;
5515                 else
5516                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5517                     continue;
5518                 linebuffer_setlen (&token_name, end - dbp);
5519                 memcpy (token_name.buffer, dbp, end - dbp);
5520                 token_name.buffer[end - dbp] = '\0';
5521
5522                 dbp = end;
5523                 intag = false;  /* we found what we looked for */
5524                 skiptag = true; /* skip to the end of the tag */
5525                 getnext = true; /* then grab the text */
5526                 continue;       /* look on the same line */
5527               }
5528             dbp += 1;
5529           }
5530
5531         else if (getnext)       /* grab next tokens and tag them */
5532           {
5533             dbp = skip_spaces (dbp);
5534             if (*dbp == '\0')
5535               break;            /* go to next line */
5536             if (*dbp == '<')
5537               {
5538                 intag = true;
5539                 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5540                 continue;       /* look on the same line */
5541               }
5542
5543             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5544               continue;
5545             make_tag (token_name.buffer, token_name.len, true,
5546                       dbp, end - dbp, lineno, linecharno);
5547             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5548             getnext = false;
5549             break;              /* go to next line */
5550           }
5551
5552         else                    /* look for an interesting HTML tag */
5553           {
5554             while (*dbp != '\0' && *dbp != '<')
5555               dbp++;
5556             if (*dbp == '\0')
5557               break;            /* go to next line */
5558             intag = true;
5559             if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5560               {
5561                 inanchor = true;
5562                 continue;       /* look on the same line */
5563               }
5564             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5565                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5566                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5567                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5568               {
5569                 intag = false;
5570                 getnext = true;
5571                 continue;       /* look on the same line */
5572               }
5573             dbp += 1;
5574           }
5575       }
5576 }
5577
5578 \f
5579 /*
5580  * Prolog support
5581  *
5582  * Assumes that the predicate or rule starts at column 0.
5583  * Only the first clause of a predicate or rule is added.
5584  * Original code by Sunichirou Sugou (1989)
5585  * Rewritten by Anders Lindgren (1996)
5586  */
5587 static size_t prolog_pr (char *, char *);
5588 static void prolog_skip_comment (linebuffer *, FILE *);
5589 static size_t prolog_atom (char *, size_t);
5590
5591 static void
5592 Prolog_functions (FILE *inf)
5593 {
5594   char *cp, *last;
5595   size_t len;
5596   size_t allocated;
5597
5598   allocated = 0;
5599   len = 0;
5600   last = NULL;
5601
5602   LOOP_ON_INPUT_LINES (inf, lb, cp)
5603     {
5604       if (cp[0] == '\0')        /* Empty line */
5605         continue;
5606       else if (c_isspace (cp[0])) /* Not a predicate */
5607         continue;
5608       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5609         prolog_skip_comment (&lb, inf);
5610       else if ((len = prolog_pr (cp, last)) > 0)
5611         {
5612           /* Predicate or rule.  Store the function name so that we
5613              only generate a tag for the first clause.  */
5614           if (last == NULL)
5615             last = xnew (len + 1, char);
5616           else if (len + 1 > allocated)
5617             xrnew (last, len + 1, char);
5618           allocated = len + 1;
5619           memcpy (last, cp, len);
5620           last[len] = '\0';
5621         }
5622     }
5623   free (last);
5624 }
5625
5626
5627 static void
5628 prolog_skip_comment (linebuffer *plb, FILE *inf)
5629 {
5630   char *cp;
5631
5632   do
5633     {
5634       for (cp = plb->buffer; *cp != '\0'; cp++)
5635         if (cp[0] == '*' && cp[1] == '/')
5636           return;
5637       readline (plb, inf);
5638     }
5639   while (perhaps_more_input (inf));
5640 }
5641
5642 /*
5643  * A predicate or rule definition is added if it matches:
5644  *     <beginning of line><Prolog Atom><whitespace>(
5645  * or  <beginning of line><Prolog Atom><whitespace>:-
5646  *
5647  * It is added to the tags database if it doesn't match the
5648  * name of the previous clause header.
5649  *
5650  * Return the size of the name of the predicate or rule, or 0 if no
5651  * header was found.
5652  */
5653 static size_t
5654 prolog_pr (char *s, char *last)
5655
5656                                 /* Name of last clause. */
5657 {
5658   size_t pos;
5659   size_t len;
5660
5661   pos = prolog_atom (s, 0);
5662   if (! pos)
5663     return 0;
5664
5665   len = pos;
5666   pos = skip_spaces (s + pos) - s;
5667
5668   if ((s[pos] == '.'
5669        || (s[pos] == '(' && (pos += 1))
5670        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5671       && (last == NULL          /* save only the first clause */
5672           || len != strlen (last)
5673           || !strneq (s, last, len)))
5674         {
5675           make_tag (s, len, true, s, pos, lineno, linecharno);
5676           return len;
5677         }
5678   else
5679     return 0;
5680 }
5681
/*
 * Consume a Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * What is accepted here as an atom is one of:
 * - A sequence of alphanumerics and underscores whose first character
 *   is a lower case letter or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.  The closing quote must be on the
 *   same line.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  size_t start = pos;

  if (c_islower (s[pos]) || s[pos] == '_')
    {
      /* Unquoted: take every following alphanumeric or underscore.  */
      for (pos++; c_isalnum (s[pos]) || s[pos] == '_'; pos++)
        continue;
      return pos - start;
    }

  if (s[pos] != '\'')
    return 0;

  /* Quoted: walk to the closing quote.  */
  pos++;
  for (;;)
    switch (s[pos])
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return 0;

      case '\\':
        /* The backslash takes the next character with it.  */
        if (s[pos + 1] == '\0')
          return 0;
        pos += 2;
        break;

      case '\'':
        pos++;
        if (s[pos] != '\'')
          return pos - start;   /* that was the closing quote */
        pos++;                  /* a doubled quote stands for one */
        break;

      default:
        pos++;
        break;
      }
}
5738
5739 \f
5740 /*
5741  * Support for Erlang
5742  *
5743  * Generates tags for functions, defines, and records.
5744  * Assumes that Erlang functions start at column 0.
5745  * Original code by Anders Lindgren (1996)
5746  */
5747 static int erlang_func (char *, char *);
5748 static void erlang_attribute (char *);
5749 static int erlang_atom (char *);
5750
5751 static void
5752 Erlang_functions (FILE *inf)
5753 {
5754   char *cp, *last;
5755   int len;
5756   int allocated;
5757
5758   allocated = 0;
5759   len = 0;
5760   last = NULL;
5761
5762   LOOP_ON_INPUT_LINES (inf, lb, cp)
5763     {
5764       if (cp[0] == '\0')        /* Empty line */
5765         continue;
5766       else if (c_isspace (cp[0])) /* Not function nor attribute */
5767         continue;
5768       else if (cp[0] == '%')    /* comment */
5769         continue;
5770       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5771         continue;
5772       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5773         {
5774           erlang_attribute (cp);
5775           if (last != NULL)
5776             {
5777               free (last);
5778               last = NULL;
5779             }
5780         }
5781       else if ((len = erlang_func (cp, last)) > 0)
5782         {
5783           /*
5784            * Function.  Store the function name so that we only
5785            * generates a tag for the first clause.
5786            */
5787           if (last == NULL)
5788             last = xnew (len + 1, char);
5789           else if (len + 1 > allocated)
5790             xrnew (last, len + 1, char);
5791           allocated = len + 1;
5792           memcpy (last, cp, len);
5793           last[len] = '\0';
5794         }
5795     }
5796   free (last);
5797 }
5798
5799
5800 /*
5801  * A function definition is added if it matches:
5802  *     <beginning of line><Erlang Atom><whitespace>(
5803  *
5804  * It is added to the tags database if it doesn't match the
5805  * name of the previous clause header.
5806  *
5807  * Return the size of the name of the function, or 0 if no function
5808  * was found.
5809  */
5810 static int
5811 erlang_func (char *s, char *last)
5812
5813                                 /* Name of last clause. */
5814 {
5815   int pos;
5816   int len;
5817
5818   pos = erlang_atom (s);
5819   if (pos < 1)
5820     return 0;
5821
5822   len = pos;
5823   pos = skip_spaces (s + pos) - s;
5824
5825   /* Save only the first clause. */
5826   if (s[pos++] == '('
5827       && (last == NULL
5828           || len != (int)strlen (last)
5829           || !strneq (s, last, len)))
5830         {
5831           make_tag (s, len, true, s, pos, lineno, linecharno);
5832           return len;
5833         }
5834
5835   return 0;
5836 }
5837
5838
5839 /*
5840  * Handle attributes.  Currently, tags are generated for defines
5841  * and records.
5842  *
5843  * They are on the form:
5844  * -define(foo, bar).
5845  * -define(Foo(M, N), M+N).
5846  * -record(graph, {vtab = notable, cyclic = true}).
5847  */
5848 static void
5849 erlang_attribute (char *s)
5850 {
5851   char *cp = s;
5852
5853   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5854       && *cp++ == '(')
5855     {
5856       int len = erlang_atom (skip_spaces (cp));
5857       if (len > 0)
5858         make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5859     }
5860   return;
5861 }
5862
5863
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * one or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (char *s)
{
  int pos;

  if (s[0] == '\'')
    {
      /* Quoted atom: walk to the closing quote; a backslash takes the
         next character with it.  */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              pos++;
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      return pos + 1;           /* count the closing quote too */
    }

  if (!c_isalpha (s[0]) && s[0] != '_')
    return 0;

  /* The atom is unquoted. */
  for (pos = 1; c_isalnum (s[pos]) || s[pos] == '_'; pos++)
    continue;
  return pos;
}
5891
5892 \f
5893 static char *scan_separators (char *);
5894 static void add_regex (char *, language *);
5895 static char *substitute (char *, char *, struct re_registers *);
5896
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Any other backslash sequence is kept as it is, backslash included.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.  An empty string is reported as unterminated
 * and left untouched.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;
  bool quoted = false;

  /* With no separator character there is nothing to scan, and
     stepping over it would read beyond the end of the string.  */
  if (sep == '\0')
    return NULL;

  /* COPYTO never gets ahead of NAME, so copying in place is safe.  */
  for (++name; *name != '\0'; ++name)
    {
      if (quoted)
        {
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
          quoted = false;
        }
      else if (*name == '\\')
        quoted = true;
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5955
5956 /* Look at the argument of --regex or --no-regex and do the right
5957    thing.  Same for each line of a regexp file. */
5958 static void
5959 analyze_regex (char *regex_arg)
5960 {
5961   if (regex_arg == NULL)
5962     {
5963       free_regexps ();          /* --no-regex: remove existing regexps */
5964       return;
5965     }
5966
5967   /* A real --regexp option or a line in a regexp file. */
5968   switch (regex_arg[0])
5969     {
5970       /* Comments in regexp file or null arg to --regex. */
5971     case '\0':
5972     case ' ':
5973     case '\t':
5974       break;
5975
5976       /* Read a regex file.  This is recursive and may result in a
5977          loop, which will stop when the file descriptors are exhausted. */
5978     case '@':
5979       {
5980         FILE *regexfp;
5981         linebuffer regexbuf;
5982         char *regexfile = regex_arg + 1;
5983
5984         /* regexfile is a file containing regexps, one per line. */
5985         regexfp = fopen (regexfile, "r" FOPEN_BINARY);
5986         if (regexfp == NULL)
5987           pfatal (regexfile);
5988         linebuffer_init (&regexbuf);
5989         while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
5990           analyze_regex (regexbuf.buffer);
5991         free (regexbuf.buffer);
5992         if (fclose (regexfp) != 0)
5993           pfatal (regexfile);
5994       }
5995       break;
5996
5997       /* Regexp to be used for a specific language only. */
5998     case '{':
5999       {
6000         language *lang;
6001         char *lang_name = regex_arg + 1;
6002         char *cp;
6003
6004         for (cp = lang_name; *cp != '}'; cp++)
6005           if (*cp == '\0')
6006             {
6007               error ("unterminated language name in regex: %s", regex_arg);
6008               return;
6009             }
6010         *cp++ = '\0';
6011         lang = get_language_from_langname (lang_name);
6012         if (lang == NULL)
6013           return;
6014         add_regex (cp, lang);
6015       }
6016       break;
6017
6018       /* Regexp to be used for any language. */
6019     default:
6020       add_regex (regex_arg, NULL);
6021       break;
6022     }
6023 }
6024
6025 /* Separate the regexp pattern, compile it,
6026    and care for optional name and modifiers. */
6027 static void
6028 add_regex (char *regexp_pattern, language *lang)
6029 {
6030   static struct re_pattern_buffer zeropattern;
6031   char sep, *pat, *name, *modifiers;
6032   char empty = '\0';
6033   const char *err;
6034   struct re_pattern_buffer *patbuf;
6035   regexp *rp;
6036   bool
6037     force_explicit_name = true, /* do not use implicit tag names */
6038     ignore_case = false,        /* case is significant */
6039     multi_line = false,         /* matches are done one line at a time */
6040     single_line = false;        /* dot does not match newline */
6041
6042
6043   if (strlen (regexp_pattern) < 3)
6044     {
6045       error ("null regexp");
6046       return;
6047     }
6048   sep = regexp_pattern[0];
6049   name = scan_separators (regexp_pattern);
6050   if (name == NULL)
6051     {
6052       error ("%s: unterminated regexp", regexp_pattern);
6053       return;
6054     }
6055   if (name[1] == sep)
6056     {
6057       error ("null name for regexp \"%s\"", regexp_pattern);
6058       return;
6059     }
6060   modifiers = scan_separators (name);
6061   if (modifiers == NULL)        /* no terminating separator --> no name */
6062     {
6063       modifiers = name;
6064       name = &empty;
6065     }
6066   else
6067     modifiers += 1;             /* skip separator */
6068
6069   /* Parse regex modifiers. */
6070   for (; modifiers[0] != '\0'; modifiers++)
6071     switch (modifiers[0])
6072       {
6073       case 'N':
6074         if (modifiers == name)
6075           error ("forcing explicit tag name but no name, ignoring");
6076         force_explicit_name = true;
6077         break;
6078       case 'i':
6079         ignore_case = true;
6080         break;
6081       case 's':
6082         single_line = true;
6083         /* FALLTHRU */
6084       case 'm':
6085         multi_line = true;
6086         need_filebuf = true;
6087         break;
6088       default:
6089         error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
6090         break;
6091       }
6092
6093   patbuf = xnew (1, struct re_pattern_buffer);
6094   *patbuf = zeropattern;
6095   if (ignore_case)
6096     {
6097       static char lc_trans[UCHAR_MAX + 1];
6098       int i;
6099       for (i = 0; i < UCHAR_MAX + 1; i++)
6100         lc_trans[i] = c_tolower (i);
6101       patbuf->translate = lc_trans;     /* translation table to fold case  */
6102     }
6103
6104   if (multi_line)
6105     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6106   else
6107     pat = regexp_pattern;
6108
6109   if (single_line)
6110     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6111   else
6112     re_set_syntax (RE_SYNTAX_EMACS);
6113
6114   err = re_compile_pattern (pat, strlen (pat), patbuf);
6115   if (multi_line)
6116     free (pat);
6117   if (err != NULL)
6118     {
6119       error ("%s while compiling pattern", err);
6120       return;
6121     }
6122
6123   rp = p_head;
6124   p_head = xnew (1, regexp);
6125   p_head->pattern = savestr (regexp_pattern);
6126   p_head->p_next = rp;
6127   p_head->lang = lang;
6128   p_head->pat = patbuf;
6129   p_head->name = savestr (name);
6130   p_head->error_signaled = false;
6131   p_head->force_explicit_name = force_explicit_name;
6132   p_head->ignore_case = ignore_case;
6133   p_head->multi_line = multi_line;
6134 }
6135
6136 /*
6137  * Do the substitutions indicated by the regular expression and
6138  * arguments.
6139  */
6140 static char *
6141 substitute (char *in, char *out, struct re_registers *regs)
6142 {
6143   char *result, *t;
6144   int size, dig, diglen;
6145
6146   result = NULL;
6147   size = strlen (out);
6148
6149   /* Pass 1: figure out how much to allocate by finding all \N strings. */
6150   if (out[size - 1] == '\\')
6151     fatal ("pattern error in \"%s\"", out);
6152   for (t = strchr (out, '\\');
6153        t != NULL;
6154        t = strchr (t + 2, '\\'))
6155     if (c_isdigit (t[1]))
6156       {
6157         dig = t[1] - '0';
6158         diglen = regs->end[dig] - regs->start[dig];
6159         size += diglen - 2;
6160       }
6161     else
6162       size -= 1;
6163
6164   /* Allocate space and do the substitutions. */
6165   assert (size >= 0);
6166   result = xnew (size + 1, char);
6167
6168   for (t = result; *out != '\0'; out++)
6169     if (*out == '\\' && c_isdigit (*++out))
6170       {
6171         dig = *out - '0';
6172         diglen = regs->end[dig] - regs->start[dig];
6173         memcpy (t, in + regs->start[dig], diglen);
6174         t += diglen;
6175       }
6176     else
6177       *t++ = *out;
6178   *t = '\0';
6179
6180   assert (t <= result + size);
6181   assert (t - result == (int)strlen (result));
6182
6183   return result;
6184 }
6185
6186 /* Deallocate all regexps. */
6187 static void
6188 free_regexps (void)
6189 {
6190   regexp *rp;
6191   while (p_head != NULL)
6192     {
6193       rp = p_head->p_next;
6194       free (p_head->pattern);
6195       free (p_head->name);
6196       free (p_head);
6197       p_head = rp;
6198     }
6199   return;
6200 }
6201
6202 /*
6203  * Reads the whole file as a single string from `filebuf' and looks for
6204  * multi-line regular expressions, creating tags on matches.
6205  * readline already dealt with normal regexps.
6206  *
6207  * Idea by Ben Wing <ben@666.com> (2002).
6208  */
6209 static void
6210 regex_tag_multiline (void)
6211 {
6212   char *buffer = filebuf.buffer;
6213   regexp *rp;
6214   char *name;
6215
6216   for (rp = p_head; rp != NULL; rp = rp->p_next)
6217     {
6218       int match = 0;
6219
6220       if (!rp->multi_line)
6221         continue;               /* skip normal regexps */
6222
6223       /* Generic initializations before parsing file from memory. */
6224       lineno = 1;               /* reset global line number */
6225       charno = 0;               /* reset global char number */
6226       linecharno = 0;           /* reset global char number of line start */
6227
6228       /* Only use generic regexps or those for the current language. */
6229       if (rp->lang != NULL && rp->lang != curfdp->lang)
6230         continue;
6231
6232       while (match >= 0 && match < filebuf.len)
6233         {
6234           match = re_search (rp->pat, buffer, filebuf.len, charno,
6235                              filebuf.len - match, &rp->regs);
6236           switch (match)
6237             {
6238             case -2:
6239               /* Some error. */
6240               if (!rp->error_signaled)
6241                 {
6242                   error ("regexp stack overflow while matching \"%s\"",
6243                          rp->pattern);
6244                   rp->error_signaled = true;
6245                 }
6246               break;
6247             case -1:
6248               /* No match. */
6249               break;
6250             default:
6251               if (match == rp->regs.end[0])
6252                 {
6253                   if (!rp->error_signaled)
6254                     {
6255                       error ("regexp matches the empty string: \"%s\"",
6256                              rp->pattern);
6257                       rp->error_signaled = true;
6258                     }
6259                   match = -3;   /* exit from while loop */
6260                   break;
6261                 }
6262
6263               /* Match occurred.  Construct a tag. */
6264               while (charno < rp->regs.end[0])
6265                 if (buffer[charno++] == '\n')
6266                   lineno++, linecharno = charno;
6267               name = rp->name;
6268               if (name[0] == '\0')
6269                 name = NULL;
6270               else /* make a named tag */
6271                 name = substitute (buffer, rp->name, &rp->regs);
6272               if (rp->force_explicit_name)
6273                 /* Force explicit tag name, if a name is there. */
6274                 pfnote (name, true, buffer + linecharno,
6275                         charno - linecharno + 1, lineno, linecharno);
6276               else
6277                 make_tag (name, strlen (name), true, buffer + linecharno,
6278                           charno - linecharno + 1, lineno, linecharno);
6279               break;
6280             }
6281         }
6282     }
6283 }
6284
6285 \f
6286 static bool
6287 nocase_tail (const char *cp)
6288 {
6289   int len = 0;
6290
6291   while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6292     cp++, len++;
6293   if (*cp == '\0' && !intoken (dbp[len]))
6294     {
6295       dbp += len;
6296       return true;
6297     }
6298   return false;
6299 }
6300
6301 static void
6302 get_tag (register char *bp, char **namepp)
6303 {
6304   register char *cp = bp;
6305
6306   if (*bp != '\0')
6307     {
6308       /* Go till you get to white space or a syntactic break */
6309       for (cp = bp + 1; !notinname (*cp); cp++)
6310         continue;
6311       make_tag (bp, cp - bp, true,
6312                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6313     }
6314
6315   if (namepp != NULL)
6316     *namepp = savenstr (bp, cp - bp);
6317 }
6318
6319 /*
6320  * Read a line of text from `stream' into `lbp', excluding the
6321  * newline or CR-NL, if any.  Return the number of characters read from
6322  * `stream', which is the length of the line including the newline.
6323  *
6324  * On DOS or Windows we do not count the CR character, if any before the
6325  * NL, in the returned length; this mirrors the behavior of Emacs on those
6326  * platforms (for text files, it translates CR-NL to NL as it reads in the
6327  * file).
6328  *
6329  * If multi-line regular expressions are requested, each line read is
6330  * appended to `filebuf'.
6331  */
6332 static long
6333 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6334 {
6335   char *buffer = lbp->buffer;
6336   char *p = lbp->buffer;
6337   char *pend;
6338   int chars_deleted;
6339
6340   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6341
6342   for (;;)
6343     {
6344       register int c = getc (stream);
6345       if (p == pend)
6346         {
6347           /* We're at the end of linebuffer: expand it. */
6348           lbp->size *= 2;
6349           xrnew (buffer, lbp->size, char);
6350           p += buffer - lbp->buffer;
6351           pend = buffer + lbp->size;
6352           lbp->buffer = buffer;
6353         }
6354       if (c == EOF)
6355         {
6356           if (ferror (stream))
6357             perror (filename);
6358           *p = '\0';
6359           chars_deleted = 0;
6360           break;
6361         }
6362       if (c == '\n')
6363         {
6364           if (p > buffer && p[-1] == '\r')
6365             {
6366               p -= 1;
6367               chars_deleted = 2;
6368             }
6369           else
6370             {
6371               chars_deleted = 1;
6372             }
6373           *p = '\0';
6374           break;
6375         }
6376       *p++ = c;
6377     }
6378   lbp->len = p - buffer;
6379
6380   if (need_filebuf              /* we need filebuf for multi-line regexps */
6381       && chars_deleted > 0)     /* not at EOF */
6382     {
6383       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6384         {
6385           /* Expand filebuf. */
6386           filebuf.size *= 2;
6387           xrnew (filebuf.buffer, filebuf.size, char);
6388         }
6389       memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6390       filebuf.len += lbp->len;
6391       filebuf.buffer[filebuf.len++] = '\n';
6392       filebuf.buffer[filebuf.len] = '\0';
6393     }
6394
6395   return lbp->len + chars_deleted;
6396 }
6397
6398 /*
6399  * Like readline_internal, above, but in addition try to match the
6400  * input line against relevant regular expressions and manage #line
6401  * directives.
6402  */
6403 static void
6404 readline (linebuffer *lbp, FILE *stream)
6405 {
6406   long result;
6407
6408   linecharno = charno;          /* update global char number of line start */
6409   result = readline_internal (lbp, stream, infilename); /* read line */
6410   lineno += 1;                  /* increment global line number */
6411   charno += result;             /* increment global char number */
6412
6413   /* Honor #line directives. */
6414   if (!no_line_directive)
6415     {
6416       static bool discard_until_line_directive;
6417
6418       /* Check whether this is a #line directive. */
6419       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6420         {
6421           unsigned int lno;
6422           int start = 0;
6423
6424           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6425               && start > 0)     /* double quote character found */
6426             {
6427               char *endp = lbp->buffer + start;
6428
6429               while ((endp = strchr (endp, '"')) != NULL
6430                      && endp[-1] == '\\')
6431                 endp++;
6432               if (endp != NULL)
6433                 /* Ok, this is a real #line directive.  Let's deal with it. */
6434                 {
6435                   char *taggedabsname;  /* absolute name of original file */
6436                   char *taggedfname;    /* name of original file as given */
6437                   char *name;           /* temp var */
6438
6439                   discard_until_line_directive = false; /* found it */
6440                   name = lbp->buffer + start;
6441                   *endp = '\0';
6442                   canonicalize_filename (name);
6443                   taggedabsname = absolute_filename (name, tagfiledir);
6444                   if (filename_is_absolute (name)
6445                       || filename_is_absolute (curfdp->infname))
6446                     taggedfname = savestr (taggedabsname);
6447                   else
6448                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6449
6450                   if (streq (curfdp->taggedfname, taggedfname))
6451                     /* The #line directive is only a line number change.  We
6452                        deal with this afterwards. */
6453                     free (taggedfname);
6454                   else
6455                     /* The tags following this #line directive should be
6456                        attributed to taggedfname.  In order to do this, set
6457                        curfdp accordingly. */
6458                     {
6459                       fdesc *fdp; /* file description pointer */
6460
6461                       /* Go look for a file description already set up for the
6462                          file indicated in the #line directive.  If there is
6463                          one, use it from now until the next #line
6464                          directive. */
6465                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6466                         if (streq (fdp->infname, curfdp->infname)
6467                             && streq (fdp->taggedfname, taggedfname))
6468                           /* If we remove the second test above (after the &&)
6469                              then all entries pertaining to the same file are
6470                              coalesced in the tags file.  If we use it, then
6471                              entries pertaining to the same file but generated
6472                              from different files (via #line directives) will
6473                              go into separate sections in the tags file.  These
6474                              alternatives look equivalent.  The first one
6475                              destroys some apparently useless information. */
6476                           {
6477                             curfdp = fdp;
6478                             free (taggedfname);
6479                             break;
6480                           }
6481                       /* Else, if we already tagged the real file, skip all
6482                          input lines until the next #line directive. */
6483                       if (fdp == NULL) /* not found */
6484                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6485                           if (streq (fdp->infabsname, taggedabsname))
6486                             {
6487                               discard_until_line_directive = true;
6488                               free (taggedfname);
6489                               break;
6490                             }
6491                       /* Else create a new file description and use that from
6492                          now on, until the next #line directive. */
6493                       if (fdp == NULL) /* not found */
6494                         {
6495                           fdp = fdhead;
6496                           fdhead = xnew (1, fdesc);
6497                           *fdhead = *curfdp; /* copy curr. file description */
6498                           fdhead->next = fdp;
6499                           fdhead->infname = savestr (curfdp->infname);
6500                           fdhead->infabsname = savestr (curfdp->infabsname);
6501                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6502                           fdhead->taggedfname = taggedfname;
6503                           fdhead->usecharno = false;
6504                           fdhead->prop = NULL;
6505                           fdhead->written = false;
6506                           curfdp = fdhead;
6507                         }
6508                     }
6509                   free (taggedabsname);
6510                   lineno = lno - 1;
6511                   readline (lbp, stream);
6512                   return;
6513                 } /* if a real #line directive */
6514             } /* if #line is followed by a number */
6515         } /* if line begins with "#line " */
6516
6517       /* If we are here, no #line directive was found. */
6518       if (discard_until_line_directive)
6519         {
6520           if (result > 0)
6521             {
6522               /* Do a tail recursion on ourselves, thus discarding the contents
6523                  of the line buffer. */
6524               readline (lbp, stream);
6525               return;
6526             }
6527           /* End of file. */
6528           discard_until_line_directive = false;
6529           return;
6530         }
6531     } /* if #line directives should be considered */
6532
6533   {
6534     int match;
6535     regexp *rp;
6536     char *name;
6537
6538     /* Match against relevant regexps. */
6539     if (lbp->len > 0)
6540       for (rp = p_head; rp != NULL; rp = rp->p_next)
6541         {
6542           /* Only use generic regexps or those for the current language.
6543              Also do not use multiline regexps, which is the job of
6544              regex_tag_multiline. */
6545           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6546               || rp->multi_line)
6547             continue;
6548
6549           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6550           switch (match)
6551             {
6552             case -2:
6553               /* Some error. */
6554               if (!rp->error_signaled)
6555                 {
6556                   error ("regexp stack overflow while matching \"%s\"",
6557                          rp->pattern);
6558                   rp->error_signaled = true;
6559                 }
6560               break;
6561             case -1:
6562               /* No match. */
6563               break;
6564             case 0:
6565               /* Empty string matched. */
6566               if (!rp->error_signaled)
6567                 {
6568                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6569                   rp->error_signaled = true;
6570                 }
6571               break;
6572             default:
6573               /* Match occurred.  Construct a tag. */
6574               name = rp->name;
6575               if (name[0] == '\0')
6576                 name = NULL;
6577               else /* make a named tag */
6578                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6579               if (rp->force_explicit_name)
6580                 /* Force explicit tag name, if a name is there. */
6581                 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6582               else
6583                 make_tag (name, strlen (name), true,
6584                           lbp->buffer, match, lineno, linecharno);
6585               break;
6586             }
6587         }
6588   }
6589 }
6590
6591 \f
/*
 * Duplicate the NUL-terminated string CP: the whole of it is handed
 * to savenstr, whose freshly allocated copy is returned.
 */
static char *
savestr (const char *cp)
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6601
6602 /*
6603  * Return a pointer to a space of size LEN+1 allocated with xnew where
6604  * the string CP has been copied for at most the first LEN characters.
6605  */
6606 static char *
6607 savenstr (const char *cp, int len)
6608 {
6609   char *dp = xnew (len + 1, char);
6610   dp[len] = '\0';
6611   return memcpy (dp, cp, len);
6612 }
6613
/* Return a pointer to the first character at or after CP for which
   c_isspace is false.  The end of the string is not a space, so the
   scan stops there at the latest.  */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
6622
/* Return a pointer to the first character at or after CP that is
   either a space (per c_isspace) or the end of the string.  */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || c_isspace (*cp))
        return cp;
      cp++;
    }
}
6631
/* Return a pointer to the first character at or after CP that is in
   the `notinname' class.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so the scan stops there too */
  for (; ! notinname (*cp); cp++)
    continue;
  return cp;
}
6641
/* Report a diagnostic built from the printf-style FORMAT and its
   arguments through verror, then terminate the program with
   EXIT_FAILURE.  */
static void
fatal (char const *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);

  exit (EXIT_FAILURE);
}
6652
/* Report the current errno condition with perror, using S1 as the
   message prefix, then terminate the program with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6659
6660 static void
6661 suggest_asking_for_help (void)
6662 {
6663   fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
6664            progname);
6665   exit (EXIT_FAILURE);
6666 }
6667
/* Report a diagnostic built from the printf-style FORMAT and its
   arguments through verror, and return to the caller.  */
static void
error (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);
}
6677
6678 static void
6679 verror (char const *format, va_list ap)
6680 {
6681   fprintf (stderr, "%s: ", progname);
6682   vfprintf (stderr, format, ap);
6683   fprintf (stderr, "\n");
6684 }
6685
6686 /* Return a newly-allocated string whose contents
6687    concatenate those of s1, s2, s3.  */
6688 static char *
6689 concat (const char *s1, const char *s2, const char *s3)
6690 {
6691   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6692   char *result = xnew (len1 + len2 + len3 + 1, char);
6693
6694   strcpy (result, s1);
6695   strcpy (result + len1, s2);
6696   strcpy (result + len1 + len2, s3);
6697
6698   return result;
6699 }
6700
6701 \f
6702 /* Does the same work as the system V getcwd, but does not need to
6703    guess the buffer size in advance. */
6704 static char *
6705 etags_getcwd (void)
6706 {
6707   int bufsize = 200;
6708   char *path = xnew (bufsize, char);
6709
6710   while (getcwd (path, bufsize) == NULL)
6711     {
6712       if (errno != ERANGE)
6713         pfatal ("getcwd");
6714       bufsize *= 2;
6715       free (path);
6716       path = xnew (bufsize, char);
6717     }
6718
6719   canonicalize_filename (path);
6720   return path;
6721 }
6722
6723 /* Return a newly allocated string containing a name of a temporary file.  */
6724 static char *
6725 etags_mktmp (void)
6726 {
6727   const char *tmpdir = getenv ("TMPDIR");
6728   const char *slash = "/";
6729
6730 #if MSDOS || defined (DOS_NT)
6731   if (!tmpdir)
6732     tmpdir = getenv ("TEMP");
6733   if (!tmpdir)
6734     tmpdir = getenv ("TMP");
6735   if (!tmpdir)
6736     tmpdir = ".";
6737   if (tmpdir[strlen (tmpdir) - 1] == '/'
6738       || tmpdir[strlen (tmpdir) - 1] == '\\')
6739     slash = "";
6740 #else
6741   if (!tmpdir)
6742     tmpdir = "/tmp";
6743   if (tmpdir[strlen (tmpdir) - 1] == '/')
6744     slash = "";
6745 #endif
6746
6747   char *templt = concat (tmpdir, slash, "etXXXXXX");
6748   int fd = mkostemp (templt, O_CLOEXEC);
6749   if (fd < 0 || close (fd) != 0)
6750     {
6751       int temp_errno = errno;
6752       free (templt);
6753       errno = temp_errno;
6754       templt = NULL;
6755     }
6756
6757 #if defined (DOS_NT)
6758   /* The file name will be used in shell redirection, so it needs to have
6759      DOS-style backslashes, or else the Windows shell will barf.  */
6760   char *p;
6761   for (p = templt; *p; p++)
6762     if (*p == '/')
6763       *p = '\\';
6764 #endif
6765
6766   return templt;
6767 }
6768
6769 /* Return a newly allocated string containing the file name of FILE
6770    relative to the absolute directory DIR (which should end with a slash). */
6771 static char *
6772 relative_filename (char *file, char *dir)
6773 {
6774   char *fp, *dp, *afn, *res;
6775   int i;
6776
6777   /* Find the common root of file and dir (with a trailing slash). */
6778   afn = absolute_filename (file, cwd);
6779   fp = afn;
6780   dp = dir;
6781   while (*fp++ == *dp++)
6782     continue;
6783   fp--, dp--;                   /* back to the first differing char */
6784 #ifdef DOS_NT
6785   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6786     return afn;
6787 #endif
6788   do                            /* look at the equal chars until '/' */
6789     fp--, dp--;
6790   while (*fp != '/');
6791
6792   /* Build a sequence of "../" strings for the resulting relative file name. */
6793   i = 0;
6794   while ((dp = strchr (dp + 1, '/')) != NULL)
6795     i += 1;
6796   res = xnew (3*i + strlen (fp + 1) + 1, char);
6797   char *z = res;
6798   while (i-- > 0)
6799     z = stpcpy (z, "../");
6800
6801   /* Add the file name relative to the common root of file and dir. */
6802   strcpy (z, fp + 1);
6803   free (afn);
6804
6805   return res;
6806 }
6807
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   FILE is copied as is when it is already absolute, otherwise it is
   appended to DIR.  Every "/." component and every "/dirname/.."
   pair is then removed from the result in place.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  for (slashp = strchr (res, '/');
       slashp != NULL && slashp[0] != '\0';
       )
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to where the preceding component starts. */
              for (cp = slashp - 1;
                   cp >= res && !filename_is_absolute (cp);
                   cp--)
                continue;
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Shift the tail, terminating NUL included, over the
                 removed text and look at the same spot again.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/." and look at the same spot again. */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] != '\0')
    return res;

  /* just a safety net: should never happen */
  free (res);
  return savestr ("/");
}
6870
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  When FILE contains no slash the result is a
   copy of DIR.  FILE is modified temporarily and restored before
   returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');
  char *res;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash for the duration of the call. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  res = absolute_filename (file, dir);
  last_slash[1] = saved;

  return res;
}
6890
/* Return true if FN is an absolute file name: one that begins with a
   slash or, under DOS_NT, with a letter, a colon and a slash.  FN
   must have been canonicalized with canonicalize_filename.  */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
6902
/* Rewrite the file name FN in place so that every run of directory
   separators becomes a single slash.  Under DOS_NT a backslash counts
   as a separator too, and an upper-case drive letter is converted to
   lower case.  */
static void
canonicalize_filename (register char *fn)
{
  register char* src = fn;      /* next character to read */
  register char* dst = fn;      /* next position to write */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);

  /* Collapse multiple forward- and back-slashes into a single forward
     slash. */
  while (*src != '\0')
    {
      if (*src == '/' || *src == '\\')
        {
          *dst = '/';
          while (src[1] == '/' || src[1] == '\\')
            src++;
        }
      else
        *dst = *src;
      src++;
      dst++;
    }

#else  /* !DOS_NT */

  /* Collapse multiple slashes into a single slash. */
  while (*src != '\0')
    {
      if (*src == '/')
        {
          *dst = '/';
          while (src[1] == '/')
            src++;
        }
      else
        *dst = *src;
      src++;
      dst++;
    }

#endif  /* !DOS_NT */

  *dst = '\0';
}
6944
6945 \f
6946 /* Initialize a linebuffer for use. */
6947 static void
6948 linebuffer_init (linebuffer *lbp)
6949 {
6950   lbp->size = (DEBUG) ? 3 : 200;
6951   lbp->buffer = xnew (lbp->size, char);
6952   lbp->buffer[0] = '\0';
6953   lbp->len = 0;
6954 }
6955
6956 /* Set the minimum size of a string contained in a linebuffer. */
6957 static void
6958 linebuffer_setlen (linebuffer *lbp, int toksize)
6959 {
6960   while (lbp->size <= toksize)
6961     {
6962       lbp->size *= 2;
6963       xrnew (lbp->buffer, lbp->size, char);
6964     }
6965   lbp->len = toksize;
6966 }
6967
/* Allocate SIZE bytes with malloc and return them; a NULL result from
   malloc is reported through fatal.  */
static void *
xmalloc (size_t size)
{
  void *mem = malloc (size);

  if (!mem)
    fatal ("virtual memory exhausted");
  return mem;
}
6977
/* Resize the block PTR to SIZE bytes with realloc and return the
   resulting block; a NULL result from realloc is reported through
   fatal.  */
static void *
xrealloc (void *ptr, size_t size)
{
  void *mem = realloc (ptr, size);

  if (!mem)
    fatal ("virtual memory exhausted");
  return mem;
}
6986
6987 /*
6988  * Local Variables:
6989  * indent-tabs-mode: t
6990  * tab-width: 8
6991  * fill-column: 79
6992  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6993  * c-file-style: "gnu"
6994  * End:
6995  */
6996
6997 /* etags.c ends here */