code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2016 Free Software
  32 Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  * Francesco Potortì maintained and improved it for many years
  72    starting in 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
/* Revision identification string embedded in the program.
   NOTE(review): the leading "@(#)" looks like the marker searched for by
   the what(1) utility, which would explain why this is an externally
   visible array rather than a static const one -- confirm before
   changing its linkage.  */
char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  82
/* Normalize DEBUG so that it always expands to a boolean value: true
   when the builder defined DEBUG, false otherwise.  When DEBUG was not
   requested, NDEBUG is defined as well so that assert () is disabled.  */
#ifdef DEBUG
#  undef DEBUG
#  define DEBUG true
#else
#  define DEBUG  false
#  define NDEBUG                /* disable assert */
#endif
  90
  91 #include <config.h>
  92
  93 #ifndef _GNU_SOURCE
  94 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  95 #endif
  96
  97 /* WIN32_NATIVE is for XEmacs.
  98    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  99 #ifdef WIN32_NATIVE
 100 # undef MSDOS
 101 # undef  WINDOWSNT
 102 # define WINDOWSNT
 103 #endif /* WIN32_NATIVE */
 104
 105 #ifdef MSDOS
 106 # undef MSDOS
 107 # define MSDOS true
 108 # include <sys/param.h>
 109 #else
 110 # define MSDOS false
 111 #endif /* MSDOS */
 112
 113 #ifdef WINDOWSNT
 114 # include <direct.h>
 115 # define MAXPATHLEN _MAX_PATH
 116 # undef HAVE_NTGUI
 117 # undef  DOS_NT
 118 # define DOS_NT
 119 # define O_CLOEXEC O_NOINHERIT
 120 #endif /* WINDOWSNT */
 121
 122 #include <limits.h>
 123 #include <unistd.h>
 124 #include <stdarg.h>
 125 #include <stdlib.h>
 126 #include <string.h>
 127 #include <sysstdio.h>
 128 #include <errno.h>
 129 #include <fcntl.h>
 130 #include <binary-io.h>
 131 #include <c-ctype.h>
 132 #include <c-strcase.h>
 133
 134 #include <assert.h>
 135 #ifdef NDEBUG
 136 # undef  assert                 /* some systems have a buggy assert.h */
 137 # define assert(x) ((void) 0)
 138 #endif
 139
 140 #include <getopt.h>
 141 #include <regex.h>
 142
/* Define CTAGS to make the program "ctags" compatible with the usual one.
 Leave it undefined to make the program "etags", which makes emacs-style
 tag tables and tags typedefs, #defines and struct/union/enum by default.

 Whatever the builder defined, CTAGS is normalized here to a boolean
 (true or false) so that it can be tested both with #if and in ordinary
 C expressions. */
#ifdef CTAGS
# undef  CTAGS
# define CTAGS true
#else
# define CTAGS false
#endif
 152
 153 static bool
 154 streq (char const *s, char const *t)
 155 {
 156   return strcmp (s, t) == 0;
 157 }
 158
 159 static bool
 160 strcaseeq (char const *s, char const *t)
 161 {
 162   return c_strcasecmp (s, t) == 0;
 163 }
 164
 165 static bool
 166 strneq (char const *s, char const *t, size_t n)
 167 {
 168   return strncmp (s, t, n) == 0;
 169 }
 170
 171 static bool
 172 strncaseeq (char const *s, char const *t, size_t n)
 173 {
 174   return c_strncasecmp (s, t, n) == 0;
 175 }
 176
 177 /* C is not in a name.  */
 178 static bool
 179 notinname (unsigned char c)
 180 {
 181   /* Look at make_tag before modifying!  */
 182   static bool const table[UCHAR_MAX + 1] = {
 183     ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
 184     ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
 185   };
 186   return table[c];
 187 }
 188
 189 /* C can start a token.  */
 190 static bool
 191 begtoken (unsigned char c)
 192 {
 193   static bool const table[UCHAR_MAX + 1] = {
 194     ['$']=1, ['@']=1,
 195     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 196     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 197     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 198     ['Y']=1, ['Z']=1,
 199     ['_']=1,
 200     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 201     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 202     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 203     ['y']=1, ['z']=1,
 204     ['~']=1
 205   };
 206   return table[c];
 207 }
 208
 209 /* C can be in the middle of a token.  */
 210 static bool
 211 intoken (unsigned char c)
 212 {
 213   static bool const table[UCHAR_MAX + 1] = {
 214     ['$']=1,
 215     ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
 216     ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
 217     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 218     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 219     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 220     ['Y']=1, ['Z']=1,
 221     ['_']=1,
 222     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 223     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 224     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 225     ['y']=1, ['z']=1
 226   };
 227   return table[c];
 228 }
 229
 230 /* C can end a token.  */
 231 static bool
 232 endtoken (unsigned char c)
 233 {
 234   static bool const table[UCHAR_MAX + 1] = {
 235     ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
 236     ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
 237     ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
 238     ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
 239     ['{']=1, ['|']=1, ['}']=1, ['~']=1
 240   };
 241   return table[c];
 242 }
 243
/*
 *      xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:    Type *xnew (int n, Type);
 *              Type *xrnew (OldPointer, int n, Type);
 *
 * xnew yields storage for N objects of type Type obtained from xmalloc.
 * xrnew resizes the storage OP points to, to N objects of type Type,
 * using xrealloc; the result is assigned back to OP (so OP must be an
 * lvalue, and it is evaluated twice) and is also the value of the
 * expression.
 * Neither macro checks the product (n) * sizeof (Type) for overflow.
 */
#define xnew(n, Type)      ((Type *) xmalloc ((n) * sizeof (Type)))
#define xrnew(op, n, Type) ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
 252
/* Type of a language parse function: it takes the input stream and
   returns nothing.  */
typedef void Lang_function (FILE *);
 254
/* Associates a compressed-file name suffix with the command used to
   decompress files carrying that suffix.  */
typedef struct
{
  const char *suffix;           /* file name suffix for this compressor */
  const char *command;          /* takes one arg and decompresses to stdout */
} compressor;
 260
/* Description of one language: its name and help text, the function
   that parses it, and the suffixes, file names and interpreters
   associated with it.  */
typedef struct
{
  const char *name;             /* language name */
  const char *help;             /* detailed help for the language */
  Lang_function *function;      /* parse function */
  const char **suffixes;        /* name suffixes of this language's files */
  const char **filenames;       /* names of this language's files */
  const char **interpreters;    /* interpreters for this language */
  bool metasource;              /* source used to generate other sources */
} language;
 271
/* Description of one input file.  Descriptions are chained into a
   singly linked list through NEXT.  */
typedef struct fdesc
{
  struct fdesc *next;           /* for the linked list */
  char *infname;                /* uncompressed input file name */
  char *infabsname;             /* absolute uncompressed input file name */
  char *infabsdir;              /* absolute dir of input file */
  char *taggedfname;            /* file name to write in tagfile */
  language *lang;               /* language of file */
  char *prop;                   /* file properties to write in tagfile */
  bool usecharno;               /* etags tags shall contain char number */
  bool written;                 /* entry written in the tags file */
} fdesc;
 284
/* One tag.  Tags are kept in a binary tree linked through LEFT and
   RIGHT.  */
typedef struct node_st
{                               /* sorting structure */
  struct node_st *left, *right; /* left and right sons */
  fdesc *fdp;                   /* description of file to whom tag belongs */
  char *name;                   /* tag name */
  char *regex;                  /* search regexp */
  bool valid;                   /* write this tag on the tag file */
  bool is_func;                 /* function tag: use regexp in CTAGS mode */
  bool been_warned;             /* warning already given for duplicated tag */
  int lno;                      /* line number tag is on */
  long cno;                     /* character number line starts on */
} node;
 297
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;                    /* size of BUFFER */
  int len;                      /* length of the string held in BUFFER */
  char *buffer;                 /* the text itself */
} linebuffer;
 311
/* Used to support mixing of --lang and file names.  Each element
   records the kind of one argument (ARG_TYPE), the language associated
   with it (LANG) and the argument text itself (WHAT).  */
typedef struct
{
  enum {
    at_language,                /* a language specification */
    at_regexp,                  /* a regular expression */
    at_filename,                /* a file name */
    at_stdin,                   /* read from stdin here */
    at_end                      /* stop parsing the list */
  } arg_type;                   /* argument type */
  language *lang;               /* language associated with the argument */
  char *what;                   /* the argument itself */
} argument;
 325
/* Structure defining a regular expression.  Regexps are chained into a
   singly linked list through P_NEXT.  */
typedef struct regexp
{
  struct regexp *p_next;        /* pointer to next in list */
  language *lang;               /* if set, use only for this language */
  char *pattern;                /* the regexp pattern */
  char *name;                   /* tag name */
  struct re_pattern_buffer *pat; /* the compiled pattern */
  struct re_registers regs;     /* re registers */
  bool error_signaled;          /* already signaled for this regexp */
  bool force_explicit_name;     /* do not allow implicit tag name */
  bool ignore_case;             /* ignore case when matching */
  bool multi_line;              /* do a multi-line match on the whole file */
} regexp;
 340
 341
/* Forward declarations of the language parse functions.  All of them
   except C_entries, which takes an extra leading int argument, have
   the Lang_function signature.

   Many compilers barf on this:
        Lang_function Ada_funcs;
   so let's write it this way */
static void Ada_funcs (FILE *);
static void Asm_labels (FILE *);
static void C_entries (int c_ext, FILE *);
static void default_C_entries (FILE *);
static void plain_C_entries (FILE *);
static void Cjava_entries (FILE *);
static void Cobol_paragraphs (FILE *);
static void Cplusplus_entries (FILE *);
static void Cstar_entries (FILE *);
static void Erlang_functions (FILE *);
static void Forth_words (FILE *);
static void Fortran_functions (FILE *);
static void Go_functions (FILE *);
static void HTML_labels (FILE *);
static void Lisp_functions (FILE *);
static void Lua_functions (FILE *);
static void Makefile_targets (FILE *);
static void Pascal_functions (FILE *);
static void Perl_functions (FILE *);
static void PHP_functions (FILE *);
static void PS_functions (FILE *);
static void Prolog_functions (FILE *);
static void Python_functions (FILE *);
static void Ruby_functions (FILE *);
static void Scheme_functions (FILE *);
static void TeX_commands (FILE *);
static void Texinfo_nodes (FILE *);
static void Yacc_entries (FILE *);
static void just_read_file (FILE *);
 374
/* Forward declarations of the remaining file-local functions.  */

/* Language lookup and line-oriented input.  */
static language *get_language_from_langname (const char *);
static void readline (linebuffer *, FILE *);
static long readline_internal (linebuffer *, FILE *, char const *);
static bool nocase_tail (const char *);
static void get_tag (char *, char **);

/* Regexp handling, diagnostics and the tag tree.  error, verror and
   fatal take printf-style format strings; suggest_asking_for_help,
   fatal and pfatal are declared as never returning.  */
static void analyze_regex (char *);
static void free_regexps (void);
static void regex_tag_multiline (void);
static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
static _Noreturn void suggest_asking_for_help (void);
static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
static _Noreturn void pfatal (const char *);
static void add_node (node *, node **);

/* Per-file processing and tag output.  */
static void process_file_name (char *, language *);
static void process_file (FILE *, char *, language *);
static void find_entries (FILE *);
static void free_tree (node *);
static void free_fdesc (fdesc *);
static void pfnote (char *, bool, char *, int, int, long);
static void invalidate_nodes (fdesc *, node **);
static void put_entries (node *);

/* String, file name, linebuffer and memory utilities.  */
static char *concat (const char *, const char *, const char *);
static char *skip_spaces (char *);
static char *skip_non_spaces (char *);
static char *skip_name (char *);
static char *savenstr (const char *, int);
static char *savestr (const char *);
static char *etags_getcwd (void);
static char *relative_filename (char *, char *);
static char *absolute_filename (char *, char *);
static char *absolute_dirname (char *, char *);
static bool filename_is_absolute (char *f);
static void canonicalize_filename (char *);
static char *etags_mktmp (void);
static void linebuffer_init (linebuffer *);
static void linebuffer_setlen (linebuffer *, int);
static void *xmalloc (size_t);
static void *xrealloc (void *, size_t);
 417
 418 \f
/* File-scope program state.  */

static char searchar = '/';     /* use /.../ searches */

static char *tagfile;           /* output file */
static char *progname;          /* name this program was invoked with */
static char *cwd;               /* current working directory */
static char *tagfiledir;        /* directory of tagfile */
static FILE *tagf;              /* ioptr for tags file */
static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */

static fdesc *fdhead;           /* head of file description list */
static fdesc *curfdp;           /* current file description */
static char *infilename;        /* current input file name */
static int lineno;              /* line number of current line */
static long charno;             /* current character number */
static long linecharno;         /* charno of start of current line */
static char *dbp;               /* pointer to start of current tag */

/* NOTE(review): presumably a sentinel meaning "no valid character
   number"; its uses are not visible in this part of the file.  */
static const int invalidcharno = -1;

static node *nodehead;          /* the head of the binary tree of tags */
static node *last_node;         /* the last node created */

static linebuffer lb;           /* the current line */
static linebuffer filebuf;      /* a buffer containing the whole file */
static linebuffer token_name;   /* a buffer containing a tag name */

/* Option flags.  The ones declared `int' rather than `bool' have their
   address stored in the `flag' member of a longopts entry, which is a
   pointer to int.  */
static bool append_to_tagfile;  /* -a: append to tags */
/* The next five default to true in C and derived languages.  */
static bool typedefs;           /* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
                                /* 0 struct/enum/union decls, and C++ */
                                /* member functions. */
static bool constantypedefs;    /* -d: create tags for C #define, enum */
                                /* constants and variables. */
                                /* -D: opposite of -d.  Default under ctags. */
static int globals;             /* create tags for global variables */
static int members;             /* create tags for C member variables */
static int declarations;        /* --declarations: tag them and extern in C&Co*/
static int no_line_directive;   /* ignore #line directives (undocumented) */
static int no_duplicates;       /* no duplicate tags for ctags (undocumented) */
static bool update;             /* -u: update tags */
static bool vgrind_style;       /* -v: create vgrind style index output */
static bool no_warnings;        /* -w: suppress warnings (undocumented) */
static bool cxref_style;        /* -x: create cxref style output */
static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
static bool ignoreindent;       /* -I: ignore indentation in C */
static int packages_only;       /* --packages-only: in Ada, only tag packages*/
static int class_qualify;       /* -Q: produce class-qualified tags in C++/Java */
 467
/* STDIN is defined in LynxOS system headers */
#ifdef STDIN
# undef STDIN
#endif

/* 0x1001 lies above the range of unsigned char, so it cannot collide
   with any single-character option code used in longopts.  */
#define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
static bool parsing_stdin;      /* --parse-stdin used */

static regexp *p_head;          /* list of all regexps */
static bool need_filebuf;       /* some regexes are multi-line */
 478
/* Long options table for getopt_long.  Each entry is
   { name, has_arg, flag, val }: when FLAG is NULL, getopt_long returns
   VAL; otherwise it stores VAL into *FLAG and returns 0.  The table is
   terminated by an entry whose name is NULL.  The set of options
   depends on whether the program is built as ctags or as etags.

   NOTE(review): "help" is listed twice, with values 'h' and 'H';
   whether the second entry is reachable by name should be confirmed.
   NOTE(review): "class-qualify" stores 'Q' (not 1) into class_qualify,
   so the flag is nonzero but not equal to 1 when set.
   NOTE(review): "parse-stdin" is declared with required_argument.  */
static struct option longopts[] =
{
  { "append",             no_argument,       NULL,               'a'   },
  { "packages-only",      no_argument,       &packages_only,     1     },
  { "c++",                no_argument,       NULL,               'C'   },
  { "declarations",       no_argument,       &declarations,      1     },
  { "no-line-directive",  no_argument,       &no_line_directive, 1     },
  { "no-duplicates",      no_argument,       &no_duplicates,     1     },
  { "help",               no_argument,       NULL,               'h'   },
  { "help",               no_argument,       NULL,               'H'   },
  { "ignore-indentation", no_argument,       NULL,               'I'   },
  { "language",           required_argument, NULL,               'l'   },
  { "members",            no_argument,       &members,           1     },
  { "no-members",         no_argument,       &members,           0     },
  { "output",             required_argument, NULL,               'o'   },
  { "class-qualify",      no_argument,       &class_qualify,     'Q'   },
  { "regex",              required_argument, NULL,               'r'   },
  { "no-regex",           no_argument,       NULL,               'R'   },
  { "ignore-case-regex",  required_argument, NULL,               'c'   },
  { "parse-stdin",        required_argument, NULL,               STDIN },
  { "version",            no_argument,       NULL,               'V'   },

#if CTAGS /* Ctags options */
  { "backward-search",    no_argument,       NULL,               'B'   },
  { "cxref",              no_argument,       NULL,               'x'   },
  { "defines",            no_argument,       NULL,               'd'   },
  { "globals",            no_argument,       &globals,           1     },
  { "typedefs",           no_argument,       NULL,               't'   },
  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  { "update",             no_argument,       NULL,               'u'   },
  { "vgrind",             no_argument,       NULL,               'v'   },
  { "no-warn",            no_argument,       NULL,               'w'   },

#else /* Etags options */
  { "no-defines",         no_argument,       NULL,               'D'   },
  { "no-globals",         no_argument,       &globals,           0     },
  { "include",            required_argument, NULL,               'i'   },
#endif
  { NULL }
};
 519
/* Table of known compressed-file suffixes, each paired with the
   command that decompresses such a file to standard output.  The table
   is terminated by an entry whose suffix is NULL.  */
static compressor compressors[] =
{
  { "z", "gzip -d -c"},
  { "Z", "gzip -d -c"},
  { "gz", "gzip -d -c"},
  { "GZ", "gzip -d -c"},
  { "bz2", "bzip2 -d -c" },
  { "xz", "xz -d -c" },
  { NULL }
};
 530
 531 /*
 532  * Language stuff.
 533  */
 534
 535 /* Ada code */
 536 static const char *Ada_suffixes [] =
 537   { "ads", "adb", "ada", NULL };
 538 static const char Ada_help [] =
 539 "In Ada code, functions, procedures, packages, tasks and types are\n\
 540 tags.  Use the '--packages-only' option to create tags for\n\
 541 packages only.\n\
 542 Ada tag names have suffixes indicating the type of entity:\n\
 543         Entity type:    Qualifier:\n\
 544         ------------    ----------\n\
 545         function        /f\n\
 546         procedure       /p\n\
 547         package spec    /s\n\
 548         package body    /b\n\
 549         type            /t\n\
 550         task            /k\n\
 551 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 552 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
 553 will just search for any tag 'bidule'.";
 554
 555 /* Assembly code */
 556 static const char *Asm_suffixes [] =
 557   { "a",        /* Unix assembler */
 558     "asm", /* Microcontroller assembly */
 559     "def", /* BSO/Tasking definition includes  */
 560     "inc", /* Microcontroller include files */
 561     "ins", /* Microcontroller include files */
 562     "s", "sa", /* Unix assembler */
 563     "S",   /* cpp-processed Unix assembler */
 564     "src", /* BSO/Tasking C compiler output */
 565     NULL
 566   };
 567 static const char Asm_help [] =
 568 "In assembler code, labels appearing at the beginning of a line,\n\
 569 followed by a colon, are tags.";
 570
 571
 572 /* Note that .c and .h can be considered C++, if the --c++ flag was
 573    given, or if the `class' or `template' keywords are met inside the file.
 574    That is why default_C_entries is called for these. */
 575 static const char *default_C_suffixes [] =
 576   { "c", "h", NULL };
 577 #if CTAGS                               /* C help for Ctags */
 578 static const char default_C_help [] =
 579 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 580 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
 581 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
 582 Use --globals to tag global variables.\n\
 583 You can tag function declarations and external variables by\n\
 584 using '--declarations', and struct members by using '--members'.";
 585 #else                                   /* C help for Etags */
 586 static const char default_C_help [] =
 587 "In C code, any C function or typedef is a tag, and so are\n\
 588 definitions of 'struct', 'union' and 'enum'.  '#define' macro\n\
 589 definitions and 'enum' constants are tags unless you specify\n\
 590 '--no-defines'.  Global variables are tags unless you specify\n\
 591 '--no-globals' and so are struct members unless you specify\n\
 592 '--no-members'.  Use of '--no-globals', '--no-defines' and\n\
 593 '--no-members' can make the tags table file much smaller.\n\
 594 You can tag function declarations and external variables by\n\
 595 using '--declarations'.";
 596 #endif  /* C help for Ctags and Etags */
 597
 598 static const char *Cplusplus_suffixes [] =
 599   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 600     "M",                        /* Objective C++ */
 601     "pdb",                      /* PostScript with C syntax */
 602     NULL };
 603 static const char Cplusplus_help [] =
 604 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 605 --help --lang=c --lang=c++ for full help.)\n\
 606 In addition to C tags, member functions are also recognized.  Member\n\
 607 variables are recognized unless you use the '--no-members' option.\n\
 608 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
 609 and 'CLASS::FUNCTION'.  'operator' definitions have tag names like\n\
 610 'operator+'.";
 611
 612 static const char *Cjava_suffixes [] =
 613   { "java", NULL };
 614 static char Cjava_help [] =
 615 "In Java code, all the tags constructs of C and C++ code are\n\
 616 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 617
 618
 619 static const char *Cobol_suffixes [] =
 620   { "COB", "cob", NULL };
 621 static char Cobol_help [] =
 622 "In Cobol code, tags are paragraph names; that is, any word\n\
 623 starting in column 8 and followed by a period.";
 624
/* C*: file name suffixes.  No help string is defined next to them.  */
static const char *Cstar_suffixes [] =
  { "cs", "hs", NULL };
 627
 628 static const char *Erlang_suffixes [] =
 629   { "erl", "hrl", NULL };
 630 static const char Erlang_help [] =
 631 "In Erlang code, the tags are the functions, records and macros\n\
 632 defined in the file.";
 633
 634 const char *Forth_suffixes [] =
 635   { "fth", "tok", NULL };
 636 static const char Forth_help [] =
 637 "In Forth code, tags are words defined by ':',\n\
 638 constant, code, create, defer, value, variable, buffer:, field.";
 639
/* Fortran: file name suffixes and help text.  */
static const char *Fortran_suffixes [] =
  { "F", "f", "f90", "for", NULL };
static const char Fortran_help [] =
"In Fortran code, functions, subroutines and block data are tags.";
 644
/* Go: file name suffixes and help text.  */
static const char *Go_suffixes [] = {"go", NULL};
static const char Go_help [] =
  "In Go code, functions, interfaces and packages are tags.";
 648
 649 static const char *HTML_suffixes [] =
 650   { "htm", "html", "shtml", NULL };
 651 static const char HTML_help [] =
 652 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
 653 'h3' headers.  Also, tags are 'name=' in anchors and all\n\
 654 occurrences of 'id='.";
 655
 656 static const char *Lisp_suffixes [] =
 657   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 658 static const char Lisp_help [] =
 659 "In Lisp code, any function defined with 'defun', any variable\n\
 660 defined with 'defvar' or 'defconst', and in general the first\n\
 661 argument of any expression that starts with '(def' in column zero\n\
 662 is a tag.\n\
 663 The '--declarations' option tags \"(defvar foo)\" constructs too.";
 664
/* Lua: file name suffixes and help text.  */
static const char *Lua_suffixes [] =
  { "lua", "LUA", NULL };
static const char Lua_help [] =
"In Lua scripts, all functions are tags.";
 669
 670 static const char *Makefile_filenames [] =
 671   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 672 static const char Makefile_help [] =
 673 "In makefiles, targets are tags; additionally, variables are tags\n\
 674 unless you specify '--no-globals'.";
 675
 676 static const char *Objc_suffixes [] =
 677   { "lm",                       /* Objective lex file */
 678     "m",                        /* Objective C file */
 679      NULL };
 680 static const char Objc_help [] =
 681 "In Objective C code, tags include Objective C definitions for classes,\n\
 682 class categories, methods and protocols.  Tags for variables and\n\
 683 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\n\
 684 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 685
 686 static const char *Pascal_suffixes [] =
 687   { "p", "pas", NULL };
 688 static const char Pascal_help [] =
 689 "In Pascal code, the tags are the functions and procedures defined\n\
 690 in the file.";
 691 /* " // this is for working around an Emacs highlighting bug... */
 692
 693 static const char *Perl_suffixes [] =
 694   { "pl", "pm", NULL };
 695 static const char *Perl_interpreters [] =
 696   { "perl", "@PERL@", NULL };
 697 static const char Perl_help [] =
 698 "In Perl code, the tags are the packages, subroutines and variables\n\
 699 defined by the 'package', 'sub', 'my' and 'local' keywords.  Use\n\
 700 '--globals' if you want to tag global variables.  Tags for\n\
 701 subroutines are named 'PACKAGE::SUB'.  The name for subroutines\n\
 702 defined in the default package is 'main::SUB'.";
 703
 704 static const char *PHP_suffixes [] =
 705   { "php", "php3", "php4", NULL };
 706 static const char PHP_help [] =
 707 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 708 the '--no-members' option, vars are tags too.";
 709
/* Plain C: file name suffixes.  No help string is defined next to
   them.  */
static const char *plain_C_suffixes [] =
  { "pc",                       /* Pro*C file */
     NULL };
 713
/* PostScript: file name suffixes and help text.  */
static const char *PS_suffixes [] =
  { "ps", "psw", NULL };        /* .psw is for PSWrap */
static const char PS_help [] =
"In PostScript code, the tags are the functions.";
 718
 719 static const char *Prolog_suffixes [] =
 720   { "prolog", NULL };
 721 static const char Prolog_help [] =
 722 "In Prolog code, tags are predicates and rules at the beginning of\n\
 723 line.";
 724
 725 static const char *Python_suffixes [] =
 726   { "py", NULL };
 727 static const char Python_help [] =
 728 "In Python code, 'def' or 'class' at the beginning of a line\n\
 729 generate a tag.";
 730
 731 static const char *Ruby_suffixes [] =
 732   { "rb", "ruby", NULL };
 733 static const char Ruby_help [] =
 734   "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
 735 a line generate a tag.  Constants also generate a tag.";
 736
 737 /* Can't do the `SCM' or `scm' prefix with a version number. */
 738 static const char *Scheme_suffixes [] =
 739   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 740 static const char Scheme_help [] =
 741 "In Scheme code, tags include anything defined with 'def' or with a\n\
 742 construct whose name starts with 'def'.  They also include\n\
 743 variables set with 'set!' at top level in the file.";
 744
 745 static const char *TeX_suffixes [] =
 746   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 747 static const char TeX_help [] =
 748 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
 749 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
 750 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
 751 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
 752 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
 753 \n\
 754 Other commands can be specified by setting the environment variable\n\
 755 'TEXTAGS' to a colon-separated list like, for example,\n\
 756      TEXTAGS=\"mycommand:myothercommand\".";
 757
 758
 759 static const char *Texinfo_suffixes [] =
 760   { "texi", "texinfo", "txi", NULL };
 761 static const char Texinfo_help [] =
 762 "for texinfo files, lines starting with @node are tagged.";
 763
 764 static const char *Yacc_suffixes [] =
 765   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 766 static const char Yacc_help [] =
 767 "In Bison or Yacc input files, each rule defines as a tag the\n\
 768 nonterminal it constructs.  The portions of the file that contain\n\
 769 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 770 for full help).";
 771
 772 static const char auto_help [] =
 773 "'auto' is not a real language, it indicates to use\n\
 774 a default language for files base on file name suffix and file contents.";
 775
 776 static const char none_help [] =
 777 "'none' is not a real language, it indicates to only do\n\
 778 regexp processing on files.";
 779
 780 static const char no_lang_help [] =
 781 "No detailed help available for this language.";
 782
 783
 784 /*
 785  * Table of languages.
 786  *
 787  * It is ok for a given function to be listed under more than one
 788  * name.  I just didn't.
 789  */
 790
 791 static language lang_names [] =
 792 {
 793   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 794   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 795   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 796   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 797   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 798   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 799   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 800   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 801   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 802   { "go",        Go_help,        Go_functions,      Go_suffixes        },
 803   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 804   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 805   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 806   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 807   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 808   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 809   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 810   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 811   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 812   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 813   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 814   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 815   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 816   { "ruby",      Ruby_help,      Ruby_functions,    Ruby_suffixes      },
 817   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 818   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 819   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 820   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
 821   { "auto",      auto_help },                      /* default guessing scheme */
 822   { "none",      none_help,      just_read_file }, /* regexp matching only */
 823   { NULL }                /* end of list */
 824 };
 825
 826 \f
 827 static void
 828 print_language_names (void)
 829 {
 830   language *lang;
 831   const char **name, **ext;
 832
 833   puts ("\nThese are the currently supported languages, along with the\n\
 834 default file names and dot suffixes:");
 835   for (lang = lang_names; lang->name != NULL; lang++)
 836     {
 837       printf ("  %-*s", 10, lang->name);
 838       if (lang->filenames != NULL)
 839         for (name = lang->filenames; *name != NULL; name++)
 840           printf (" %s", *name);
 841       if (lang->suffixes != NULL)
 842         for (ext = lang->suffixes; *ext != NULL; ext++)
 843           printf (" .%s", *ext);
 844       puts ("");
 845     }
 846   puts ("where 'auto' means use default language for files based on file\n\
 847 name suffix, and 'none' means only do regexp processing on files.\n\
 848 If no language is specified and no matching suffix is found,\n\
 849 the first line of the file is read for a sharp-bang (#!) sequence\n\
 850 followed by the name of an interpreter.  If no such sequence is found,\n\
 851 Fortran is tried first; if no tags are found, C is tried next.\n\
 852 When parsing any C file, a \"class\" or \"template\" keyword\n\
 853 switches to C++.");
 854   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 855 \n\
 856 For detailed help on a given language use, for example,\n\
 857 etags --help --lang=ada.");
 858 }
 859
 860 #ifndef EMACS_NAME
 861 # define EMACS_NAME "standalone"
 862 #endif
 863 #ifndef VERSION
 864 # define VERSION "17.38.1.4"
 865 #endif
 866 static _Noreturn void
 867 print_version (void)
 868 {
 869   char emacs_copyright[] = COPYRIGHT;
 870
 871   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 872   puts (emacs_copyright);
 873   puts ("This program is distributed under the terms in ETAGS.README");
 874
 875   exit (EXIT_SUCCESS);
 876 }
 877
 878 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 879 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
 880 #endif
 881
 882 static _Noreturn void
 883 print_help (argument *argbuffer)
 884 {
 885   bool help_for_lang = false;
 886
 887   for (; argbuffer->arg_type != at_end; argbuffer++)
 888     if (argbuffer->arg_type == at_language)
 889       {
 890         if (help_for_lang)
 891           puts ("");
 892         puts (argbuffer->lang->help);
 893         help_for_lang = true;
 894       }
 895
 896   if (help_for_lang)
 897     exit (EXIT_SUCCESS);
 898
 899   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 900 \n\
 901 These are the options accepted by %s.\n", progname, progname);
 902   puts ("You may use unambiguous abbreviations for the long option names.");
 903   puts ("  A - as file name means read names from stdin (one per line).\n\
 904 Absolute names are stored in the output file as they are.\n\
 905 Relative ones are stored relative to the output file's directory.\n");
 906
 907   puts ("-a, --append\n\
 908         Append tag entries to existing tags file.");
 909
 910   puts ("--packages-only\n\
 911         For Ada files, only generate tags for packages.");
 912
 913   if (CTAGS)
 914     puts ("-B, --backward-search\n\
 915         Write the search commands for the tag entries using '?', the\n\
 916         backward-search command instead of '/', the forward-search command.");
 917
 918   /* This option is mostly obsolete, because etags can now automatically
 919      detect C++.  Retained for backward compatibility and for debugging and
 920      experimentation.  In principle, we could want to tag as C++ even
 921      before any "class" or "template" keyword.
 922   puts ("-C, --c++\n\
 923         Treat files whose name suffix defaults to C language as C++ files.");
 924   */
 925
 926   puts ("--declarations\n\
 927         In C and derived languages, create tags for function declarations,");
 928   if (CTAGS)
 929     puts ("\tand create tags for extern variables if --globals is used.");
 930   else
 931     puts
 932       ("\tand create tags for extern variables unless --no-globals is used.");
 933
 934   if (CTAGS)
 935     puts ("-d, --defines\n\
 936         Create tag entries for C #define constants and enum constants, too.");
 937   else
 938     puts ("-D, --no-defines\n\
 939         Don't create tag entries for C #define constants and enum constants.\n\
 940         This makes the tags file smaller.");
 941
 942   if (!CTAGS)
 943     puts ("-i FILE, --include=FILE\n\
 944         Include a note in tag file indicating that, when searching for\n\
 945         a tag, one should also consult the tags file FILE after\n\
 946         checking the current file.");
 947
 948   puts ("-l LANG, --language=LANG\n\
 949         Force the following files to be considered as written in the\n\
 950         named language up to the next --language=LANG option.");
 951
 952   if (CTAGS)
 953     puts ("--globals\n\
 954         Create tag entries for global variables in some languages.");
 955   else
 956     puts ("--no-globals\n\
 957         Do not create tag entries for global variables in some\n\
 958         languages.  This makes the tags file smaller.");
 959
 960   puts ("--no-line-directive\n\
 961         Ignore #line preprocessor directives in C and derived languages.");
 962
 963   if (CTAGS)
 964     puts ("--members\n\
 965         Create tag entries for members of structures in some languages.");
 966   else
 967     puts ("--no-members\n\
 968         Do not create tag entries for members of structures\n\
 969         in some languages.");
 970
 971   puts ("-Q, --class-qualify\n\
 972         Qualify tag names with their class name in C++, ObjC, and Java.\n\
 973         This produces tag names of the form \"class::member\" for C++,\n\
 974         \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
 975         For Objective C, this also produces class methods qualified with\n\
 976         their arguments, as in \"foo:bar:baz:more\".");
 977   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 978         Make a tag for each line matching a regular expression pattern\n\
 979         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 980         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 981         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 982         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 983   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 984         For example Tcl named tags can be created with:\n\
 985           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 986         MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
 987         'm' means to allow multi-line matches, 's' implies 'm' and\n\
 988         causes dot to match any character, including newline.");
 989
 990   puts ("-R, --no-regex\n\
 991         Don't create tags from regexps for the following files.");
 992
 993   puts ("-I, --ignore-indentation\n\
 994         In C and C++ do not assume that a closing brace in the first\n\
 995         column is the final brace of a function or structure definition.");
 996
 997   puts ("-o FILE, --output=FILE\n\
 998         Write the tags to FILE.");
 999
1000   puts ("--parse-stdin=NAME\n\
1001         Read from standard input and record tags as belonging to file NAME.");
1002
1003   if (CTAGS)
1004     {
1005       puts ("-t, --typedefs\n\
1006         Generate tag entries for C and Ada typedefs.");
1007       puts ("-T, --typedefs-and-c++\n\
1008         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1009         and C++ member functions.");
1010     }
1011
1012   if (CTAGS)
1013     puts ("-u, --update\n\
1014         Update the tag entries for the given files, leaving tag\n\
1015         entries for other files in place.  Currently, this is\n\
1016         implemented by deleting the existing entries for the given\n\
1017         files and then rewriting the new entries at the end of the\n\
1018         tags file.  It is often faster to simply rebuild the entire\n\
1019         tag file than to use this.");
1020
1021   if (CTAGS)
1022     {
1023       puts ("-v, --vgrind\n\
1024         Print on the standard output an index of items intended for\n\
1025         human consumption, similar to the output of vgrind.  The index\n\
1026         is sorted, and gives the page number of each item.");
1027
1028       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1029         puts ("-w, --no-duplicates\n\
1030         Do not create duplicate tag entries, for compatibility with\n\
1031         traditional ctags.");
1032
1033       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1034         puts ("-w, --no-warn\n\
1035         Suppress warning messages about duplicate tag entries.");
1036
1037       puts ("-x, --cxref\n\
1038         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1039         The output uses line numbers instead of page numbers, but\n\
1040         beyond that the differences are cosmetic; try both to see\n\
1041         which you like.");
1042     }
1043
1044   puts ("-V, --version\n\
1045         Print the version of the program.\n\
1046 -h, --help\n\
1047         Print this help message.\n\
1048         Followed by one or more '--language' options prints detailed\n\
1049         help about tag generation for the specified languages.");
1050
1051   print_language_names ();
1052
1053   puts ("");
1054   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1055
1056   exit (EXIT_SUCCESS);
1057 }
1058
1059 \f
1060 int
1061 main (int argc, char **argv)
1062 {
1063   int i;
1064   unsigned int nincluded_files;
1065   char **included_files;
1066   argument *argbuffer;
1067   int current_arg, file_count;
1068   linebuffer filename_lb;
1069   bool help_asked = false;
1070   ptrdiff_t len;
1071   char *optstring;
1072   int opt;
1073
1074   progname = argv[0];
1075   nincluded_files = 0;
1076   included_files = xnew (argc, char *);
1077   current_arg = 0;
1078   file_count = 0;
1079
1080   /* Allocate enough no matter what happens.  Overkill, but each one
1081      is small. */
1082   argbuffer = xnew (argc, argument);
1083
1084   /*
1085    * Always find typedefs and structure tags.
1086    * Also default to find macro constants, enum constants, struct
1087    * members and global variables.  Do it for both etags and ctags.
1088    */
1089   typedefs = typedefs_or_cplusplus = constantypedefs = true;
1090   globals = members = true;
1091
1092   /* When the optstring begins with a '-' getopt_long does not rearrange the
1093      non-options arguments to be at the end, but leaves them alone. */
1094   optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1095                       (CTAGS) ? "BxdtTuvw" : "Di:",
1096                       "");
1097
1098   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1099     switch (opt)
1100       {
1101       case 0:
1102         /* If getopt returns 0, then it has already processed a
1103            long-named option.  We should do nothing.  */
1104         break;
1105
1106       case 1:
1107         /* This means that a file name has been seen.  Record it. */
1108         argbuffer[current_arg].arg_type = at_filename;
1109         argbuffer[current_arg].what     = optarg;
1110         len = strlen (optarg);
1111         if (whatlen_max < len)
1112           whatlen_max = len;
1113         ++current_arg;
1114         ++file_count;
1115         break;
1116
1117       case STDIN:
1118         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1119         argbuffer[current_arg].arg_type = at_stdin;
1120         argbuffer[current_arg].what     = optarg;
1121         len = strlen (optarg);
1122         if (whatlen_max < len)
1123           whatlen_max = len;
1124         ++current_arg;
1125         ++file_count;
1126         if (parsing_stdin)
1127           fatal ("cannot parse standard input more than once");
1128         parsing_stdin = true;
1129         break;
1130
1131         /* Common options. */
1132       case 'a': append_to_tagfile = true;       break;
1133       case 'C': cplusplus = true;               break;
1134       case 'f':         /* for compatibility with old makefiles */
1135       case 'o':
1136         if (tagfile)
1137           {
1138             error ("-o option may only be given once.");
1139             suggest_asking_for_help ();
1140             /* NOTREACHED */
1141           }
1142         tagfile = optarg;
1143         break;
1144       case 'I':
1145       case 'S':         /* for backward compatibility */
1146         ignoreindent = true;
1147         break;
1148       case 'l':
1149         {
1150           language *lang = get_language_from_langname (optarg);
1151           if (lang != NULL)
1152             {
1153               argbuffer[current_arg].lang = lang;
1154               argbuffer[current_arg].arg_type = at_language;
1155               ++current_arg;
1156             }
1157         }
1158         break;
1159       case 'c':
1160         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1161         optarg = concat (optarg, "i", ""); /* memory leak here */
1162         /* FALLTHRU */
1163       case 'r':
1164         argbuffer[current_arg].arg_type = at_regexp;
1165         argbuffer[current_arg].what = optarg;
1166         len = strlen (optarg);
1167         if (whatlen_max < len)
1168           whatlen_max = len;
1169         ++current_arg;
1170         break;
1171       case 'R':
1172         argbuffer[current_arg].arg_type = at_regexp;
1173         argbuffer[current_arg].what = NULL;
1174         ++current_arg;
1175         break;
1176       case 'V':
1177         print_version ();
1178         break;
1179       case 'h':
1180       case 'H':
1181         help_asked = true;
1182         break;
1183       case 'Q':
1184         class_qualify = 1;
1185         break;
1186
1187         /* Etags options */
1188       case 'D': constantypedefs = false;                        break;
1189       case 'i': included_files[nincluded_files++] = optarg;     break;
1190
1191         /* Ctags options. */
1192       case 'B': searchar = '?';                                 break;
1193       case 'd': constantypedefs = true;                         break;
1194       case 't': typedefs = true;                                break;
1195       case 'T': typedefs = typedefs_or_cplusplus = true;        break;
1196       case 'u': update = true;                                  break;
1197       case 'v': vgrind_style = true;                      /*FALLTHRU*/
1198       case 'x': cxref_style = true;                             break;
1199       case 'w': no_warnings = true;                             break;
1200       default:
1201         suggest_asking_for_help ();
1202         /* NOTREACHED */
1203       }
1204
1205   /* No more options.  Store the rest of arguments. */
1206   for (; optind < argc; optind++)
1207     {
1208       argbuffer[current_arg].arg_type = at_filename;
1209       argbuffer[current_arg].what = argv[optind];
1210       len = strlen (argv[optind]);
1211       if (whatlen_max < len)
1212         whatlen_max = len;
1213       ++current_arg;
1214       ++file_count;
1215     }
1216
1217   argbuffer[current_arg].arg_type = at_end;
1218
1219   if (help_asked)
1220     print_help (argbuffer);
1221     /* NOTREACHED */
1222
1223   if (nincluded_files == 0 && file_count == 0)
1224     {
1225       error ("no input files specified.");
1226       suggest_asking_for_help ();
1227       /* NOTREACHED */
1228     }
1229
1230   if (tagfile == NULL)
1231     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1232   cwd = etags_getcwd ();        /* the current working directory */
1233   if (cwd[strlen (cwd) - 1] != '/')
1234     {
1235       char *oldcwd = cwd;
1236       cwd = concat (oldcwd, "/", "");
1237       free (oldcwd);
1238     }
1239
1240   /* Compute base directory for relative file names. */
1241   if (streq (tagfile, "-")
1242       || strneq (tagfile, "/dev/", 5))
1243     tagfiledir = cwd;            /* relative file names are relative to cwd */
1244   else
1245     {
1246       canonicalize_filename (tagfile);
1247       tagfiledir = absolute_dirname (tagfile, cwd);
1248     }
1249
1250   linebuffer_init (&lb);
1251   linebuffer_init (&filename_lb);
1252   linebuffer_init (&filebuf);
1253   linebuffer_init (&token_name);
1254
1255   if (!CTAGS)
1256     {
1257       if (streq (tagfile, "-"))
1258         {
1259           tagf = stdout;
1260           SET_BINARY (fileno (stdout));
1261         }
1262       else
1263         tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1264       if (tagf == NULL)
1265         pfatal (tagfile);
1266     }
1267
1268   /*
1269    * Loop through files finding functions.
1270    */
1271   for (i = 0; i < current_arg; i++)
1272     {
1273       static language *lang;    /* non-NULL if language is forced */
1274       char *this_file;
1275
1276       switch (argbuffer[i].arg_type)
1277         {
1278         case at_language:
1279           lang = argbuffer[i].lang;
1280           break;
1281         case at_regexp:
1282           analyze_regex (argbuffer[i].what);
1283           break;
1284         case at_filename:
1285               this_file = argbuffer[i].what;
1286               /* Input file named "-" means read file names from stdin
1287                  (one per line) and use them. */
1288               if (streq (this_file, "-"))
1289                 {
1290                   if (parsing_stdin)
1291                     fatal ("cannot parse standard input "
1292                            "AND read file names from it");
1293                   while (readline_internal (&filename_lb, stdin, "-") > 0)
1294                     process_file_name (filename_lb.buffer, lang);
1295                 }
1296               else
1297                 process_file_name (this_file, lang);
1298           break;
1299         case at_stdin:
1300           this_file = argbuffer[i].what;
1301           process_file (stdin, this_file, lang);
1302           break;
1303         default:
1304           error ("internal error: arg_type");
1305         }
1306     }
1307
1308   free_regexps ();
1309   free (lb.buffer);
1310   free (filebuf.buffer);
1311   free (token_name.buffer);
1312
1313   if (!CTAGS || cxref_style)
1314     {
1315       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1316       put_entries (nodehead);
1317       free_tree (nodehead);
1318       nodehead = NULL;
1319       if (!CTAGS)
1320         {
1321           fdesc *fdp;
1322
1323           /* Output file entries that have no tags. */
1324           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1325             if (!fdp->written)
1326               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1327
1328           while (nincluded_files-- > 0)
1329             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1330
1331           if (fclose (tagf) == EOF)
1332             pfatal (tagfile);
1333         }
1334
1335       exit (EXIT_SUCCESS);
1336     }
1337
1338   /* From here on, we are in (CTAGS && !cxref_style) */
1339   if (update)
1340     {
1341       char *cmd =
1342         xmalloc (strlen (tagfile) + whatlen_max +
1343                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1344       for (i = 0; i < current_arg; ++i)
1345         {
1346           switch (argbuffer[i].arg_type)
1347             {
1348             case at_filename:
1349             case at_stdin:
1350               break;
1351             default:
1352               continue;         /* the for loop */
1353             }
1354           char *z = stpcpy (cmd, "mv ");
1355           z = stpcpy (z, tagfile);
1356           z = stpcpy (z, " OTAGS;fgrep -v '\t");
1357           z = stpcpy (z, argbuffer[i].what);
1358           z = stpcpy (z, "\t' OTAGS >");
1359           z = stpcpy (z, tagfile);
1360           strcpy (z, ";rm OTAGS");
1361           if (system (cmd) != EXIT_SUCCESS)
1362             fatal ("failed to execute shell command");
1363         }
1364       free (cmd);
1365       append_to_tagfile = true;
1366     }
1367
1368   tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1369   if (tagf == NULL)
1370     pfatal (tagfile);
1371   put_entries (nodehead);       /* write all the tags (CTAGS) */
1372   free_tree (nodehead);
1373   nodehead = NULL;
1374   if (fclose (tagf) == EOF)
1375     pfatal (tagfile);
1376
1377   if (CTAGS)
1378     if (append_to_tagfile || update)
1379       {
1380         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1381         /* Maybe these should be used:
1382            setenv ("LC_COLLATE", "C", 1);
1383            setenv ("LC_ALL", "C", 1); */
1384         char *z = stpcpy (cmd, "sort -u -o ");
1385         z = stpcpy (z, tagfile);
1386         *z++ = ' ';
1387         strcpy (z, tagfile);
1388         exit (system (cmd));
1389       }
1390   return EXIT_SUCCESS;
1391 }
1392
1393
1394 /*
1395  * Return a compressor given the file name.  If EXTPTR is non-zero,
1396  * return a pointer into FILE where the compressor-specific
1397  * extension begins.  If no compressor is found, NULL is returned
1398  * and EXTPTR is not significant.
1399  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1400  */
1401 static compressor *
1402 get_compressor_from_suffix (char *file, char **extptr)
1403 {
1404   compressor *compr;
1405   char *slash, *suffix;
1406
1407   /* File has been processed by canonicalize_filename,
1408      so we don't need to consider backslashes on DOS_NT.  */
1409   slash = strrchr (file, '/');
1410   suffix = strrchr (file, '.');
1411   if (suffix == NULL || suffix < slash)
1412     return NULL;
1413   if (extptr != NULL)
1414     *extptr = suffix;
1415   suffix += 1;
1416   /* Let those poor souls who live with DOS 8+3 file name limits get
1417      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1418      Only the first do loop is run if not MSDOS */
1419   do
1420     {
1421       for (compr = compressors; compr->suffix != NULL; compr++)
1422         if (streq (compr->suffix, suffix))
1423           return compr;
1424       if (!MSDOS)
1425         break;                  /* do it only once: not really a loop */
1426       if (extptr != NULL)
1427         *extptr = ++suffix;
1428     } while (*suffix != '\0');
1429   return NULL;
1430 }
1431
1432
1433
1434 /*
1435  * Return a language given the name.
1436  */
1437 static language *
1438 get_language_from_langname (const char *name)
1439 {
1440   language *lang;
1441
1442   if (name == NULL)
1443     error ("empty language name");
1444   else
1445     {
1446       for (lang = lang_names; lang->name != NULL; lang++)
1447         if (streq (name, lang->name))
1448           return lang;
1449       error ("unknown language \"%s\"", name);
1450     }
1451
1452   return NULL;
1453 }
1454
1455
1456 /*
1457  * Return a language given the interpreter name.
1458  */
1459 static language *
1460 get_language_from_interpreter (char *interpreter)
1461 {
1462   language *lang;
1463   const char **iname;
1464
1465   if (interpreter == NULL)
1466     return NULL;
1467   for (lang = lang_names; lang->name != NULL; lang++)
1468     if (lang->interpreters != NULL)
1469       for (iname = lang->interpreters; *iname != NULL; iname++)
1470         if (streq (*iname, interpreter))
1471             return lang;
1472
1473   return NULL;
1474 }
1475
1476
1477
1478 /*
1479  * Return a language given the file name.
1480  */
1481 static language *
1482 get_language_from_filename (char *file, int case_sensitive)
1483 {
1484   language *lang;
1485   const char **name, **ext, *suffix;
1486
1487   /* Try whole file name first. */
1488   for (lang = lang_names; lang->name != NULL; lang++)
1489     if (lang->filenames != NULL)
1490       for (name = lang->filenames; *name != NULL; name++)
1491         if ((case_sensitive)
1492             ? streq (*name, file)
1493             : strcaseeq (*name, file))
1494           return lang;
1495
1496   /* If not found, try suffix after last dot. */
1497   suffix = strrchr (file, '.');
1498   if (suffix == NULL)
1499     return NULL;
1500   suffix += 1;
1501   for (lang = lang_names; lang->name != NULL; lang++)
1502     if (lang->suffixes != NULL)
1503       for (ext = lang->suffixes; *ext != NULL; ext++)
1504         if ((case_sensitive)
1505             ? streq (*ext, suffix)
1506             : strcaseeq (*ext, suffix))
1507           return lang;
1508   return NULL;
1509 }
1510
1511 \f
1512 /*
1513  * This routine is called on each file argument.
1514  */
1515 static void
1516 process_file_name (char *file, language *lang)
1517 {
1518   FILE *inf;
1519   fdesc *fdp;
1520   compressor *compr;
1521   char *compressed_name, *uncompressed_name;
1522   char *ext, *real_name, *tmp_name;
1523   int retval;
1524
1525   canonicalize_filename (file);
1526   if (streq (file, tagfile) && !streq (tagfile, "-"))
1527     {
1528       error ("skipping inclusion of %s in self.", file);
1529       return;
1530     }
1531   compr = get_compressor_from_suffix (file, &ext);
1532   if (compr)
1533     {
1534       compressed_name = file;
1535       uncompressed_name = savenstr (file, ext - file);
1536     }
1537   else
1538     {
1539       compressed_name = NULL;
1540       uncompressed_name = file;
1541     }
1542
1543   /* If the canonicalized uncompressed name
1544      has already been dealt with, skip it silently. */
1545   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1546     {
1547       assert (fdp->infname != NULL);
1548       if (streq (uncompressed_name, fdp->infname))
1549         goto cleanup;
1550     }
1551
1552   inf = fopen (file, "r" FOPEN_BINARY);
1553   if (inf)
1554     real_name = file;
1555   else
1556     {
1557       int file_errno = errno;
1558       if (compressed_name)
1559         {
1560           /* Try with the given suffix.  */
1561           inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1562           if (inf)
1563             real_name = uncompressed_name;
1564         }
1565       else
1566         {
1567           /* Try all possible suffixes.  */
1568           for (compr = compressors; compr->suffix != NULL; compr++)
1569             {
1570               compressed_name = concat (file, ".", compr->suffix);
1571               inf = fopen (compressed_name, "r" FOPEN_BINARY);
1572               if (inf)
1573                 {
1574                   real_name = compressed_name;
1575                   break;
1576                 }
1577               if (MSDOS)
1578                 {
1579                   char *suf = compressed_name + strlen (file);
1580                   size_t suflen = strlen (compr->suffix) + 1;
1581                   for ( ; suf[1]; suf++, suflen--)
1582                     {
1583                       memmove (suf, suf + 1, suflen);
1584                       inf = fopen (compressed_name, "r" FOPEN_BINARY);
1585                       if (inf)
1586                         {
1587                           real_name = compressed_name;
1588                           break;
1589                         }
1590                     }
1591                   if (inf)
1592                     break;
1593                 }
1594               free (compressed_name);
1595               compressed_name = NULL;
1596             }
1597         }
1598       if (! inf)
1599         {
1600           errno = file_errno;
1601           perror (file);
1602           goto cleanup;
1603         }
1604     }
1605
1606   if (real_name == compressed_name)
1607     {
1608       fclose (inf);
1609       tmp_name = etags_mktmp ();
1610       if (!tmp_name)
1611         inf = NULL;
1612       else
1613         {
1614 #if MSDOS || defined (DOS_NT)
1615           char *cmd1 = concat (compr->command, " \"", real_name);
1616           char *cmd = concat (cmd1, "\" > ", tmp_name);
1617 #else
1618           char *cmd1 = concat (compr->command, " '", real_name);
1619           char *cmd = concat (cmd1, "' > ", tmp_name);
1620 #endif
1621           free (cmd1);
1622           int tmp_errno;
1623           if (system (cmd) == -1)
1624             {
1625               inf = NULL;
1626               tmp_errno = EINVAL;
1627             }
1628           else
1629             {
1630               inf = fopen (tmp_name, "r" FOPEN_BINARY);
1631               tmp_errno = errno;
1632             }
1633           free (cmd);
1634           errno = tmp_errno;
1635         }
1636
1637       if (!inf)
1638         {
1639           perror (real_name);
1640           goto cleanup;
1641         }
1642     }
1643
1644   process_file (inf, uncompressed_name, lang);
1645
1646   retval = fclose (inf);
1647   if (real_name == compressed_name)
1648     {
1649       remove (tmp_name);
1650       free (tmp_name);
1651     }
1652   if (retval < 0)
1653     pfatal (file);
1654
1655  cleanup:
1656   if (compressed_name != file)
1657     free (compressed_name);
1658   if (uncompressed_name != file)
1659     free (uncompressed_name);
1660   last_node = NULL;
1661   curfdp = NULL;
1662   return;
1663 }
1664
1665 static void
1666 process_file (FILE *fh, char *fn, language *lang)
1667 {
1668   static const fdesc emptyfdesc;
1669   fdesc *fdp;
1670
1671   infilename = fn;
1672   /* Create a new input file description entry. */
1673   fdp = xnew (1, fdesc);
1674   *fdp = emptyfdesc;
1675   fdp->next = fdhead;
1676   fdp->infname = savestr (fn);
1677   fdp->lang = lang;
1678   fdp->infabsname = absolute_filename (fn, cwd);
1679   fdp->infabsdir = absolute_dirname (fn, cwd);
1680   if (filename_is_absolute (fn))
1681     {
1682       /* An absolute file name.  Canonicalize it. */
1683       fdp->taggedfname = absolute_filename (fn, NULL);
1684     }
1685   else
1686     {
1687       /* A file name relative to cwd.  Make it relative
1688          to the directory of the tags file. */
1689       fdp->taggedfname = relative_filename (fn, tagfiledir);
1690     }
1691   fdp->usecharno = true;        /* use char position when making tags */
1692   fdp->prop = NULL;
1693   fdp->written = false;         /* not written on tags file yet */
1694
1695   fdhead = fdp;
1696   curfdp = fdhead;              /* the current file description */
1697
1698   find_entries (fh);
1699
1700   /* If not Ctags, and if this is not metasource and if it contained no #line
1701      directives, we can write the tags and free all nodes pointing to
1702      curfdp. */
1703   if (!CTAGS
1704       && curfdp->usecharno      /* no #line directives in this file */
1705       && !curfdp->lang->metasource)
1706     {
1707       node *np, *prev;
1708
1709       /* Look for the head of the sublist relative to this file.  See add_node
1710          for the structure of the node tree. */
1711       prev = NULL;
1712       for (np = nodehead; np != NULL; prev = np, np = np->left)
1713         if (np->fdp == curfdp)
1714           break;
1715
1716       /* If we generated tags for this file, write and delete them. */
1717       if (np != NULL)
1718         {
1719           /* This is the head of the last sublist, if any.  The following
1720              instructions depend on this being true. */
1721           assert (np->left == NULL);
1722
1723           assert (fdhead == curfdp);
1724           assert (last_node->fdp == curfdp);
1725           put_entries (np);     /* write tags for file curfdp->taggedfname */
1726           free_tree (np);       /* remove the written nodes */
1727           if (prev == NULL)
1728             nodehead = NULL;    /* no nodes left */
1729           else
1730             prev->left = NULL;  /* delete the pointer to the sublist */
1731         }
1732     }
1733 }
1734
1735 static void
1736 reset_input (FILE *inf)
1737 {
1738   if (fseek (inf, 0, SEEK_SET) != 0)
1739     perror (infilename);
1740 }
1741
1742 /*
1743  * This routine opens the specified file and calls the function
1744  * which finds the function and type definitions.
1745  */
1746 static void
1747 find_entries (FILE *inf)
1748 {
1749   char *cp;
1750   language *lang = curfdp->lang;
1751   Lang_function *parser = NULL;
1752
1753   /* If user specified a language, use it. */
1754   if (lang != NULL && lang->function != NULL)
1755     {
1756       parser = lang->function;
1757     }
1758
1759   /* Else try to guess the language given the file name. */
1760   if (parser == NULL)
1761     {
1762       lang = get_language_from_filename (curfdp->infname, true);
1763       if (lang != NULL && lang->function != NULL)
1764         {
1765           curfdp->lang = lang;
1766           parser = lang->function;
1767         }
1768     }
1769
1770   /* Else look for sharp-bang as the first two characters. */
1771   if (parser == NULL
1772       && readline_internal (&lb, inf, infilename) > 0
1773       && lb.len >= 2
1774       && lb.buffer[0] == '#'
1775       && lb.buffer[1] == '!')
1776     {
1777       char *lp;
1778
1779       /* Set lp to point at the first char after the last slash in the
1780          line or, if no slashes, at the first nonblank.  Then set cp to
1781          the first successive blank and terminate the string. */
1782       lp = strrchr (lb.buffer+2, '/');
1783       if (lp != NULL)
1784         lp += 1;
1785       else
1786         lp = skip_spaces (lb.buffer + 2);
1787       cp = skip_non_spaces (lp);
1788       *cp = '\0';
1789
1790       if (strlen (lp) > 0)
1791         {
1792           lang = get_language_from_interpreter (lp);
1793           if (lang != NULL && lang->function != NULL)
1794             {
1795               curfdp->lang = lang;
1796               parser = lang->function;
1797             }
1798         }
1799     }
1800
1801   reset_input (inf);
1802
1803   /* Else try to guess the language given the case insensitive file name. */
1804   if (parser == NULL)
1805     {
1806       lang = get_language_from_filename (curfdp->infname, false);
1807       if (lang != NULL && lang->function != NULL)
1808         {
1809           curfdp->lang = lang;
1810           parser = lang->function;
1811         }
1812     }
1813
1814   /* Else try Fortran or C. */
1815   if (parser == NULL)
1816     {
1817       node *old_last_node = last_node;
1818
1819       curfdp->lang = get_language_from_langname ("fortran");
1820       find_entries (inf);
1821
1822       if (old_last_node == last_node)
1823         /* No Fortran entries found.  Try C. */
1824         {
1825           reset_input (inf);
1826           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1827           find_entries (inf);
1828         }
1829       return;
1830     }
1831
1832   if (!no_line_directive
1833       && curfdp->lang != NULL && curfdp->lang->metasource)
1834     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1835        file, or anyway we parsed a file that is automatically generated from
1836        this one.  If this is the case, the bingo.c file contained #line
1837        directives that generated tags pointing to this file.  Let's delete
1838        them all before parsing this file, which is the real source. */
1839     {
1840       fdesc **fdpp = &fdhead;
1841       while (*fdpp != NULL)
1842         if (*fdpp != curfdp
1843             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1844           /* We found one of those!  We must delete both the file description
1845              and all tags referring to it. */
1846           {
1847             fdesc *badfdp = *fdpp;
1848
1849             /* Delete the tags referring to badfdp->taggedfname
1850                that were obtained from badfdp->infname. */
1851             invalidate_nodes (badfdp, &nodehead);
1852
1853             *fdpp = badfdp->next; /* remove the bad description from the list */
1854             free_fdesc (badfdp);
1855           }
1856         else
1857           fdpp = &(*fdpp)->next; /* advance the list pointer */
1858     }
1859
1860   assert (parser != NULL);
1861
1862   /* Generic initializations before reading from file. */
1863   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1864
1865   /* Generic initializations before parsing file with readline. */
1866   lineno = 0;                  /* reset global line number */
1867   charno = 0;                  /* reset global char number */
1868   linecharno = 0;              /* reset global char number of line start */
1869
1870   parser (inf);
1871
1872   regex_tag_multiline ();
1873 }
1874
1875 \f
1876 /*
1877  * Check whether an implicitly named tag should be created,
1878  * then call `pfnote'.
1879  * NAME is a string that is internally copied by this function.
1880  *
1881  * TAGS format specification
1882  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1883  * The following is explained in some more detail in etc/ETAGS.EBNF.
1884  *
1885  * make_tag creates tags with "implicit tag names" (unnamed tags)
1886  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1887  *  1. NAME does not contain any of the characters in NONAM;
1888  *  2. LINESTART contains name as either a rightmost, or rightmost but
1889  *     one character, substring;
1890  *  3. the character, if any, immediately before NAME in LINESTART must
1891  *     be a character in NONAM;
1892  *  4. the character, if any, immediately after NAME in LINESTART must
1893  *     also be a character in NONAM.
1894  *
1895  * The implementation uses the notinname() macro, which recognizes the
1896  * characters stored in the string `nonam'.
1897  * etags.el needs to use the same characters that are in NONAM.
1898  */
1899 static void
1900 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1901           int namelen,          /* tag length */
1902           bool is_func,         /* tag is a function */
1903           char *linestart,      /* start of the line where tag is */
1904           int linelen,          /* length of the line where tag is */
1905           int lno,              /* line number */
1906           long int cno)         /* character number */
1907 {
1908   bool named = (name != NULL && namelen > 0);
1909   char *nname = NULL;
1910
1911   if (!CTAGS && named)          /* maybe set named to false */
1912     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1913        such that etags.el can guess a name from it. */
1914     {
1915       int i;
1916       register const char *cp = name;
1917
1918       for (i = 0; i < namelen; i++)
1919         if (notinname (*cp++))
1920           break;
1921       if (i == namelen)                         /* rule #1 */
1922         {
1923           cp = linestart + linelen - namelen;
1924           if (notinname (linestart[linelen-1]))
1925             cp -= 1;                            /* rule #4 */
1926           if (cp >= linestart                   /* rule #2 */
1927               && (cp == linestart
1928                   || notinname (cp[-1]))        /* rule #3 */
1929               && strneq (name, cp, namelen))    /* rule #2 */
1930             named = false;      /* use implicit tag name */
1931         }
1932     }
1933
1934   if (named)
1935     nname = savenstr (name, namelen);
1936
1937   pfnote (nname, is_func, linestart, linelen, lno, cno);
1938 }
1939
1940 /* Record a tag. */
1941 static void
1942 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1943         long int cno)
1944                                 /* tag name, or NULL if unnamed */
1945                                 /* tag is a function */
1946                                 /* start of the line where tag is */
1947                                 /* length of the line where tag is */
1948                                 /* line number */
1949                                 /* character number */
1950 {
1951   register node *np;
1952
1953   assert (name == NULL || name[0] != '\0');
1954   if (CTAGS && name == NULL)
1955     return;
1956
1957   np = xnew (1, node);
1958
1959   /* If ctags mode, change name "main" to M<thisfilename>. */
1960   if (CTAGS && !cxref_style && streq (name, "main"))
1961     {
1962       char *fp = strrchr (curfdp->taggedfname, '/');
1963       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1964       fp = strrchr (np->name, '.');
1965       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1966         fp[0] = '\0';
1967     }
1968   else
1969     np->name = name;
1970   np->valid = true;
1971   np->been_warned = false;
1972   np->fdp = curfdp;
1973   np->is_func = is_func;
1974   np->lno = lno;
1975   if (np->fdp->usecharno)
1976     /* Our char numbers are 0-base, because of C language tradition?
1977        ctags compatibility?  old versions compatibility?   I don't know.
1978        Anyway, since emacs's are 1-base we expect etags.el to take care
1979        of the difference.  If we wanted to have 1-based numbers, we would
1980        uncomment the +1 below. */
1981     np->cno = cno /* + 1 */ ;
1982   else
1983     np->cno = invalidcharno;
1984   np->left = np->right = NULL;
1985   if (CTAGS && !cxref_style)
1986     {
1987       if (strlen (linestart) < 50)
1988         np->regex = concat (linestart, "$", "");
1989       else
1990         np->regex = savenstr (linestart, 50);
1991     }
1992   else
1993     np->regex = savenstr (linestart, linelen);
1994
1995   add_node (np, &nodehead);
1996 }
1997
1998 /*
1999  * free_tree ()
2000  *      recurse on left children, iterate on right children.
2001  */
2002 static void
2003 free_tree (register node *np)
2004 {
2005   while (np)
2006     {
2007       register node *node_right = np->right;
2008       free_tree (np->left);
2009       free (np->name);
2010       free (np->regex);
2011       free (np);
2012       np = node_right;
2013     }
2014 }
2015
2016 /*
2017  * free_fdesc ()
2018  *      delete a file description
2019  */
2020 static void
2021 free_fdesc (register fdesc *fdp)
2022 {
2023   free (fdp->infname);
2024   free (fdp->infabsname);
2025   free (fdp->infabsdir);
2026   free (fdp->taggedfname);
2027   free (fdp->prop);
2028   free (fdp);
2029 }
2030
2031 /*
2032  * add_node ()
2033  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2034  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2035  *      balancing.
2036  *
2037  *      add_node is the only function allowed to add nodes, so it can
2038  *      maintain state.
2039  */
2040 static void
2041 add_node (node *np, node **cur_node_p)
2042 {
2043   register int dif;
2044   register node *cur_node = *cur_node_p;
2045
2046   if (cur_node == NULL)
2047     {
2048       *cur_node_p = np;
2049       last_node = np;
2050       return;
2051     }
2052
2053   if (!CTAGS)
2054     /* Etags Mode */
2055     {
2056       /* For each file name, tags are in a linked sublist on the right
2057          pointer.  The first tags of different files are a linked list
2058          on the left pointer.  last_node points to the end of the last
2059          used sublist. */
2060       if (last_node != NULL && last_node->fdp == np->fdp)
2061         {
2062           /* Let's use the same sublist as the last added node. */
2063           assert (last_node->right == NULL);
2064           last_node->right = np;
2065           last_node = np;
2066         }
2067       else if (cur_node->fdp == np->fdp)
2068         {
2069           /* Scanning the list we found the head of a sublist which is
2070              good for us.  Let's scan this sublist. */
2071           add_node (np, &cur_node->right);
2072         }
2073       else
2074         /* The head of this sublist is not good for us.  Let's try the
2075            next one. */
2076         add_node (np, &cur_node->left);
2077     } /* if ETAGS mode */
2078
2079   else
2080     {
2081       /* Ctags Mode */
2082       dif = strcmp (np->name, cur_node->name);
2083
2084       /*
2085        * If this tag name matches an existing one, then
2086        * do not add the node, but maybe print a warning.
2087        */
2088       if (no_duplicates && !dif)
2089         {
2090           if (np->fdp == cur_node->fdp)
2091             {
2092               if (!no_warnings)
2093                 {
2094                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2095                            np->fdp->infname, lineno, np->name);
2096                   fprintf (stderr, "Second entry ignored\n");
2097                 }
2098             }
2099           else if (!cur_node->been_warned && !no_warnings)
2100             {
2101               fprintf
2102                 (stderr,
2103                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2104                  np->fdp->infname, cur_node->fdp->infname, np->name);
2105               cur_node->been_warned = true;
2106             }
2107           return;
2108         }
2109
2110       /* Actually add the node */
2111       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2112     } /* if CTAGS mode */
2113 }
2114
2115 /*
2116  * invalidate_nodes ()
2117  *      Scan the node tree and invalidate all nodes pointing to the
2118  *      given file description (CTAGS case) or free them (ETAGS case).
2119  */
2120 static void
2121 invalidate_nodes (fdesc *badfdp, node **npp)
2122 {
2123   node *np = *npp;
2124
2125   if (np == NULL)
2126     return;
2127
2128   if (CTAGS)
2129     {
2130       if (np->left != NULL)
2131         invalidate_nodes (badfdp, &np->left);
2132       if (np->fdp == badfdp)
2133         np->valid = false;
2134       if (np->right != NULL)
2135         invalidate_nodes (badfdp, &np->right);
2136     }
2137   else
2138     {
2139       assert (np->fdp != NULL);
2140       if (np->fdp == badfdp)
2141         {
2142           *npp = np->left;      /* detach the sublist from the list */
2143           np->left = NULL;      /* isolate it */
2144           free_tree (np);       /* free it */
2145           invalidate_nodes (badfdp, npp);
2146         }
2147       else
2148         invalidate_nodes (badfdp, &np->left);
2149     }
2150 }
2151
2152 \f
2153 static int total_size_of_entries (node *);
2154 static int number_len (long) ATTRIBUTE_CONST;
2155
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is always at least 1. */
static int
number_len (long int num)
{
  int digits = 1;

  /* Every further division by ten that leaves something positive
     accounts for one more digit. */
  for (num /= 10; num > 0; num /= 10)
    digits++;

  return digits;
}
2165
2166 /*
2167  * Return total number of characters that put_entries will output for
2168  * the nodes in the linked list at the right of the specified node.
2169  * This count is irrelevant with etags.el since emacs 19.34 at least,
2170  * but is still supplied for backward compatibility.
2171  */
2172 static int
2173 total_size_of_entries (register node *np)
2174 {
2175   register int total = 0;
2176
2177   for (; np != NULL; np = np->right)
2178     if (np->valid)
2179       {
2180         total += strlen (np->regex) + 1;                /* pat\177 */
2181         if (np->name != NULL)
2182           total += strlen (np->name) + 1;               /* name\001 */
2183         total += number_len ((long) np->lno) + 1;       /* lno, */
2184         if (np->cno != invalidcharno)                   /* cno */
2185           total += number_len (np->cno);
2186         total += 1;                                     /* newline */
2187       }
2188
2189   return total;
2190 }
2191
2192 static void
2193 put_entries (register node *np)
2194 {
2195   register char *sp;
2196   static fdesc *fdp = NULL;
2197
2198   if (np == NULL)
2199     return;
2200
2201   /* Output subentries that precede this one */
2202   if (CTAGS)
2203     put_entries (np->left);
2204
2205   /* Output this entry */
2206   if (np->valid)
2207     {
2208       if (!CTAGS)
2209         {
2210           /* Etags mode */
2211           if (fdp != np->fdp)
2212             {
2213               fdp = np->fdp;
2214               fprintf (tagf, "\f\n%s,%d\n",
2215                        fdp->taggedfname, total_size_of_entries (np));
2216               fdp->written = true;
2217             }
2218           fputs (np->regex, tagf);
2219           fputc ('\177', tagf);
2220           if (np->name != NULL)
2221             {
2222               fputs (np->name, tagf);
2223               fputc ('\001', tagf);
2224             }
2225           fprintf (tagf, "%d,", np->lno);
2226           if (np->cno != invalidcharno)
2227             fprintf (tagf, "%ld", np->cno);
2228           fputs ("\n", tagf);
2229         }
2230       else
2231         {
2232           /* Ctags mode */
2233           if (np->name == NULL)
2234             error ("internal error: NULL name in ctags mode.");
2235
2236           if (cxref_style)
2237             {
2238               if (vgrind_style)
2239                 fprintf (stdout, "%s %s %d\n",
2240                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2241               else
2242                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2243                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2244             }
2245           else
2246             {
2247               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2248
2249               if (np->is_func)
2250                 {               /* function or #define macro with args */
2251                   putc (searchar, tagf);
2252                   putc ('^', tagf);
2253
2254                   for (sp = np->regex; *sp; sp++)
2255                     {
2256                       if (*sp == '\\' || *sp == searchar)
2257                         putc ('\\', tagf);
2258                       putc (*sp, tagf);
2259                     }
2260                   putc (searchar, tagf);
2261                 }
2262               else
2263                 {               /* anything else; text pattern inadequate */
2264                   fprintf (tagf, "%d", np->lno);
2265                 }
2266               putc ('\n', tagf);
2267             }
2268         }
2269     } /* if this node contains a valid tag */
2270
2271   /* Output subentries that follow this one */
2272   put_entries (np->right);
2273   if (!CTAGS)
2274     put_entries (np->left);
2275 }
2276
2277 \f
/* Flags describing which C-like language variant is being parsed.
   The bits under C_EXT select the language extension; C_AUTO and YACC
   lie outside that mask. */
#define C_EXT   0x0fff          /* mask of the C extension bits */
#define C_PLAIN 0x0000          /* plain C */
#define C_PLPL  0x0001          /* C++ */
#define C_STAR  0x0003          /* C* */
#define C_JAVA  0x0005          /* Java */
#define C_AUTO  0x1000          /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2286
/*
 * The C symbol tables.
 *
 * Kinds of keyword recognized while tagging C-like sources; the
 * keywords listed are the ones the C_stab_entry word list maps to
 * each kind.
 */
enum sym_type
{
  st_none,              /* not a known keyword */
  st_C_objprot,         /* @interface, @protocol */
  st_C_objimpl,         /* @implementation */
  st_C_objend,          /* @end */
  st_C_gnumacro,        /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,          /* if, for, while, switch, return, ... */
  st_C_attribute,       /* __attribute__, GTY */
  st_C_javastruct,      /* extends, implements */
  st_C_operator,        /* operator */
  st_C_class,           /* class */
  st_C_template,        /* template */
  st_C_struct,          /* struct, union, namespace, domain, interface */
  st_C_extern,          /* extern */
  st_C_enum,            /* enum */
  st_C_define,          /* define, undef */
  st_C_typedef          /* typedef */
};
2301
2302 /* Feed stuff between (but not including) %[ and %] lines to:
2303      gperf -m 5
2304 %[
2305 %compare-strncmp
2306 %enum
2307 %struct-type
2308 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2309 %%
2310 if,             0,                      st_C_ignore
2311 for,            0,                      st_C_ignore
2312 while,          0,                      st_C_ignore
2313 switch,         0,                      st_C_ignore
2314 return,         0,                      st_C_ignore
2315 __attribute__,  0,                      st_C_attribute
2316 GTY,            0,                      st_C_attribute
2317 @interface,     0,                      st_C_objprot
2318 @protocol,      0,                      st_C_objprot
2319 @implementation,0,                      st_C_objimpl
2320 @end,           0,                      st_C_objend
2321 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2322 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2323 friend,         C_PLPL,                 st_C_ignore
2324 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2325 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2326 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2327 class,          0,                      st_C_class
2328 namespace,      C_PLPL,                 st_C_struct
2329 domain,         C_STAR,                 st_C_struct
2330 union,          0,                      st_C_struct
2331 struct,         0,                      st_C_struct
2332 extern,         0,                      st_C_extern
2333 enum,           0,                      st_C_enum
2334 typedef,        0,                      st_C_typedef
2335 define,         0,                      st_C_define
2336 undef,          0,                      st_C_define
2337 operator,       C_PLPL,                 st_C_operator
2338 template,       0,                      st_C_template
2339 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2340 DEFUN,          0,                      st_C_gnumacro
2341 SYSCALL,        0,                      st_C_gnumacro
2342 ENTRY,          0,                      st_C_gnumacro
2343 PSEUDO,         0,                      st_C_gnumacro
2344 # These are defined inside C functions, so currently they are not met.
2345 # EXFUN used in glibc, DEFVAR_* in emacs.
2346 #EXFUN,         0,                      st_C_gnumacro
2347 #DEFVAR_,       0,                      st_C_gnumacro
2348 %]
2349 and replace lines between %< and %> with its output, then:
2350  - remove the #if characterset check
2351  - make in_word_set static and not inline. */
2352 /*%<*/
2353 /* C code produced by gperf version 3.0.1 */
2354 /* Command-line: gperf -m 5  */
2355 /* Computed positions: -k'2-3' */
2356
2357 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2358 /* maximum key range = 33, duplicates = 0 */
2359
/* Perfect hash function generated by gperf for the C keyword table:
   the length LEN plus the association values of the second and, for
   lengths other than 2, third character of STR.  STR must therefore
   hold at least two characters (three when LEN is not 2). */
static int
hash (const char *str, int len)
{
  /* Indexed by unsigned character code; ten entries per row. */
  static char const asso_values[] =
    {
      /*   0 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  10 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  20 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  30 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  40 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  50 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  60 */ 35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      /*  70 */ 26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      /*  80 */ 35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      /*  90 */ 35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      /* 100 */ 23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
      /* 110 */  0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
      /* 120 */  4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 130 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 140 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 150 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 160 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 170 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 180 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 190 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 200 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 210 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 220 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 230 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 240 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 250 */ 35, 35, 35, 35, 35, 35
    };
  int hval = len;

  /* The third character takes part for every length except 2... */
  if (len != 2)
    hval += asso_values[(unsigned char) str[2]];

  /* ...the second one always does. */
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2405
2406 static struct C_stab_entry *
2407 in_word_set (register const char *str, register unsigned int len)
2408 {
2409   enum
2410     {
2411       TOTAL_KEYWORDS = 33,
2412       MIN_WORD_LENGTH = 2,
2413       MAX_WORD_LENGTH = 15,
2414       MIN_HASH_VALUE = 2,
2415       MAX_HASH_VALUE = 34
2416     };
2417
2418   static struct C_stab_entry wordlist[] =
2419     {
2420       {""}, {""},
2421       {"if",            0,                      st_C_ignore},
2422       {"GTY",           0,                      st_C_attribute},
2423       {"@end",          0,                      st_C_objend},
2424       {"union",         0,                      st_C_struct},
2425       {"define",                0,                      st_C_define},
2426       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2427       {"template",      0,                      st_C_template},
2428       {"operator",      C_PLPL,                 st_C_operator},
2429       {"@interface",    0,                      st_C_objprot},
2430       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2431       {"friend",                C_PLPL,                 st_C_ignore},
2432       {"typedef",       0,                      st_C_typedef},
2433       {"return",                0,                      st_C_ignore},
2434       {"@implementation",0,                     st_C_objimpl},
2435       {"@protocol",     0,                      st_C_objprot},
2436       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2437       {"extern",                0,                      st_C_extern},
2438       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2439       {"struct",                0,                      st_C_struct},
2440       {"domain",                C_STAR,                 st_C_struct},
2441       {"switch",                0,                      st_C_ignore},
2442       {"enum",          0,                      st_C_enum},
2443       {"for",           0,                      st_C_ignore},
2444       {"namespace",     C_PLPL,                 st_C_struct},
2445       {"class",         0,                      st_C_class},
2446       {"while",         0,                      st_C_ignore},
2447       {"undef",         0,                      st_C_define},
2448       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2449       {"__attribute__", 0,                      st_C_attribute},
2450       {"SYSCALL",       0,                      st_C_gnumacro},
2451       {"ENTRY",         0,                      st_C_gnumacro},
2452       {"PSEUDO",                0,                      st_C_gnumacro},
2453       {"DEFUN",         0,                      st_C_gnumacro}
2454     };
2455
2456   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2457     {
2458       int key = hash (str, len);
2459
2460       if (key <= MAX_HASH_VALUE && key >= 0)
2461         {
2462           const char *s = wordlist[key].name;
2463
2464           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2465             return &wordlist[key];
2466         }
2467     }
2468   return 0;
2469 }
2470 /*%>*/
2471
2472 static enum sym_type
2473 C_symtype (char *str, int len, int c_ext)
2474 {
2475   register struct C_stab_entry *se = in_word_set (str, len);
2476
2477   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2478     return st_none;
2479   return se->type;
2480 }
2481
2482 \f
2483 /*
2484  * Ignoring __attribute__ ((list))
      *
      * consider_token sets this flag when it meets the __attribute__
      * keyword; while the flag is set, C_entries does not consider
      * tokens at all.
2485  */
2486 static bool inattribute;        /* looking at an __attribute__ construct */
2487
2488 /*
2489  * C functions and variables are recognized using a simple
2490  * finite automaton.  fvdef is its state variable.
2491  */
2492 static enum
2493 {
2494   fvnone,                       /* nothing seen */
2495   fdefunkey,                    /* GNU macro keyword seen (DEFUN, ENTRY,
                                        SYSCALL or PSEUDO) */
2496   fdefunname,                   /* name after the GNU macro keyword seen */
2497   foperator,                    /* func: operator keyword seen (cplpl) */
2498   fvnameseen,                   /* function or variable name seen */
2499   fstartlist,                   /* func: just after open parenthesis */
2500   finlist,                      /* func: in parameter list */
2501   flistseen,                    /* func: after parameter list */
2502   fignore,                      /* func: before open brace */
2503   vignore                       /* var-like: ignore until ';' */
2504 } fvdef;
2505
2506 static bool fvextern;           /* func or var: extern keyword seen */
2507
2508 /*
2509  * typedefs are recognized using a simple finite automaton.
2510  * typdef is its state variable.
      * consider_token leaves the state at tnone when the `typedefs'
      * option is off; the typedef keyword then only resets fvdef and
      * fvextern.
2511  */
2512 static enum
2513 {
2514   tnone,                        /* nothing seen */
2515   tkeyseen,                     /* typedef keyword seen */
2516   ttypeseen,                    /* defined type seen */
2517   tinbody,                      /* inside typedef body */
2518   tend,                         /* just before typedef tag */
2519   tignore                       /* junk after typedef tag */
2520 } typdef;
2521
2522 /*
2523  * struct-like structures (enum, struct and union) are recognized
2524  * using another simple finite automaton.  `structdef' is its state
2525  * variable.
      * In the dialects that have them, the keyword table also routes
      * `class', `namespace', `interface' and `domain' through this
      * automaton.
2526  */
2527 static enum
2528 {
2529   snone,                        /* nothing seen yet,
2530                                    or in struct body if bracelev > 0 */
2531   skeyseen,                     /* struct-like keyword seen */
2532   stagseen,                     /* struct-like tag seen */
2533   scolonseen                    /* colon (or a st_C_javastruct keyword such
                                        as `extends') seen after struct-like
                                        tag */
2534 } structdef;
2535
2536 /*
2537  * When objdef is different from onone, objtag is the name of the class.
      * consider_token stores a savenstr copy of the class name here; that
      * copy is deliberately never freed.
2538  */
2539 static const char *objtag = "<uninited>";
2540
2541 /*
2542  * Yet another little state machine to deal with preprocessor lines.
      * The CNL macro resets it to dnone when a new line is read, whereas
      * CNL_SAVE_DEFINEDEF reads a new line and leaves the state alone.
2543  */
2544 static enum
2545 {
2546   dnone,                        /* nothing seen */
2547   dsharpseen,                   /* '#' seen as first char on line */
2548   ddefineseen,                  /* '#' and 'define' seen */
2549   dignorerest                   /* ignore rest of line */
2550 } definedef;
2551
2552 /*
2553  * State machine for Objective C protocols and implementations.
2554  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2555  */
2556 static enum
2557 {
2558   onone,                        /* nothing seen */
2559   oprotocol,                    /* @interface or @protocol seen */
2560   oimplementation,              /* @implementation seen */
2561   otagseen,                     /* class name seen */
2562   oparenseen,                   /* parenthesis before category seen */
2563   ocatseen,                     /* category name seen */
2564   oinbody,                      /* in @implementation body */
2565   omethodsign,                  /* in @implementation body, after +/- */
2566   omethodtag,                   /* after method name */
2567   omethodcolon,                 /* after method colon */
2568   omethodparm,                  /* after method parameter */
2569   oignore                       /* wait for @end */
2570 } objdef;
2571
2572
2573 /*
2574  * Use this structure to keep info about the token read, and how it
2575  * should be tagged.  Used by the make_C_tag function to build a tag.
2576  */
2577 static struct tok
2578 {
2579   char *line;                   /* string containing the token */
2580   int offset;                   /* where the token starts in LINE */
2581   int length;                   /* token length */
2582   /*
2583     The previous members can be used to pass strings around for generic
2584     purposes.  The following ones specifically refer to creating tags.  In this
2585     case the token contained here is the pattern that will be used to create a
2586     tag.
2587   */
2588   bool valid;                   /* create a tag only when true; the token
2589                                    should be invalidated whenever a state
2590                                    machine is reset prematurely */
2591   bool named;                   /* create a named tag */
2592   int lineno;                   /* source line number of tag */
2593   long linepos;                 /* source char number of tag */
2594 } token;                        /* latest token read */
2595
2596 /*
2597  * Variables and functions for dealing with nested structures.
2598  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2599  */
2600 static void pushclass_above (int, char *, int);
2601 static void popclass_above (int);
2602 static void write_classname (linebuffer *, const char *qualifier);
2603
2604 static struct {
2605   char **cname;                 /* nested class names; NULL for a level
                                        pushed without a name */
2606   int *bracelev;                /* nested class brace level */
2607   int nl;                       /* class nesting level (elements used) */
2608   int size;                     /* length of the array */
2609 } cstack;                       /* stack for nested declaration tags */
2610 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2611 #define nestlev         (cstack.nl)
2612 /* After struct keyword or in struct body, not inside a nested function.
         Expands to a reference to a variable named bracelev, which must be in
         scope wherever the macro is used. */
2613 #define instruct        (structdef == snone && nestlev > 0                      \
2614                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2615
2616 static void
2617 pushclass_above (int bracelev, char *str, int len)
2618 {
2619   int nl;
2620
2621   popclass_above (bracelev);
2622   nl = cstack.nl;
2623   if (nl >= cstack.size)
2624     {
2625       int size = cstack.size *= 2;
2626       xrnew (cstack.cname, size, char *);
2627       xrnew (cstack.bracelev, size, int);
2628     }
2629   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2630   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2631   cstack.bracelev[nl] = bracelev;
2632   cstack.nl = nl + 1;
2633 }
2634
2635 static void
2636 popclass_above (int bracelev)
2637 {
2638   int nl;
2639
2640   for (nl = cstack.nl - 1;
2641        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2642        nl--)
2643     {
2644       free (cstack.cname[nl]);
2645       cstack.nl = nl;
2646     }
2647 }
2648
2649 static void
2650 write_classname (linebuffer *cn, const char *qualifier)
2651 {
2652   int i, len;
2653   int qlen = strlen (qualifier);
2654
2655   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2656     {
2657       len = 0;
2658       cn->len = 0;
2659       cn->buffer[0] = '\0';
2660     }
2661   else
2662     {
2663       len = strlen (cstack.cname[0]);
2664       linebuffer_setlen (cn, len);
2665       strcpy (cn->buffer, cstack.cname[0]);
2666     }
2667   for (i = 1; i < cstack.nl; i++)
2668     {
2669       char *s = cstack.cname[i];
2670       if (s == NULL)
2671         continue;
2672       linebuffer_setlen (cn, len + qlen + strlen (s));
2673       len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2674     }
2675 }
2676
2677 \f
2678 static bool consider_token (char *, int, int, int *, int, int, bool *);
2679 static void make_C_tag (bool);
2680
2681 /*
2682  * consider_token ()
2683  *      checks to see if the current token is at the start of a
2684  *      function or variable, or corresponds to a typedef, or
2685  *      is a struct/union/enum tag, or #define, or an enum constant.
      *      Returns true when the token is such a tag candidate, and
      *      false when it is a keyword or has to be skipped.
2686  *
2687  *      *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2688  *      with args.  C_EXTP points to which language we are looking at.
      *      Except in the #define case, this function only ever stores
      *      true in *IS_FUNC_OR_VAR, so the caller must clear it first.
      *      *C_EXTP is rewritten when C++ is detected automatically.
2689  *
2690  * Globals
2691  *      fvdef                   IN OUT
2692  *      structdef               IN OUT
2693  *      definedef               IN OUT
2694  *      typdef                  IN OUT
2695  *      objdef                  IN OUT
      *      token_name              IN OUT (Objective C method names)
      *      inattribute             OUT
      *      fvextern                OUT
      *      objtag                  OUT
2696  */
2697
2698 static bool
2699 consider_token (char *str, int len, int c, int *c_extp,
2700                 int bracelev, int parlev, bool *is_func_or_var)
2701                                 /* IN: token pointer */
2702                                 /* IN: token length */
2703                                 /* IN: first char after the token */
2704                                 /* IN, OUT: C extensions mask */
2705                                 /* IN: brace level */
2706                                 /* IN: parenthesis level */
2707                                 /* OUT: function or variable found */
2708 {
2709   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2710      structtype is the type of the preceding struct-like keyword, and
2711      structbracelev is the brace level where it has been seen. */
2712   static enum sym_type structtype;
2713   static int structbracelev;
2714   static enum sym_type toktype;
2715
2716
2717   toktype = C_symtype (str, len, *c_extp);
2718
2719   /*
2720    * Skip __attribute__
2721    */
2722   if (toktype == st_C_attribute)
2723     {
2724       inattribute = true;
2725       return false;
2726      }
2727
2728    /*
2729     * Advance the definedef state machine.
2730     */
2731    switch (definedef)
2732      {
2733      case dnone:
2734        /* We're not on a preprocessor line. */
2735        if (toktype == st_C_gnumacro)
2736          {
2737            fvdef = fdefunkey;
2738            return false;
2739          }
2740        break;
2741      case dsharpseen:           /* this is the first token after '#' */
2742        if (toktype == st_C_define)
2743          {
2744            definedef = ddefineseen;
2745          }
2746        else
2747          {
2748            definedef = dignorerest;
2749          }
2750        return false;
2751      case ddefineseen:
2752        /*
2753         * Make a tag for any macro, unless it is a constant
2754         * and constantypedefs is false.
2755         */
2756        definedef = dignorerest;
2757        *is_func_or_var = (c == '(');     /* macro with arguments? */
2758        if (!*is_func_or_var && !constantypedefs)
2759          return false;
2760        else
2761          return true;
2762      case dignorerest:
2763        return false;
2764      default:
2765        error ("internal error: definedef value.");
2766      }
2767
2768    /*
2769     * Now typedefs
2770     */
2771    switch (typdef)
2772      {
2773      case tnone:
2774        if (toktype == st_C_typedef)
2775          {
               /* The state only advances when typedefs are to be tagged.  */
2776            if (typedefs)
2777              typdef = tkeyseen;
2778            fvextern = false;
2779            fvdef = fvnone;
2780            return false;
2781          }
2782        break;
2783      case tkeyseen:
2784        switch (toktype)
2785          {
2786          case st_none:
2787          case st_C_class:
2788          case st_C_struct:
2789          case st_C_enum:
2790            typdef = ttypeseen;
2791            break;
2792          default:
2793            break;
2794          }
2795        break;
2796      case ttypeseen:
2797        if (structdef == snone && fvdef == fvnone)
2798          {
2799            fvdef = fvnameseen;
2800            return true;
2801          }
2802        break;
2803      case tend:
2804        switch (toktype)
2805          {
2806          case st_C_class:
2807          case st_C_struct:
2808          case st_C_enum:
2809            return false;
2810          default:
2811            return true;
2812          }
2813      default:
2814        break;
2815      }
2816
        /* Struct-like keywords start (or continue) the structdef automaton.  */
2817    switch (toktype)
2818      {
2819      case st_C_javastruct:
2820        if (structdef == stagseen)
2821          structdef = scolonseen;
2822        return false;
2823      case st_C_template:
2824      case st_C_class:
2825        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2826            && bracelev == 0
2827            && definedef == dnone && structdef == snone
2828            && typdef == tnone && fvdef == fvnone)
2829          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2830        if (toktype == st_C_template)
2831          break;
2832        /* FALLTHRU */
2833      case st_C_struct:
2834      case st_C_enum:
2835        if (parlev == 0
2836            && fvdef != vignore
2837            && (typdef == tkeyseen
2838                || (typedefs_or_cplusplus && structdef == snone)))
2839          {
2840            structdef = skeyseen;
2841            structtype = toktype;
2842            structbracelev = bracelev;
2843            if (fvdef == fvnameseen)
2844              fvdef = fvnone;
2845          }
2846        return false;
2847      default:
2848        break;
2849      }
2850
2851    if (structdef == skeyseen)   /* token right after a struct-like keyword */
2852      {
2853        structdef = stagseen;
2854        return true;
2855      }
2856
2857    if (typdef != tnone)
2858      definedef = dnone;
2859
2860    /* Detect Objective C constructs. */
2861    switch (objdef)
2862      {
2863      case onone:
2864        switch (toktype)
2865          {
2866          case st_C_objprot:
2867            objdef = oprotocol;
2868            return false;
2869          case st_C_objimpl:
2870            objdef = oimplementation;
2871            return false;
2872          default:
2873            break;
2874          }
2875        break;
2876      case oimplementation:
2877        /* Save the class tag for functions or variables defined inside. */
2878        objtag = savenstr (str, len);
2879        objdef = oinbody;
2880        return false;
2881      case oprotocol:
2882        /* Save the class tag for categories. */
2883        objtag = savenstr (str, len);
2884        objdef = otagseen;
2885        *is_func_or_var = true;
2886        return true;
2887      case oparenseen:
2888        objdef = ocatseen;
2889        *is_func_or_var = true;
2890        return true;
2891      case oinbody:
2892        break;
2893      case omethodsign:
2894        if (parlev == 0)
2895          {
2896            fvdef = fvnone;
2897            objdef = omethodtag;
               /* The method name in token_name starts with this token.  */
2898            linebuffer_setlen (&token_name, len);
2899            memcpy (token_name.buffer, str, len);
2900            token_name.buffer[len] = '\0';
2901            return true;
2902          }
2903        return false;
2904      case omethodcolon:
2905        if (parlev == 0)
2906          objdef = omethodparm;
2907        return false;
2908      case omethodparm:
2909        if (parlev == 0)
2910          {
2911            objdef = omethodtag;
2912            if (class_qualify)
2913              {
                   /* Append this token to the name built so far.  */
2914                int oldlen = token_name.len;
2915                fvdef = fvnone;
2916                linebuffer_setlen (&token_name, oldlen + len);
2917                memcpy (token_name.buffer + oldlen, str, len);
2918                token_name.buffer[oldlen + len] = '\0';
2919              }
2920            return true;
2921          }
2922        return false;
2923      case oignore:
2924        if (toktype == st_C_objend)
2925          {
2926            /* Memory leakage here: the string pointed by objtag is
2927               never released, because many tests would be needed to
2928               avoid breaking on incorrect input code.  The amount of
2929               memory leaked here is the sum of the lengths of the
2930               class tags.
2931            free (objtag); */
2932            objdef = onone;
2933          }
2934        return false;
2935      default:
2936        break;
2937      }
2938
2939    /* A function, variable or enum constant? */
2940    switch (toktype)
2941      {
2942      case st_C_extern:
2943        fvextern = true;
2944        switch  (fvdef)
2945          {
2946          case finlist:
2947          case flistseen:
2948          case fignore:
2949          case vignore:
2950            break;
2951          default:
2952            fvdef = fvnone;
2953          }
2954        return false;
2955      case st_C_ignore:
2956        fvextern = false;
2957        fvdef = vignore;
2958        return false;
2959      case st_C_operator:
2960        fvdef = foperator;
2961        *is_func_or_var = true;
2962        return true;
2963      case st_none:
2964        if (constantypedefs
2965            && structdef == snone
2966            && structtype == st_C_enum && bracelev > structbracelev
2967            /* Don't tag tokens in expressions that assign values to enum
2968               constants.  */
2969            && fvdef != vignore)
2970          return true;           /* enum constant */
2971        switch (fvdef)
2972          {
2973          case fdefunkey:
2974            if (bracelev > 0)
2975              break;
2976            fvdef = fdefunname;  /* GNU macro */
2977            *is_func_or_var = true;
2978            return true;
2979          case fvnone:
2980            switch (typdef)
2981              {
2982              case ttypeseen:
2983                return false;
2984              case tnone:
2985                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2986                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2987                  {
2988                    fvdef = vignore;
2989                    return false;
2990                  }
2991                break;
2992              default:
2993                break;
2994              }
2995           /* FALLTHRU */
2996           case fvnameseen:
2997           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2998             {
2999               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3000                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3001               fvdef = foperator;
3002               *is_func_or_var = true;
3003               return true;
3004             }
3005           if (bracelev > 0 && !instruct)
3006             break;
3007           fvdef = fvnameseen;   /* function or variable */
3008           *is_func_or_var = true;
3009           return true;
3010          default:
3011            break;
3012         }
3013       break;
3014      default:
3015        break;
3016     }
3017
3018   return false;
3019 }
3020
3021 \f
3022 /*
3023  * C_entries often keeps pointers to tokens or lines which are older than
3024  * the line currently read.  By keeping two line buffers, and switching
3025  * them at end of line, it is possible to use those pointers.
      *
      * The macros below expand to plain references to curndx, newndx,
      * c_ext, lp, quotednl, savetoken, inf and charno, so those names
      * must be in scope wherever the macros are used.
3026  */
3027 static struct
3028 {
3029   long linepos;
3030   linebuffer lb;
3031 } lbs[2];
3032
3033 #define current_lb_is_new (newndx == curndx)
3034 #define switch_line_buffers() (curndx = 1 - curndx)
3035
3036 #define curlb (lbs[curndx].lb)
3037 #define newlb (lbs[newndx].lb)
3038 #define curlinepos (lbs[curndx].linepos)
3039 #define newlinepos (lbs[newndx].linepos)
3040
3041 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3042 #define cplpl (c_ext & C_PLPL)
3043 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3044
     /* Read the next input line into the current line buffer and make it
        the new one as well; definedef is left untouched.  */
3045 #define CNL_SAVE_DEFINEDEF()                                            \
3046 do {                                                                    \
3047   curlinepos = charno;                                                  \
3048   readline (&curlb, inf);                                               \
3049   lp = curlb.buffer;                                                    \
3050   quotednl = false;                                                     \
3051   newndx = curndx;                                                      \
3052 } while (0)
3053
     /* Like CNL_SAVE_DEFINEDEF, but also restore the token kept in
        savetoken, if that one is valid, and reset definedef to dnone.  */
3054 #define CNL()                                                           \
3055 do {                                                                    \
3056   CNL_SAVE_DEFINEDEF ();                                                \
3057   if (savetoken.valid)                                                  \
3058     {                                                                   \
3059       token = savetoken;                                                \
3060       savetoken.valid = false;                                          \
3061     }                                                                   \
3062   definedef = dnone;                                                    \
3063 } while (0)
3064
3065
3066 static void
3067 make_C_tag (bool isfun)
3068 {
3069   /* This function is never called when token.valid is false, but
3070      we must protect against invalid input or internal errors. */
3071   if (token.valid)
3072     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3073               token.offset+token.length+1, token.lineno, token.linepos);
3074   else if (DEBUG)
3075     {                             /* this branch is optimized away if !DEBUG */
3076       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3077                 token_name.len + 17, isfun, token.line,
3078                 token.offset+token.length+1, token.lineno, token.linepos);
3079       error ("INVALID TOKEN");
3080     }
3081
3082   token.valid = false;
3083 }
3084
/* Return true unless the end-of-file or the error indicator is set
   on the stream INF.  */
static bool
perhaps_more_input (FILE *inf)
{
  if (feof (inf))
    return false;
  return ferror (inf) == 0;
}
3090
3091
3092 /*
3093  * C_entries ()
3094  *      This routine finds functions, variables, typedefs,
3095  *      #define's, enum constants and struct/union/enum definitions in
3096  *      C syntax and adds them to the list.
3097  */
3098 static void
3099 C_entries (int c_ext, FILE *inf)
3100                                 /* extension of C */
3101                                 /* input file */
3102 {
3103   register char c;              /* latest char read; '\0' for end of line */
3104   register char *lp;            /* pointer one beyond the character `c' */
3105   int curndx, newndx;           /* indices for current and new lb */
3106   register int tokoff;          /* offset in line of start of current token */
3107   register int toklen;          /* length of current token */
3108   const char *qualifier;        /* string used to qualify names */
3109   int qlen;                     /* length of qualifier */
3110   int bracelev;                 /* current brace level */
3111   int bracketlev;               /* current bracket level */
3112   int parlev;                   /* current parenthesis level */
3113   int attrparlev;               /* __attribute__ parenthesis level */
3114   int templatelev;              /* current template level */
3115   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3116   bool incomm, inquote, inchar, quotednl, midtoken;
3117   bool yacc_rules;              /* in the rules part of a yacc file */
3118   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3119
3120
3121   linebuffer_init (&lbs[0].lb);
3122   linebuffer_init (&lbs[1].lb);
3123   if (cstack.size == 0)
3124     {
3125       cstack.size = (DEBUG) ? 1 : 4;
3126       cstack.nl = 0;
3127       cstack.cname = xnew (cstack.size, char *);
3128       cstack.bracelev = xnew (cstack.size, int);
3129     }
3130
3131   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3132   curndx = newndx = 0;
3133   lp = curlb.buffer;
3134   *lp = 0;
3135
3136   fvdef = fvnone; fvextern = false; typdef = tnone;
3137   structdef = snone; definedef = dnone; objdef = onone;
3138   yacc_rules = false;
3139   midtoken = inquote = inchar = incomm = quotednl = false;
3140   token.valid = savetoken.valid = false;
3141   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3142   if (cjava)
3143     { qualifier = "."; qlen = 1; }
3144   else
3145     { qualifier = "::"; qlen = 2; }
3146
3147
3148   while (perhaps_more_input (inf))
3149     {
3150       c = *lp++;
3151       if (c == '\\')
3152         {
3153           /* If we are at the end of the line, the next character is a
3154              '\0'; do not skip it, because it is what tells us
3155              to read the next line.  */
3156           if (*lp == '\0')
3157             {
3158               quotednl = true;
3159               continue;
3160             }
3161           lp++;
3162           c = ' ';
3163         }
3164       else if (incomm)
3165         {
3166           switch (c)
3167             {
3168             case '*':
3169               if (*lp == '/')
3170                 {
3171                   c = *lp++;
3172                   incomm = false;
3173                 }
3174               break;
3175             case '\0':
3176               /* Newlines inside comments do not end macro definitions in
3177                  traditional cpp. */
3178               CNL_SAVE_DEFINEDEF ();
3179               break;
3180             }
3181           continue;
3182         }
3183       else if (inquote)
3184         {
3185           switch (c)
3186             {
3187             case '"':
3188               inquote = false;
3189               break;
3190             case '\0':
3191               /* Newlines inside strings do not end macro definitions
3192                  in traditional cpp, even though compilers don't
3193                  usually accept them. */
3194               CNL_SAVE_DEFINEDEF ();
3195               break;
3196             }
3197           continue;
3198         }
3199       else if (inchar)
3200         {
3201           switch (c)
3202             {
3203             case '\0':
3204               /* Hmmm, something went wrong. */
3205               CNL ();
3206               /* FALLTHRU */
3207             case '\'':
3208               inchar = false;
3209               break;
3210             }
3211           continue;
3212         }
3213       else switch (c)
3214         {
3215         case '"':
3216           inquote = true;
3217           if (bracketlev > 0)
3218             continue;
3219           if (inattribute)
3220             break;
3221           switch (fvdef)
3222             {
3223             case fdefunkey:
3224             case fstartlist:
3225             case finlist:
3226             case fignore:
3227             case vignore:
3228               break;
3229             default:
3230               fvextern = false;
3231               fvdef = fvnone;
3232             }
3233           continue;
3234         case '\'':
3235           inchar = true;
3236           if (bracketlev > 0)
3237             continue;
3238           if (inattribute)
3239             break;
3240           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3241             {
3242               fvextern = false;
3243               fvdef = fvnone;
3244             }
3245           continue;
3246         case '/':
3247           if (*lp == '*')
3248             {
3249               incomm = true;
3250               lp++;
3251               c = ' ';
3252               if (bracketlev > 0)
3253                 continue;
3254             }
3255           else if (/* cplpl && */ *lp == '/')
3256             {
3257               c = '\0';
3258             }
3259           break;
3260         case '%':
3261           if ((c_ext & YACC) && *lp == '%')
3262             {
3263               /* Entering or exiting rules section in yacc file. */
3264               lp++;
3265               definedef = dnone; fvdef = fvnone; fvextern = false;
3266               typdef = tnone; structdef = snone;
3267               midtoken = inquote = inchar = incomm = quotednl = false;
3268               bracelev = 0;
3269               yacc_rules = !yacc_rules;
3270               continue;
3271             }
3272           else
3273             break;
3274         case '#':
3275           if (definedef == dnone)
3276             {
3277               char *cp;
3278               bool cpptoken = true;
3279
3280               /* Look back on this line.  If all blanks, or nonblanks
3281                  followed by an end of comment, this is a preprocessor
3282                  token. */
3283               for (cp = newlb.buffer; cp < lp-1; cp++)
3284                 if (!c_isspace (*cp))
3285                   {
3286                     if (*cp == '*' && cp[1] == '/')
3287                       {
3288                         cp++;
3289                         cpptoken = true;
3290                       }
3291                     else
3292                       cpptoken = false;
3293                   }
3294               if (cpptoken)
3295                 {
3296                   definedef = dsharpseen;
3297                   /* This is needed for tagging enum values: when there are
3298                      preprocessor conditionals inside the enum, we need to
3299                      reset the value of fvdef so that the next enum value is
3300                      tagged even though the one before it did not end in a
3301                      comma.  */
3302                   if (fvdef == vignore && instruct && parlev == 0)
3303                     {
3304                       if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3305                         fvdef = fvnone;
3306                     }
3307                 }
3308             } /* if (definedef == dnone) */
3309           continue;
3310         case '[':
3311           bracketlev++;
3312           continue;
3313         default:
3314           if (bracketlev > 0)
3315             {
3316               if (c == ']')
3317                 --bracketlev;
3318               else if (c == '\0')
3319                 CNL_SAVE_DEFINEDEF ();
3320               continue;
3321             }
3322           break;
3323         } /* switch (c) */
3324
3325
3326       /* Consider token only if some involved conditions are satisfied. */
3327       if (typdef != tignore
3328           && definedef != dignorerest
3329           && fvdef != finlist
3330           && templatelev == 0
3331           && (definedef != dnone
3332               || structdef != scolonseen)
3333           && !inattribute)
3334         {
3335           if (midtoken)
3336             {
3337               if (endtoken (c))
3338                 {
3339                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3340                     /* This handles :: in the middle,
3341                        but not at the beginning of an identifier.
3342                        Also, space-separated :: is not recognized. */
3343                     {
3344                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3345                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3346                       lp += 2;
3347                       toklen += 2;
3348                       c = lp[-1];
3349                       goto still_in_token;
3350                     }
3351                   else
3352                     {
3353                       bool funorvar = false;
3354
3355                       if (yacc_rules
3356                           || consider_token (newlb.buffer + tokoff, toklen, c,
3357                                              &c_ext, bracelev, parlev,
3358                                              &funorvar))
3359                         {
3360                           if (fvdef == foperator)
3361                             {
3362                               char *oldlp = lp;
3363                               lp = skip_spaces (lp-1);
3364                               if (*lp != '\0')
3365                                 lp += 1;
3366                               while (*lp != '\0'
3367                                      && !c_isspace (*lp) && *lp != '(')
3368                                 lp += 1;
3369                               c = *lp++;
3370                               toklen += lp - oldlp;
3371                             }
3372                           token.named = false;
3373                           if (!plainc
3374                               && nestlev > 0 && definedef == dnone)
3375                             /* in struct body */
3376                             {
3377                               if (class_qualify)
3378                                 {
3379                                   int len;
3380                                   write_classname (&token_name, qualifier);
3381                                   len = token_name.len;
3382                                   linebuffer_setlen (&token_name,
3383                                                      len + qlen + toklen);
3384                                   sprintf (token_name.buffer + len, "%s%.*s",
3385                                            qualifier, toklen,
3386                                            newlb.buffer + tokoff);
3387                                 }
3388                               else
3389                                 {
3390                                   linebuffer_setlen (&token_name, toklen);
3391                                   sprintf (token_name.buffer, "%.*s",
3392                                            toklen, newlb.buffer + tokoff);
3393                                 }
3394                               token.named = true;
3395                             }
3396                           else if (objdef == ocatseen)
3397                             /* Objective C category */
3398                             {
3399                               if (class_qualify)
3400                                 {
3401                                   int len = strlen (objtag) + 2 + toklen;
3402                                   linebuffer_setlen (&token_name, len);
3403                                   sprintf (token_name.buffer, "%s(%.*s)",
3404                                            objtag, toklen,
3405                                            newlb.buffer + tokoff);
3406                                 }
3407                               else
3408                                 {
3409                                   linebuffer_setlen (&token_name, toklen);
3410                                   sprintf (token_name.buffer, "%.*s",
3411                                            toklen, newlb.buffer + tokoff);
3412                                 }
3413                               token.named = true;
3414                             }
3415                           else if (objdef == omethodtag
3416                                    || objdef == omethodparm)
3417                             /* Objective C method */
3418                             {
3419                               token.named = true;
3420                             }
3421                           else if (fvdef == fdefunname)
3422                             /* GNU DEFUN and similar macros */
3423                             {
3424                               bool defun = (newlb.buffer[tokoff] == 'F');
3425                               int off = tokoff;
3426                               int len = toklen;
3427
3428                               /* Rewrite the tag so that emacs lisp DEFUNs
3429                                  can be found by their elisp name */
3430                               if (defun)
3431                                 {
3432                                   off += 1;
3433                                   len -= 1;
3434                                 }
3435                               linebuffer_setlen (&token_name, len);
3436                               memcpy (token_name.buffer,
3437                                       newlb.buffer + off, len);
3438                               token_name.buffer[len] = '\0';
3439                               if (defun)
3440                                 while (--len >= 0)
3441                                   if (token_name.buffer[len] == '_')
3442                                     token_name.buffer[len] = '-';
3443                               token.named = defun;
3444                             }
3445                           else
3446                             {
3447                               linebuffer_setlen (&token_name, toklen);
3448                               memcpy (token_name.buffer,
3449                                       newlb.buffer + tokoff, toklen);
3450                               token_name.buffer[toklen] = '\0';
3451                               /* Name macros and members. */
3452                               token.named = (structdef == stagseen
3453                                              || typdef == ttypeseen
3454                                              || typdef == tend
3455                                              || (funorvar
3456                                                  && definedef == dignorerest)
3457                                              || (funorvar
3458                                                  && definedef == dnone
3459                                                  && structdef == snone
3460                                                  && bracelev > 0));
3461                             }
3462                           token.lineno = lineno;
3463                           token.offset = tokoff;
3464                           token.length = toklen;
3465                           token.line = newlb.buffer;
3466                           token.linepos = newlinepos;
3467                           token.valid = true;
3468
3469                           if (definedef == dnone
3470                               && (fvdef == fvnameseen
3471                                   || fvdef == foperator
3472                                   || structdef == stagseen
3473                                   || typdef == tend
3474                                   || typdef == ttypeseen
3475                                   || objdef != onone))
3476                             {
3477                               if (current_lb_is_new)
3478                                 switch_line_buffers ();
3479                             }
3480                           else if (definedef != dnone
3481                                    || fvdef == fdefunname
3482                                    || instruct)
3483                             make_C_tag (funorvar);
3484                         }
3485                       else /* not yacc and consider_token failed */
3486                         {
3487                           if (inattribute && fvdef == fignore)
3488                             {
3489                               /* We have just met __attribute__ after a
3490                                  function parameter list: do not tag the
3491                                  function again. */
3492                               fvdef = fvnone;
3493                             }
3494                         }
3495                       midtoken = false;
3496                     }
3497                 } /* if (endtoken (c)) */
3498               else if (intoken (c))
3499                 still_in_token:
3500                 {
3501                   toklen++;
3502                   continue;
3503                 }
3504             } /* if (midtoken) */
3505           else if (begtoken (c))
3506             {
3507               switch (definedef)
3508                 {
3509                 case dnone:
3510                   switch (fvdef)
3511                     {
3512                     case fstartlist:
3513                       /* This prevents tagging fb in
3514                          void (__attribute__((noreturn)) *fb) (void);
3515                          Fixing this is not easy and not very important. */
3516                       fvdef = finlist;
3517                       continue;
3518                     case flistseen:
3519                       if (plainc || declarations)
3520                         {
3521                           make_C_tag (true); /* a function */
3522                           fvdef = fignore;
3523                         }
3524                       break;
3525                     default:
3526                       break;
3527                     }
3528                   if (structdef == stagseen && !cjava)
3529                     {
3530                       popclass_above (bracelev);
3531                       structdef = snone;
3532                     }
3533                   break;
3534                 case dsharpseen:
3535                   savetoken = token;
3536                   break;
3537                 default:
3538                   break;
3539                 }
3540               if (!yacc_rules || lp == newlb.buffer + 1)
3541                 {
3542                   tokoff = lp - 1 - newlb.buffer;
3543                   toklen = 1;
3544                   midtoken = true;
3545                 }
3546               continue;
3547             } /* if (begtoken) */
3548         } /* if must look at token */
3549
3550
3551       /* Detect end of line, colon, comma, semicolon and various braces
3552          after having handled a token.*/
3553       switch (c)
3554         {
3555         case ':':
3556           if (inattribute)
3557             break;
3558           if (yacc_rules && token.offset == 0 && token.valid)
3559             {
3560               make_C_tag (false); /* a yacc function */
3561               break;
3562             }
3563           if (definedef != dnone)
3564             break;
3565           switch (objdef)
3566             {
3567             case otagseen:
3568               objdef = oignore;
3569               make_C_tag (true); /* an Objective C class */
3570               break;
3571             case omethodtag:
3572             case omethodparm:
3573               objdef = omethodcolon;
3574               if (class_qualify)
3575                 {
3576                   int toklen = token_name.len;
3577                   linebuffer_setlen (&token_name, toklen + 1);
3578                   strcpy (token_name.buffer + toklen, ":");
3579                 }
3580               break;
3581             default:
3582               break;
3583             }
3584           if (structdef == stagseen)
3585             {
3586               structdef = scolonseen;
3587               break;
3588             }
3589           /* Should be useless, but may work as a safety net. */
3590           if (cplpl && fvdef == flistseen)
3591             {
3592               make_C_tag (true); /* a function */
3593               fvdef = fignore;
3594               break;
3595             }
3596           break;
3597         case ';':
3598           if (definedef != dnone || inattribute)
3599             break;
3600           switch (typdef)
3601             {
3602             case tend:
3603             case ttypeseen:
3604               make_C_tag (false); /* a typedef */
3605               typdef = tnone;
3606               fvdef = fvnone;
3607               break;
3608             case tnone:
3609             case tinbody:
3610             case tignore:
3611               switch (fvdef)
3612                 {
3613                 case fignore:
3614                   if (typdef == tignore || cplpl)
3615                     fvdef = fvnone;
3616                   break;
3617                 case fvnameseen:
3618                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3619                       || (members && instruct))
3620                     make_C_tag (false); /* a variable */
3621                   fvextern = false;
3622                   fvdef = fvnone;
3623                   token.valid = false;
3624                   break;
3625                 case flistseen:
3626                   if ((declarations
3627                        && (cplpl || !instruct)
3628                        && (typdef == tnone || (typdef != tignore && instruct)))
3629                       || (members
3630                           && plainc && instruct))
3631                     make_C_tag (true);  /* a function */
3632                   /* FALLTHRU */
3633                 default:
3634                   fvextern = false;
3635                   fvdef = fvnone;
3636                   if (declarations
3637                        && cplpl && structdef == stagseen)
3638                     make_C_tag (false); /* forward declaration */
3639                   else
3640                     token.valid = false;
3641                 } /* switch (fvdef) */
3642               /* FALLTHRU */
3643             default:
3644               if (!instruct)
3645                 typdef = tnone;
3646             }
3647           if (structdef == stagseen)
3648             structdef = snone;
3649           break;
3650         case ',':
3651           if (definedef != dnone || inattribute)
3652             break;
3653           switch (objdef)
3654             {
3655             case omethodtag:
3656             case omethodparm:
3657               make_C_tag (true); /* an Objective C method */
3658               objdef = oinbody;
3659               break;
3660             default:
3661               break;
3662             }
3663           switch (fvdef)
3664             {
3665             case fdefunkey:
3666             case foperator:
3667             case fstartlist:
3668             case finlist:
3669             case fignore:
3670               break;
3671             case vignore:
3672               if (instruct && parlev == 0)
3673                 fvdef = fvnone;
3674               break;
3675             case fdefunname:
3676               fvdef = fignore;
3677               break;
3678             case fvnameseen:
3679               if (parlev == 0
3680                   && ((globals
3681                        && bracelev == 0
3682                        && templatelev == 0
3683                        && (!fvextern || declarations))
3684                       || (members && instruct)))
3685                   make_C_tag (false); /* a variable */
3686               break;
3687             case flistseen:
3688               if ((declarations && typdef == tnone && !instruct)
3689                   || (members && typdef != tignore && instruct))
3690                 {
3691                   make_C_tag (true); /* a function */
3692                   fvdef = fvnameseen;
3693                 }
3694               else if (!declarations)
3695                 fvdef = fvnone;
3696               token.valid = false;
3697               break;
3698             default:
3699               fvdef = fvnone;
3700             }
3701           if (structdef == stagseen)
3702             structdef = snone;
3703           break;
3704         case ']':
3705           if (definedef != dnone || inattribute)
3706             break;
3707           if (structdef == stagseen)
3708             structdef = snone;
3709           switch (typdef)
3710             {
3711             case ttypeseen:
3712             case tend:
3713               typdef = tignore;
3714               make_C_tag (false);       /* a typedef */
3715               break;
3716             case tnone:
3717             case tinbody:
3718               switch (fvdef)
3719                 {
3720                 case foperator:
3721                 case finlist:
3722                 case fignore:
3723                 case vignore:
3724                   break;
3725                 case fvnameseen:
3726                   if ((members && bracelev == 1)
3727                       || (globals && bracelev == 0
3728                           && (!fvextern || declarations)))
3729                     make_C_tag (false); /* a variable */
3730                   /* FALLTHRU */
3731                 default:
3732                   fvdef = fvnone;
3733                 }
3734               break;
3735             default:
3736               break;
3737             }
3738           break;
3739         case '(':
3740           if (inattribute)
3741             {
3742               attrparlev++;
3743               break;
3744             }
3745           if (definedef != dnone)
3746             break;
3747           if (objdef == otagseen && parlev == 0)
3748             objdef = oparenseen;
3749           switch (fvdef)
3750             {
3751             case fvnameseen:
3752               if (typdef == ttypeseen
3753                   && *lp != '*'
3754                   && !instruct)
3755                 {
3756                   /* This handles constructs like:
3757                      typedef void OperatorFun (int fun); */
3758                   make_C_tag (false);
3759                   typdef = tignore;
3760                   fvdef = fignore;
3761                   break;
3762                 }
3763               /* FALLTHRU */
3764             case foperator:
3765               fvdef = fstartlist;
3766               break;
3767             case flistseen:
3768               fvdef = finlist;
3769               break;
3770             default:
3771               break;
3772             }
3773           parlev++;
3774           break;
3775         case ')':
3776           if (inattribute)
3777             {
3778               if (--attrparlev == 0)
3779                 inattribute = false;
3780               break;
3781             }
3782           if (definedef != dnone)
3783             break;
3784           if (objdef == ocatseen && parlev == 1)
3785             {
3786               make_C_tag (true); /* an Objective C category */
3787               objdef = oignore;
3788             }
3789           if (--parlev == 0)
3790             {
3791               switch (fvdef)
3792                 {
3793                 case fstartlist:
3794                 case finlist:
3795                   fvdef = flistseen;
3796                   break;
3797                 default:
3798                   break;
3799                 }
3800               if (!instruct
3801                   && (typdef == tend
3802                       || typdef == ttypeseen))
3803                 {
3804                   typdef = tignore;
3805                   make_C_tag (false); /* a typedef */
3806                 }
3807             }
3808           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3809             parlev = 0;
3810           break;
3811         case '{':
3812           if (definedef != dnone)
3813             break;
3814           if (typdef == ttypeseen)
3815             {
3816               /* Whenever typdef is set to tinbody (currently only
3817                  here), typdefbracelev should be set to bracelev. */
3818               typdef = tinbody;
3819               typdefbracelev = bracelev;
3820             }
3821           switch (fvdef)
3822             {
3823             case flistseen:
3824               if (cplpl && !class_qualify)
3825                 {
3826                   /* Remove class and namespace qualifiers from the token,
3827                      leaving only the method/member name.  */
3828                   char *cc, *uqname = token_name.buffer;
3829                   char *tok_end = token_name.buffer + token_name.len;
3830
3831                   for (cc = token_name.buffer; cc < tok_end; cc++)
3832                     {
3833                       if (*cc == ':' && cc[1] == ':')
3834                         {
3835                           uqname = cc + 2;
3836                           cc++;
3837                         }
3838                     }
3839                   if (uqname > token_name.buffer)
3840                     {
3841                       int uqlen = strlen (uqname);
3842                       linebuffer_setlen (&token_name, uqlen);
3843                       memmove (token_name.buffer, uqname, uqlen + 1);
3844                     }
3845                 }
3846               make_C_tag (true);    /* a function */
3847               /* FALLTHRU */
3848             case fignore:
3849               fvdef = fvnone;
3850               break;
3851             case fvnone:
3852               switch (objdef)
3853                 {
3854                 case otagseen:
3855                   make_C_tag (true); /* an Objective C class */
3856                   objdef = oignore;
3857                   break;
3858                 case omethodtag:
3859                 case omethodparm:
3860                   make_C_tag (true); /* an Objective C method */
3861                   objdef = oinbody;
3862                   break;
3863                 default:
3864                   /* Neutralize `extern "C" {' grot. */
3865                   if (bracelev == 0 && structdef == snone && nestlev == 0
3866                       && typdef == tnone)
3867                     bracelev = -1;
3868                 }
3869               break;
3870             default:
3871               break;
3872             }
3873           switch (structdef)
3874             {
3875             case skeyseen:         /* unnamed struct */
3876               pushclass_above (bracelev, NULL, 0);
3877               structdef = snone;
3878               break;
3879             case stagseen:         /* named struct or enum */
3880             case scolonseen:       /* a class */
3881               pushclass_above (bracelev,token.line+token.offset, token.length);
3882               structdef = snone;
3883               make_C_tag (false);  /* a struct or enum */
3884               break;
3885             default:
3886               break;
3887             }
3888           bracelev += 1;
3889           break;
3890         case '*':
3891           if (definedef != dnone)
3892             break;
3893           if (fvdef == fstartlist)
3894             {
3895               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3896               token.valid = false;
3897             }
3898           break;
3899         case '}':
3900           if (definedef != dnone)
3901             break;
3902           bracelev -= 1;
3903           if (!ignoreindent && lp == newlb.buffer + 1)
3904             {
3905               if (bracelev != 0)
3906                 token.valid = false; /* unexpected value, token unreliable */
3907               bracelev = 0;     /* reset brace level if first column */
3908               parlev = 0;       /* also reset paren level, just in case... */
3909             }
3910           else if (bracelev < 0)
3911             {
3912               token.valid = false; /* something gone amiss, token unreliable */
3913               bracelev = 0;
3914             }
3915           if (bracelev == 0 && fvdef == vignore)
3916             fvdef = fvnone;             /* end of function */
3917           popclass_above (bracelev);
3918           structdef = snone;
3919           /* Only if typdef == tinbody is typdefbracelev significant. */
3920           if (typdef == tinbody && bracelev <= typdefbracelev)
3921             {
3922               assert (bracelev == typdefbracelev);
3923               typdef = tend;
3924             }
3925           break;
3926         case '=':
3927           if (definedef != dnone)
3928             break;
3929           switch (fvdef)
3930             {
3931             case foperator:
3932             case finlist:
3933             case fignore:
3934             case vignore:
3935               break;
3936             case fvnameseen:
3937               if ((members && bracelev == 1)
3938                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3939                 make_C_tag (false); /* a variable */
3940               /* FALLTHRU */
3941             default:
3942               fvdef = vignore;
3943             }
3944           break;
3945         case '<':
3946           if (cplpl
3947               && (structdef == stagseen || fvdef == fvnameseen))
3948             {
3949               templatelev++;
3950               break;
3951             }
3952           goto resetfvdef;
3953         case '>':
3954           if (templatelev > 0)
3955             {
3956               templatelev--;
3957               break;
3958             }
3959           goto resetfvdef;
3960         case '+':
3961         case '-':
3962           if (objdef == oinbody && bracelev == 0)
3963             {
3964               objdef = omethodsign;
3965               break;
3966             }
3967           /* FALLTHRU */
3968         resetfvdef:
3969         case '#': case '~': case '&': case '%': case '/':
3970         case '|': case '^': case '!': case '.': case '?':
3971           if (definedef != dnone)
3972             break;
3973           /* These surely cannot follow a function tag in C. */
3974           switch (fvdef)
3975             {
3976             case foperator:
3977             case finlist:
3978             case fignore:
3979             case vignore:
3980               break;
3981             default:
3982               fvdef = fvnone;
3983             }
3984           break;
3985         case '\0':
3986           if (objdef == otagseen)
3987             {
3988               make_C_tag (true); /* an Objective C class */
3989               objdef = oignore;
3990             }
3991           /* If a macro spans multiple lines don't reset its state. */
3992           if (quotednl)
3993             CNL_SAVE_DEFINEDEF ();
3994           else
3995             CNL ();
3996           break;
3997         } /* switch (c) */
3998
3999     } /* while not eof */
4000
4001   free (lbs[0].lb.buffer);
4002   free (lbs[1].lb.buffer);
4003 }
4004
4005 /*
4006  * Process either a C++ file or a C file depending on the setting
4007  * of a global flag.
4008  */
4009 static void
4010 default_C_entries (FILE *inf)
4011 {
4012   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4013 }
4014
/* Parse INF as plain C: no language-extension flag is handed to
   C_entries. */
static void
plain_C_entries (FILE *inf)
{
  const int no_extensions = 0;

  C_entries (no_extensions, inf);
}
4021
4022 /* Always do C++. */
4023 static void
4024 Cplusplus_entries (FILE *inf)
4025 {
4026   C_entries (C_PLPL, inf);
4027 }
4028
4029 /* Always do Java. */
4030 static void
4031 Cjava_entries (FILE *inf)
4032 {
4033   C_entries (C_JAVA, inf);
4034 }
4035
4036 /* Always do C*. */
4037 static void
4038 Cstar_entries (FILE *inf)
4039 {
4040   C_entries (C_STAR, inf);
4041 }
4042
4043 /* Always do Yacc. */
4044 static void
4045 Yacc_entries (FILE *inf)
4046 {
4047   C_entries (YACC, inf);
4048 }
4049
4050 \f
/* Useful macros. */

/* Loop header that walks STREAM one line at a time: as long as
   perhaps_more_input says there may be more, the next line is read into
   LINEBUF and CURSOR is set to the start of that line's text.  The
   statement that follows the macro invocation is the loop body.
   LINEBUF is evaluated twice and STREAM twice per iteration. */
#define LOOP_ON_INPUT_LINES(stream, linebuf, cursor)                    \
  while (perhaps_more_input (stream)                                    \
         && (readline (&(linebuf), stream),                             \
             (cursor) = (linebuf).buffer,                               \
             true))

/* True if the text at PTR starts with the keyword WORD and the keyword
   ends there, i.e. the character right after it satisfies notinname.
   On success PTR is moved past the keyword and then to wherever
   skip_spaces stops; on failure PTR is left untouched.
   WORD must be a string literal: pasting it after "" is a syntax error
   otherwise.  PTR is evaluated several times and must be assignable. */
#define LOOKING_AT(ptr, word)                                           \
  ((assert ("" word), true)                                             \
   && strneq ((ptr), word, sizeof (word) - 1)                           \
   && notinname ((ptr)[sizeof (word) - 1])                              \
   && ((ptr) = skip_spaces ((ptr) + sizeof (word) - 1)))
4063
/* Similar to LOOKING_AT, with these differences: the keyword is
   compared with strncaseeq, the character following the keyword is not
   checked with notinname, and on success PTR is only advanced to just
   past the keyword -- spaces after it are NOT skipped.
   WORD must be a string literal (syntax error otherwise). */
#define LOOKING_AT_NOCASE(ptr, word)                                    \
  ((assert ("" word), true)                                             \
   && strncaseeq ((ptr), word, sizeof (word) - 1)                       \
   && ((ptr) += sizeof (word) - 1))
4069
4070 /*
4071  * Read a file, but do no processing.  This is used to do regexp
4072  * matching on files that have no language defined.
4073  */
4074 static void
4075 just_read_file (FILE *inf)
4076 {
4077   while (perhaps_more_input (inf))
4078     readline (&lb, inf);
4079 }
4080
4081 \f
4082 /* Fortran parsing */
4083
4084 static void F_takeprec (void);
4085 static void F_getit (FILE *);
4086
4087 static void
4088 F_takeprec (void)
4089 {
4090   dbp = skip_spaces (dbp);
4091   if (*dbp != '*')
4092     return;
4093   dbp++;
4094   dbp = skip_spaces (dbp);
4095   if (strneq (dbp, "(*)", 3))
4096     {
4097       dbp += 3;
4098       return;
4099     }
4100   if (!c_isdigit (*dbp))
4101     {
4102       --dbp;                    /* force failure */
4103       return;
4104     }
4105   do
4106     dbp++;
4107   while (c_isdigit (*dbp));
4108 }
4109
4110 static void
4111 F_getit (FILE *inf)
4112 {
4113   register char *cp;
4114
4115   dbp = skip_spaces (dbp);
4116   if (*dbp == '\0')
4117     {
4118       readline (&lb, inf);
4119       dbp = lb.buffer;
4120       if (dbp[5] != '&')
4121         return;
4122       dbp += 6;
4123       dbp = skip_spaces (dbp);
4124     }
4125   if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4126     return;
4127   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4128     continue;
4129   make_tag (dbp, cp-dbp, true,
4130             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4131 }
4132
4133
4134 static void
4135 Fortran_functions (FILE *inf)
4136 {
4137   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4138     {
4139       if (*dbp == '%')
4140         dbp++;                  /* Ratfor escape to fortran */
4141       dbp = skip_spaces (dbp);
4142       if (*dbp == '\0')
4143         continue;
4144
4145       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4146         dbp = skip_spaces (dbp);
4147
4148       if (LOOKING_AT_NOCASE (dbp, "pure"))
4149         dbp = skip_spaces (dbp);
4150
4151       if (LOOKING_AT_NOCASE (dbp, "elemental"))
4152         dbp = skip_spaces (dbp);
4153
4154       switch (c_tolower (*dbp))
4155         {
4156         case 'i':
4157           if (nocase_tail ("integer"))
4158             F_takeprec ();
4159           break;
4160         case 'r':
4161           if (nocase_tail ("real"))
4162             F_takeprec ();
4163           break;
4164         case 'l':
4165           if (nocase_tail ("logical"))
4166             F_takeprec ();
4167           break;
4168         case 'c':
4169           if (nocase_tail ("complex") || nocase_tail ("character"))
4170             F_takeprec ();
4171           break;
4172         case 'd':
4173           if (nocase_tail ("double"))
4174             {
4175               dbp = skip_spaces (dbp);
4176               if (*dbp == '\0')
4177                 continue;
4178               if (nocase_tail ("precision"))
4179                 break;
4180               continue;
4181             }
4182           break;
4183         }
4184       dbp = skip_spaces (dbp);
4185       if (*dbp == '\0')
4186         continue;
4187       switch (c_tolower (*dbp))
4188         {
4189         case 'f':
4190           if (nocase_tail ("function"))
4191             F_getit (inf);
4192           continue;
4193         case 's':
4194           if (nocase_tail ("subroutine"))
4195             F_getit (inf);
4196           continue;
4197         case 'e':
4198           if (nocase_tail ("entry"))
4199             F_getit (inf);
4200           continue;
4201         case 'b':
4202           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4203             {
4204               dbp = skip_spaces (dbp);
4205               if (*dbp == '\0') /* assume un-named */
4206                 make_tag ("blockdata", 9, true,
4207                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4208               else
4209                 F_getit (inf);  /* look for name */
4210             }
4211           continue;
4212         }
4213     }
4214 }
4215
4216 \f
4217 /*
4218  * Go language support
4219  * Original code by Xi Lu <lx@shellcodes.org> (2016)
4220  */
4221 static void
4222 Go_functions(FILE *inf)
4223 {
4224   char *cp, *name;
4225
4226   LOOP_ON_INPUT_LINES(inf, lb, cp)
4227     {
4228       cp = skip_spaces (cp);
4229
4230       if (LOOKING_AT (cp, "package"))
4231         {
4232           name = cp;
4233           while (!notinname (*cp) && *cp != '\0')
4234             cp++;
4235           make_tag (name, cp - name, false, lb.buffer,
4236                     cp - lb.buffer + 1, lineno, linecharno);
4237         }
4238       else if (LOOKING_AT (cp, "func"))
4239         {
4240           /* Go implementation of interface, such as:
4241              func (n *Integer) Add(m Integer) ...
4242              skip `(n *Integer)` part.
4243           */
4244           if (*cp == '(')
4245             {
4246               while (*cp != ')')
4247                 cp++;
4248               cp = skip_spaces (cp+1);
4249             }
4250
4251           if (*cp)
4252             {
4253               name = cp;
4254
4255               while (!notinname (*cp))
4256                 cp++;
4257
4258               make_tag (name, cp - name, true, lb.buffer,
4259                         cp - lb.buffer + 1, lineno, linecharno);
4260             }
4261         }
4262       else if (members && LOOKING_AT (cp, "type"))
4263         {
4264           name = cp;
4265
4266           /* Ignore the likes of the following:
4267              type (
4268                     A
4269              )
4270            */
4271           if (*cp == '(')
4272             return;
4273
4274           while (!notinname (*cp) && *cp != '\0')
4275             cp++;
4276
4277           make_tag (name, cp - name, false, lb.buffer,
4278                     cp - lb.buffer + 1, lineno, linecharno);
4279         }
4280     }
4281 }
4282
4283 \f
4284 /*
4285  * Ada parsing
4286  * Original code by
4287  * Philippe Waroquiers (1998)
4288  */
4289
4290 /* Once we are positioned after an "interesting" keyword, let's get
4291    the real tag value necessary. */
4292 static void
4293 Ada_getit (FILE *inf, const char *name_qualifier)
4294 {
4295   register char *cp;
4296   char *name;
4297   char c;
4298
4299   while (perhaps_more_input (inf))
4300     {
4301       dbp = skip_spaces (dbp);
4302       if (*dbp == '\0'
4303           || (dbp[0] == '-' && dbp[1] == '-'))
4304         {
4305           readline (&lb, inf);
4306           dbp = lb.buffer;
4307         }
4308       switch (c_tolower (*dbp))
4309         {
4310         case 'b':
4311           if (nocase_tail ("body"))
4312             {
4313               /* Skipping body of   procedure body   or   package body or ....
4314                  resetting qualifier to body instead of spec. */
4315               name_qualifier = "/b";
4316               continue;
4317             }
4318           break;
4319         case 't':
4320           /* Skipping type of   task type   or   protected type ... */
4321           if (nocase_tail ("type"))
4322             continue;
4323           break;
4324         }
4325       if (*dbp == '"')
4326         {
4327           dbp += 1;
4328           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4329             continue;
4330         }
4331       else
4332         {
4333           dbp = skip_spaces (dbp);
4334           for (cp = dbp;
4335                c_isalnum (*cp) || *cp == '_' || *cp == '.';
4336                cp++)
4337             continue;
4338           if (cp == dbp)
4339             return;
4340         }
4341       c = *cp;
4342       *cp = '\0';
4343       name = concat (dbp, name_qualifier, "");
4344       *cp = c;
4345       make_tag (name, strlen (name), true,
4346                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4347       free (name);
4348       if (c == '"')
4349         dbp = cp + 1;
4350       return;
4351     }
4352 }
4353
4354 static void
4355 Ada_funcs (FILE *inf)
4356 {
4357   bool inquote = false;
4358   bool skip_till_semicolumn = false;
4359
4360   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4361     {
4362       while (*dbp != '\0')
4363         {
4364           /* Skip a string i.e. "abcd". */
4365           if (inquote || (*dbp == '"'))
4366             {
4367               dbp = strchr (dbp + !inquote, '"');
4368               if (dbp != NULL)
4369                 {
4370                   inquote = false;
4371                   dbp += 1;
4372                   continue;     /* advance char */
4373                 }
4374               else
4375                 {
4376                   inquote = true;
4377                   break;        /* advance line */
4378                 }
4379             }
4380
4381           /* Skip comments. */
4382           if (dbp[0] == '-' && dbp[1] == '-')
4383             break;              /* advance line */
4384
4385           /* Skip character enclosed in single quote i.e. 'a'
4386              and skip single quote starting an attribute i.e. 'Image. */
4387           if (*dbp == '\'')
4388             {
4389               dbp++ ;
4390               if (*dbp != '\0')
4391                 dbp++;
4392               continue;
4393             }
4394
4395           if (skip_till_semicolumn)
4396             {
4397               if (*dbp == ';')
4398                 skip_till_semicolumn = false;
4399               dbp++;
4400               continue;         /* advance char */
4401             }
4402
4403           /* Search for beginning of a token.  */
4404           if (!begtoken (*dbp))
4405             {
4406               dbp++;
4407               continue;         /* advance char */
4408             }
4409
4410           /* We are at the beginning of a token. */
4411           switch (c_tolower (*dbp))
4412             {
4413             case 'f':
4414               if (!packages_only && nocase_tail ("function"))
4415                 Ada_getit (inf, "/f");
4416               else
4417                 break;          /* from switch */
4418               continue;         /* advance char */
4419             case 'p':
4420               if (!packages_only && nocase_tail ("procedure"))
4421                 Ada_getit (inf, "/p");
4422               else if (nocase_tail ("package"))
4423                 Ada_getit (inf, "/s");
4424               else if (nocase_tail ("protected")) /* protected type */
4425                 Ada_getit (inf, "/t");
4426               else
4427                 break;          /* from switch */
4428               continue;         /* advance char */
4429
4430             case 'u':
4431               if (typedefs && !packages_only && nocase_tail ("use"))
4432                 {
4433                   /* when tagging types, avoid tagging  use type Pack.Typename;
4434                      for this, we will skip everything till a ; */
4435                   skip_till_semicolumn = true;
4436                   continue;     /* advance char */
4437                 }
4438
4439             case 't':
4440               if (!packages_only && nocase_tail ("task"))
4441                 Ada_getit (inf, "/k");
4442               else if (typedefs && !packages_only && nocase_tail ("type"))
4443                 {
4444                   Ada_getit (inf, "/t");
4445                   while (*dbp != '\0')
4446                     dbp += 1;
4447                 }
4448               else
4449                 break;          /* from switch */
4450               continue;         /* advance char */
4451             }
4452
4453           /* Look for the end of the token. */
4454           while (!endtoken (*dbp))
4455             dbp++;
4456
4457         } /* advance char */
4458     } /* advance line */
4459 }
4460
4461 \f
4462 /*
4463  * Unix and microcontroller assembly tag handling
4464  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4465  * Idea by Bob Weiner, Motorola Inc. (1994)
4466  */
4467 static void
4468 Asm_labels (FILE *inf)
4469 {
4470   register char *cp;
4471
4472   LOOP_ON_INPUT_LINES (inf, lb, cp)
4473     {
4474       /* If first char is alphabetic or one of [_.$], test for colon
4475          following identifier. */
4476       if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4477         {
4478           /* Read past label. */
4479           cp++;
4480           while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4481             cp++;
4482           if (*cp == ':' || c_isspace (*cp))
4483             /* Found end of label, so copy it and add it to the table. */
4484             make_tag (lb.buffer, cp - lb.buffer, true,
4485                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4486         }
4487     }
4488 }
4489
4490 \f
4491 /*
4492  * Perl support
4493  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4494  *                 /^use constant[ \t\n]+[^ \t\n{=,;]+/
4495  * Perl variable names: /^(my|local).../
4496  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4497  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4498  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4499  */
4500 static void
4501 Perl_functions (FILE *inf)
4502 {
4503   char *package = savestr ("main"); /* current package name */
4504   register char *cp;
4505
4506   LOOP_ON_INPUT_LINES (inf, lb, cp)
4507     {
4508       cp = skip_spaces (cp);
4509
4510       if (LOOKING_AT (cp, "package"))
4511         {
4512           free (package);
4513           get_tag (cp, &package);
4514         }
4515       else if (LOOKING_AT (cp, "sub"))
4516         {
4517           char *pos, *sp;
4518
4519         subr:
4520           sp = cp;
4521           while (!notinname (*cp))
4522             cp++;
4523           if (cp == sp)
4524             continue;           /* nothing found */
4525           pos = strchr (sp, ':');
4526           if (pos && pos < cp && pos[1] == ':')
4527             /* The name is already qualified. */
4528             make_tag (sp, cp - sp, true,
4529                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4530           else
4531             /* Qualify it. */
4532             {
4533               char savechar, *name;
4534
4535               savechar = *cp;
4536               *cp = '\0';
4537               name = concat (package, "::", sp);
4538               *cp = savechar;
4539               make_tag (name, strlen (name), true,
4540                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4541               free (name);
4542             }
4543         }
4544       else if (LOOKING_AT (cp, "use constant")
4545                || LOOKING_AT (cp, "use constant::defer"))
4546         {
4547           /* For hash style multi-constant like
4548                 use constant { FOO => 123,
4549                                BAR => 456 };
4550              only the first FOO is picked up.  Parsing across the value
4551              expressions would be difficult in general, due to possible nested
4552              hashes, here-documents, etc.  */
4553           if (*cp == '{')
4554             cp = skip_spaces (cp+1);
4555           goto subr;
4556         }
4557       else if (globals) /* only if we are tagging global vars */
4558         {
4559           /* Skip a qualifier, if any. */
4560           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4561           /* After "my" or "local", but before any following paren or space. */
4562           char *varstart = cp;
4563
4564           if (qual              /* should this be removed?  If yes, how? */
4565               && (*cp == '$' || *cp == '@' || *cp == '%'))
4566             {
4567               varstart += 1;
4568               do
4569                 cp++;
4570               while (c_isalnum (*cp) || *cp == '_');
4571             }
4572           else if (qual)
4573             {
4574               /* Should be examining a variable list at this point;
4575                  could insist on seeing an open parenthesis. */
4576               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4577                 cp++;
4578             }
4579           else
4580             continue;
4581
4582           make_tag (varstart, cp - varstart, false,
4583                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4584         }
4585     }
4586   free (package);
4587 }
4588
4589
4590 /*
4591  * Python support
4592  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4593  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4594  * More ideas by seb bacon <seb@jamkit.com> (2002)
4595  */
4596 static void
4597 Python_functions (FILE *inf)
4598 {
4599   register char *cp;
4600
4601   LOOP_ON_INPUT_LINES (inf, lb, cp)
4602     {
4603       cp = skip_spaces (cp);
4604       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4605         {
4606           char *name = cp;
4607           while (!notinname (*cp) && *cp != ':')
4608             cp++;
4609           make_tag (name, cp - name, true,
4610                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4611         }
4612     }
4613 }
4614
4615 /*
4616  * Ruby support
4617  * Original code by Xi Lu <lx@shellcodes.org> (2015)
4618  */
4619 static void
4620 Ruby_functions (FILE *inf)
4621 {
4622   char *cp = NULL;
4623
4624   LOOP_ON_INPUT_LINES (inf, lb, cp)
4625     {
4626       bool is_class = false;
4627       bool is_method = false;
4628       char *name;
4629
4630       cp = skip_spaces (cp);
4631       if (c_isalpha (*cp) && c_isupper (*cp)) /* constants */
4632         {
4633           char *bp, *colon = NULL;
4634
4635           name = cp;
4636
4637           for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
4638             {
4639               if (*cp == ':')
4640                 colon = cp;
4641             }
4642           if (cp > name + 1)
4643             {
4644               bp = skip_spaces (cp);
4645               if (*bp == '=' && !(bp[1] == '=' || bp[1] == '>'))
4646                 {
4647                   if (colon && !c_isspace (colon[1]))
4648                     name = colon + 1;
4649                   make_tag (name, cp - name, false,
4650                             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4651                 }
4652             }
4653         }
4654       else if ((is_method = LOOKING_AT (cp, "def")) /* module/class/method */
4655                || (is_class = LOOKING_AT (cp, "class"))
4656                || LOOKING_AT (cp, "module"))
4657         {
4658           const char self_name[] = "self.";
4659           const size_t self_size1 = sizeof (self_name) - 1;
4660
4661           name = cp;
4662
4663          /* Ruby method names can end in a '='.  Also, operator overloading can
4664             define operators whose names include '='.  */
4665           while (!notinname (*cp) || *cp == '=')
4666             cp++;
4667
4668           /* Remove "self." from the method name.  */
4669           if (cp - name > self_size1
4670               && strneq (name, self_name, self_size1))
4671             name += self_size1;
4672
4673           /* Remove the class/module qualifiers from method names.  */
4674           if (is_method)
4675             {
4676               char *q;
4677
4678               for (q = name; q < cp && *q != '.'; q++)
4679                 ;
4680               if (q < cp - 1)   /* punt if we see just "FOO." */
4681                 name = q + 1;
4682             }
4683
4684           /* Don't tag singleton classes.  */
4685           if (is_class && strneq (name, "<<", 2) && cp == name + 2)
4686             continue;
4687
4688           make_tag (name, cp - name, true,
4689                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4690         }
4691       else
4692         {
4693           /* Tag accessors and aliases.  */
4694           while (*cp && *cp != '#')
4695             {
4696               bool reader = false, writer = false, alias = false;
4697
4698               if (LOOKING_AT (cp, "attr_reader"))
4699                 reader = true;
4700               else if (LOOKING_AT (cp, "attr_writer"))
4701                 writer = true;
4702               else if (LOOKING_AT (cp, "attr_accessor"))
4703                 {
4704                   reader = true;
4705                   writer = true;
4706                 }
4707               else if (LOOKING_AT (cp, "alias_method"))
4708                 alias = true;
4709               if (reader || writer || alias)
4710                 {
4711                   do {
4712                     char *np = cp;
4713
4714                     cp = skip_name (cp);
4715                     if (reader)
4716                       make_tag (np, cp - np, true,
4717                                 lb.buffer, cp - lb.buffer + 1,
4718                                 lineno, linecharno);
4719                     if (writer)
4720                       {
4721                         size_t name_len = cp - np + 1;
4722                         char *wr_name = xnew (name_len + 1, char);
4723
4724                         memcpy (wr_name, np, name_len - 1);
4725                         memcpy (wr_name + name_len - 1, "=", 2);
4726                         pfnote (wr_name, true, lb.buffer, cp - lb.buffer + 1,
4727                                 lineno, linecharno);
4728                       }
4729                     if (alias)
4730                       {
4731                         make_tag (np, cp - np, true,
4732                                   lb.buffer, cp - lb.buffer + 1,
4733                                   lineno, linecharno);
4734                         while (*cp && *cp != '#' && *cp != ';')
4735                           cp++;
4736                       }
4737                   } while (*cp == ','
4738                            && (cp = skip_spaces (cp + 1), *cp && *cp != '#'));
4739                 }
4740               cp = skip_name (cp);
4741               while (*cp && *cp != '#' && notinname (*cp))
4742                 cp++;
4743             }
4744         }
4745     }
4746 }
4747
4748 \f
4749 /*
4750  * PHP support
4751  * Look for:
4752  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4753  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4754  *  - /^[ \t]*define\(\"[^\"]+/
4755  * Only with --members:
4756  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4757  * Idea by Diez B. Roggisch (2001)
4758  */
4759 static void
4760 PHP_functions (FILE *inf)
4761 {
4762   char *cp, *name;
4763   bool search_identifier = false;
4764
4765   LOOP_ON_INPUT_LINES (inf, lb, cp)
4766     {
4767       cp = skip_spaces (cp);
4768       name = cp;
4769       if (search_identifier
4770           && *cp != '\0')
4771         {
4772           while (!notinname (*cp))
4773             cp++;
4774           make_tag (name, cp - name, true,
4775                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4776           search_identifier = false;
4777         }
4778       else if (LOOKING_AT (cp, "function"))
4779         {
4780           if (*cp == '&')
4781             cp = skip_spaces (cp+1);
4782           if (*cp != '\0')
4783             {
4784               name = cp;
4785               while (!notinname (*cp))
4786                 cp++;
4787               make_tag (name, cp - name, true,
4788                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4789             }
4790           else
4791             search_identifier = true;
4792         }
4793       else if (LOOKING_AT (cp, "class"))
4794         {
4795           if (*cp != '\0')
4796             {
4797               name = cp;
4798               while (*cp != '\0' && !c_isspace (*cp))
4799                 cp++;
4800               make_tag (name, cp - name, false,
4801                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4802             }
4803           else
4804             search_identifier = true;
4805         }
4806       else if (strneq (cp, "define", 6)
4807                && (cp = skip_spaces (cp+6))
4808                && *cp++ == '('
4809                && (*cp == '"' || *cp == '\''))
4810         {
4811           char quote = *cp++;
4812           name = cp;
4813           while (*cp != quote && *cp != '\0')
4814             cp++;
4815           make_tag (name, cp - name, false,
4816                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4817         }
4818       else if (members
4819                && LOOKING_AT (cp, "var")
4820                && *cp == '$')
4821         {
4822           name = cp;
4823           while (!notinname (*cp))
4824             cp++;
4825           make_tag (name, cp - name, false,
4826                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4827         }
4828     }
4829 }
4830
4831 \f
4832 /*
4833  * Cobol tag functions
4834  * We could look for anything that could be a paragraph name.
4835  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4836  * Idea by Corny de Souza (1993)
4837  */
4838 static void
4839 Cobol_paragraphs (FILE *inf)
4840 {
4841   register char *bp, *ep;
4842
4843   LOOP_ON_INPUT_LINES (inf, lb, bp)
4844     {
4845       if (lb.len < 9)
4846         continue;
4847       bp += 8;
4848
4849       /* If eoln, compiler option or comment ignore whole line. */
4850       if (bp[-1] != ' ' || !c_isalnum (bp[0]))
4851         continue;
4852
4853       for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
4854         continue;
4855       if (*ep++ == '.')
4856         make_tag (bp, ep - bp, true,
4857                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4858     }
4859 }
4860
4861 \f
4862 /*
4863  * Makefile support
4864  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4865  */
4866 static void
4867 Makefile_targets (FILE *inf)
4868 {
4869   register char *bp;
4870
4871   LOOP_ON_INPUT_LINES (inf, lb, bp)
4872     {
4873       if (*bp == '\t' || *bp == '#')
4874         continue;
4875       while (*bp != '\0' && *bp != '=' && *bp != ':')
4876         bp++;
4877       if (*bp == ':' || (globals && *bp == '='))
4878         {
4879           /* We should detect if there is more than one tag, but we do not.
4880              We just skip initial and final spaces. */
4881           char * namestart = skip_spaces (lb.buffer);
4882           while (--bp > namestart)
4883             if (!notinname (*bp))
4884               break;
4885           make_tag (namestart, bp - namestart + 1, true,
4886                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4887         }
4888     }
4889 }
4890
4891 \f
4892 /*
4893  * Pascal parsing
4894  * Original code by Mosur K. Mohan (1989)
4895  *
4896  *  Locates tags for procedures & functions.  Doesn't do any type- or
4897  *  var-definitions.  It does look for the keyword "extern" or
4898  *  "forward" immediately following the procedure statement; if found,
4899  *  the tag is skipped.
4900  */
4901 static void
4902 Pascal_functions (FILE *inf)
4903 {
4904   linebuffer tline;             /* mostly copied from C_entries */
4905   long save_lcno;
4906   int save_lineno, namelen, taglen;
4907   char c, *name;
4908
4909   bool                          /* each of these flags is true if: */
4910     incomment,                  /* point is inside a comment */
4911     inquote,                    /* point is inside '..' string */
4912     get_tagname,                /* point is after PROCEDURE/FUNCTION
4913                                    keyword, so next item = potential tag */
4914     found_tag,                  /* point is after a potential tag */
4915     inparms,                    /* point is within parameter-list */
4916     verify_tag;                 /* point has passed the parm-list, so the
4917                                    next token will determine whether this
4918                                    is a FORWARD/EXTERN to be ignored, or
4919                                    whether it is a real tag */
4920
4921   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4922   name = NULL;                  /* keep compiler quiet */
4923   dbp = lb.buffer;
4924   *dbp = '\0';
4925   linebuffer_init (&tline);
4926
4927   incomment = inquote = false;
4928   found_tag = false;            /* have a proc name; check if extern */
4929   get_tagname = false;          /* found "procedure" keyword         */
4930   inparms = false;              /* found '(' after "proc"            */
4931   verify_tag = false;           /* check if "extern" is ahead        */
4932
4933
4934   while (perhaps_more_input (inf)) /* long main loop to get next char */
4935     {
4936       c = *dbp++;
4937       if (c == '\0')            /* if end of line */
4938         {
4939           readline (&lb, inf);
4940           dbp = lb.buffer;
4941           if (*dbp == '\0')
4942             continue;
4943           if (!((found_tag && verify_tag)
4944                 || get_tagname))
4945             c = *dbp++;         /* only if don't need *dbp pointing
4946                                    to the beginning of the name of
4947                                    the procedure or function */
4948         }
4949       if (incomment)
4950         {
4951           if (c == '}')         /* within { } comments */
4952             incomment = false;
4953           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4954             {
4955               dbp++;
4956               incomment = false;
4957             }
4958           continue;
4959         }
4960       else if (inquote)
4961         {
4962           if (c == '\'')
4963             inquote = false;
4964           continue;
4965         }
4966       else
4967         switch (c)
4968           {
4969           case '\'':
4970             inquote = true;     /* found first quote */
4971             continue;
4972           case '{':             /* found open { comment */
4973             incomment = true;
4974             continue;
4975           case '(':
4976             if (*dbp == '*')    /* found open (* comment */
4977               {
4978                 incomment = true;
4979                 dbp++;
4980               }
4981             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4982               inparms = true;
4983             continue;
4984           case ')':             /* end of parms list */
4985             if (inparms)
4986               inparms = false;
4987             continue;
4988           case ';':
4989             if (found_tag && !inparms) /* end of proc or fn stmt */
4990               {
4991                 verify_tag = true;
4992                 break;
4993               }
4994             continue;
4995           }
4996       if (found_tag && verify_tag && (*dbp != ' '))
4997         {
4998           /* Check if this is an "extern" declaration. */
4999           if (*dbp == '\0')
5000             continue;
5001           if (c_tolower (*dbp) == 'e')
5002             {
5003               if (nocase_tail ("extern")) /* superfluous, really! */
5004                 {
5005                   found_tag = false;
5006                   verify_tag = false;
5007                 }
5008             }
5009           else if (c_tolower (*dbp) == 'f')
5010             {
5011               if (nocase_tail ("forward")) /* check for forward reference */
5012                 {
5013                   found_tag = false;
5014                   verify_tag = false;
5015                 }
5016             }
5017           if (found_tag && verify_tag) /* not external proc, so make tag */
5018             {
5019               found_tag = false;
5020               verify_tag = false;
5021               make_tag (name, namelen, true,
5022                         tline.buffer, taglen, save_lineno, save_lcno);
5023               continue;
5024             }
5025         }
5026       if (get_tagname)          /* grab name of proc or fn */
5027         {
5028           char *cp;
5029
5030           if (*dbp == '\0')
5031             continue;
5032
5033           /* Find block name. */
5034           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
5035             continue;
5036
5037           /* Save all values for later tagging. */
5038           linebuffer_setlen (&tline, lb.len);
5039           strcpy (tline.buffer, lb.buffer);
5040           save_lineno = lineno;
5041           save_lcno = linecharno;
5042           name = tline.buffer + (dbp - lb.buffer);
5043           namelen = cp - dbp;
5044           taglen = cp - lb.buffer + 1;
5045
5046           dbp = cp;             /* set dbp to e-o-token */
5047           get_tagname = false;
5048           found_tag = true;
5049           continue;
5050
5051           /* And proceed to check for "extern". */
5052         }
5053       else if (!incomment && !inquote && !found_tag)
5054         {
5055           /* Check for proc/fn keywords. */
5056           switch (c_tolower (c))
5057             {
5058             case 'p':
5059               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
5060                 get_tagname = true;
5061               continue;
5062             case 'f':
5063               if (nocase_tail ("unction"))
5064                 get_tagname = true;
5065               continue;
5066             }
5067         }
5068     } /* while not eof */
5069
5070   free (tline.buffer);
5071 }
5072
5073 \f
5074 /*
5075  * Lisp tag functions
5076  *  look for (def or (DEF, quote or QUOTE
5077  */
5078
5079 static void L_getit (void);
5080
5081 static void
5082 L_getit (void)
5083 {
5084   if (*dbp == '\'')             /* Skip prefix quote */
5085     dbp++;
5086   else if (*dbp == '(')
5087   {
5088     dbp++;
5089     /* Try to skip "(quote " */
5090     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
5091       /* Ok, then skip "(" before name in (defstruct (foo)) */
5092       dbp = skip_spaces (dbp);
5093   }
5094   get_tag (dbp, NULL);
5095 }
5096
5097 static void
5098 Lisp_functions (FILE *inf)
5099 {
5100   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5101     {
5102       if (dbp[0] != '(')
5103         continue;
5104
5105       /* "(defvar foo)" is a declaration rather than a definition.  */
5106       if (! declarations)
5107         {
5108           char *p = dbp + 1;
5109           if (LOOKING_AT (p, "defvar"))
5110             {
5111               p = skip_name (p); /* past var name */
5112               p = skip_spaces (p);
5113               if (*p == ')')
5114                 continue;
5115             }
5116         }
5117
5118       if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
5119         dbp += 3;
5120
5121       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5122         {
5123           dbp = skip_non_spaces (dbp);
5124           dbp = skip_spaces (dbp);
5125           L_getit ();
5126         }
5127       else
5128         {
5129           /* Check for (foo::defmumble name-defined ... */
5130           do
5131             dbp++;
5132           while (!notinname (*dbp) && *dbp != ':');
5133           if (*dbp == ':')
5134             {
5135               do
5136                 dbp++;
5137               while (*dbp == ':');
5138
5139               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5140                 {
5141                   dbp = skip_non_spaces (dbp);
5142                   dbp = skip_spaces (dbp);
5143                   L_getit ();
5144                 }
5145             }
5146         }
5147     }
5148 }
5149
5150 \f
5151 /*
5152  * Lua script language parsing
5153  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5154  *
5155  *  "function" and "local function" are tags if they start at column 1.
5156  */
5157 static void
5158 Lua_functions (FILE *inf)
5159 {
5160   register char *bp;
5161
5162   LOOP_ON_INPUT_LINES (inf, lb, bp)
5163     {
5164       bp = skip_spaces (bp);
5165       if (bp[0] != 'f' && bp[0] != 'l')
5166         continue;
5167
5168       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5169
5170       if (LOOKING_AT (bp, "function"))
5171         {
5172           char *tag_name, *tp_dot, *tp_colon;
5173
5174           get_tag (bp, &tag_name);
5175           /* If the tag ends with ".foo" or ":foo", make an additional tag for
5176              "foo".  */
5177           tp_dot = strrchr (tag_name, '.');
5178           tp_colon = strrchr (tag_name, ':');
5179           if (tp_dot || tp_colon)
5180             {
5181               char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
5182               int len_add = p - tag_name + 1;
5183
5184               get_tag (bp + len_add, NULL);
5185             }
5186         }
5187     }
5188 }
5189
5190 \f
5191 /*
5192  * PostScript tags
5193  * Just look for lines where the first character is '/'
5194  * Also look at "defineps" for PSWrap
5195  * Ideas by:
5196  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
5197  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5198  */
5199 static void
5200 PS_functions (FILE *inf)
5201 {
5202   register char *bp, *ep;
5203
5204   LOOP_ON_INPUT_LINES (inf, lb, bp)
5205     {
5206       if (bp[0] == '/')
5207         {
5208           for (ep = bp+1;
5209                *ep != '\0' && *ep != ' ' && *ep != '{';
5210                ep++)
5211             continue;
5212           make_tag (bp, ep - bp, true,
5213                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5214         }
5215       else if (LOOKING_AT (bp, "defineps"))
5216         get_tag (bp, NULL);
5217     }
5218 }
5219
5220 \f
5221 /*
5222  * Forth tags
5223  * Ignore anything after \ followed by space or in ( )
5224  * Look for words defined by :
5225  * Look for constant, code, create, defer, value, and variable
5226  * OBP extensions:  Look for buffer:, field,
5227  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5228  */
5229 static void
5230 Forth_words (FILE *inf)
5231 {
5232   register char *bp;
5233
5234   LOOP_ON_INPUT_LINES (inf, lb, bp)
5235     while ((bp = skip_spaces (bp))[0] != '\0')
5236       if (bp[0] == '\\' && c_isspace (bp[1]))
5237         break;                  /* read next line */
5238       else if (bp[0] == '(' && c_isspace (bp[1]))
5239         do                      /* skip to ) or eol */
5240           bp++;
5241         while (*bp != ')' && *bp != '\0');
5242       else if ((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5243                || LOOKING_AT_NOCASE (bp, "constant")
5244                || LOOKING_AT_NOCASE (bp, "code")
5245                || LOOKING_AT_NOCASE (bp, "create")
5246                || LOOKING_AT_NOCASE (bp, "defer")
5247                || LOOKING_AT_NOCASE (bp, "value")
5248                || LOOKING_AT_NOCASE (bp, "variable")
5249                || LOOKING_AT_NOCASE (bp, "buffer:")
5250                || LOOKING_AT_NOCASE (bp, "field"))
5251         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5252       else
5253         bp = skip_non_spaces (bp);
5254 }
5255
5256 \f
5257 /*
5258  * Scheme tag functions
5259  * look for (def... xyzzy
5260  *          (def... (xyzzy
5261  *          (def ... ((...(xyzzy ....
5262  *          (set! xyzzy
5263  * Original code by Ken Haase (1985?)
5264  */
5265 static void
5266 Scheme_functions (FILE *inf)
5267 {
5268   register char *bp;
5269
5270   LOOP_ON_INPUT_LINES (inf, lb, bp)
5271     {
5272       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5273         {
5274           bp = skip_non_spaces (bp+4);
5275           /* Skip over open parens and white space.  Don't continue past
5276              '\0'. */
5277           while (*bp && notinname (*bp))
5278             bp++;
5279           get_tag (bp, NULL);
5280         }
5281       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5282         get_tag (bp, NULL);
5283     }
5284 }
5285
5286 \f
/* Find tags in TeX and LaTeX input files.  */

/* TEX_toktab is a table of TeX control sequences that define tags.
 * Each entry records one such control sequence.
 * The table is built by TEX_decode_env and is scanned by TeX_commands
 * until an entry whose buffer is NULL is reached.
 *
 * Original code from who knows whom.
 * Ideas by:
 *   Stefan Monnier (2002)
 */

/* NULL until TeX_commands has asked TEX_decode_env to build it.  */
static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Entries are separated by ':' and written without the escape
   character.  */
static const char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

/* Arguments: name of the environment variable, default token string.  */
static void TEX_decode_env (const char *, const char *);
5307
5308 /*
5309  * TeX/LaTeX scanning loop.
5310  */
5311 static void
5312 TeX_commands (FILE *inf)
5313 {
5314   char *cp;
5315   linebuffer *key;
5316
5317   char TEX_esc = '\0';
5318   char TEX_opgrp, TEX_clgrp;
5319
5320   /* Initialize token table once from environment. */
5321   if (TEX_toktab == NULL)
5322     TEX_decode_env ("TEXTAGS", TEX_defenv);
5323
5324   LOOP_ON_INPUT_LINES (inf, lb, cp)
5325     {
5326       /* Look at each TEX keyword in line. */
5327       for (;;)
5328         {
5329           /* Look for a TEX escape. */
5330           while (true)
5331             {
5332               char c = *cp++;
5333               if (c == '\0' || c == '%')
5334                 goto tex_next_line;
5335
5336               /* Select either \ or ! as escape character, whichever comes
5337                  first outside a comment.  */
5338               if (!TEX_esc)
5339                 switch (c)
5340                   {
5341                   case '\\':
5342                     TEX_esc = c;
5343                     TEX_opgrp = '{';
5344                     TEX_clgrp = '}';
5345                     break;
5346
5347                   case '!':
5348                     TEX_esc = c;
5349                     TEX_opgrp = '<';
5350                     TEX_clgrp = '>';
5351                     break;
5352                   }
5353
5354               if (c == TEX_esc)
5355                 break;
5356             }
5357
5358           for (key = TEX_toktab; key->buffer != NULL; key++)
5359             if (strneq (cp, key->buffer, key->len))
5360               {
5361                 char *p;
5362                 int namelen, linelen;
5363                 bool opgrp = false;
5364
5365                 cp = skip_spaces (cp + key->len);
5366                 if (*cp == TEX_opgrp)
5367                   {
5368                     opgrp = true;
5369                     cp++;
5370                   }
5371                 for (p = cp;
5372                      (!c_isspace (*p) && *p != '#' &&
5373                       *p != TEX_opgrp && *p != TEX_clgrp);
5374                      p++)
5375                   continue;
5376                 namelen = p - cp;
5377                 linelen = lb.len;
5378                 if (!opgrp || *p == TEX_clgrp)
5379                   {
5380                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5381                       p++;
5382                     linelen = p - lb.buffer + 1;
5383                   }
5384                 make_tag (cp, namelen, true,
5385                           lb.buffer, linelen, lineno, linecharno);
5386                 goto tex_next_line; /* We only tag a line once */
5387               }
5388         }
5389     tex_next_line:
5390       ;
5391     }
5392 }
5393
5394 /* Read environment and prepend it to the default string.
5395    Build token table. */
5396 static void
5397 TEX_decode_env (const char *evarname, const char *defenv)
5398 {
5399   register const char *env, *p;
5400   int i, len;
5401
5402   /* Append default string to environment. */
5403   env = getenv (evarname);
5404   if (!env)
5405     env = defenv;
5406   else
5407     env = concat (env, defenv, "");
5408
5409   /* Allocate a token table */
5410   for (len = 1, p = env; (p = strchr (p, ':')); )
5411     if (*++p)
5412       len++;
5413   TEX_toktab = xnew (len, linebuffer);
5414
5415   /* Unpack environment string into token table. Be careful about */
5416   /* zero-length strings (leading ':', "::" and trailing ':') */
5417   for (i = 0; *env != '\0';)
5418     {
5419       p = strchr (env, ':');
5420       if (!p)                   /* End of environment string. */
5421         p = env + strlen (env);
5422       if (p - env > 0)
5423         {                       /* Only non-zero strings. */
5424           TEX_toktab[i].buffer = savenstr (env, p - env);
5425           TEX_toktab[i].len = p - env;
5426           i++;
5427         }
5428       if (*p)
5429         env = p + 1;
5430       else
5431         {
5432           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5433           TEX_toktab[i].len = 0;
5434           break;
5435         }
5436     }
5437 }
5438
5439 \f
5440 /* Texinfo support.  Dave Love, Mar. 2000.  */
5441 static void
5442 Texinfo_nodes (FILE *inf)
5443 {
5444   char *cp, *start;
5445   LOOP_ON_INPUT_LINES (inf, lb, cp)
5446     if (LOOKING_AT (cp, "@node"))
5447       {
5448         start = cp;
5449         while (*cp != '\0' && *cp != ',')
5450           cp++;
5451         make_tag (start, cp - start, true,
5452                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5453       }
5454 }
5455
5456 \f
/*
 * HTML support.
 * Contents of <title>, <h1>, <h2>, <h3> are tags.
 * Contents of <a name=xxx> are tags with name xxx.
 * An id=xxx attribute is looked for in every HTML tag that is scanned,
 * not only in anchors, and is treated like name=xxx.
 *
 * The scanner is a small state machine.  Its four flags are declared
 * outside the line loop, so a state entered on one line is still in
 * force when the next line is read.
 *
 * Francesco Potortì, 2002.
 */
static void
HTML_labels (FILE *inf)
{
  bool getnext = false;         /* next text outside of HTML tags is a tag */
  bool skiptag = false;         /* skip to the end of the current HTML tag */
  bool intag = false;           /* inside an html tag, looking for ID= */
  bool inanchor = false;        /* when INTAG, is an anchor, look for NAME= */
  char *end;


  linebuffer_setlen (&token_name, 0); /* no name in buffer */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    for (;;)                    /* loop on the same line */
      {
        if (skiptag)            /* skip HTML tag */
          {
            while (*dbp != '\0' && *dbp != '>')
              dbp++;
            if (*dbp == '>')
              {
                dbp += 1;
                skiptag = false;
                continue;       /* look on the same line */
              }
            break;              /* go to next line */
          }

        else if (intag) /* look for "name=" or "id=" */
          {
            /* Stop at the end of the line, at the end of the HTML tag,
               or at any letter that could start "name=" or "id=".  */
            while (*dbp != '\0' && *dbp != '>'
                   && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '>')
              {
                dbp += 1;
                intag = false;
                continue;       /* look on the same line */
              }
            if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
                || LOOKING_AT_NOCASE (dbp, "id="))
              {
                bool quoted = (dbp[0] == '"');

                /* Delimit the attribute value: up to the closing quote
                   when quoted, otherwise as long as intoken accepts
                   the characters.  */
                if (quoted)
                  for (end = ++dbp; *end != '\0' && *end != '"'; end++)
                    continue;
                else
                  for (end = dbp; *end != '\0' && intoken (*end); end++)
                    continue;
                /* Keep the value in token_name; it becomes the name of
                   the tag made once the following text is found.  */
                linebuffer_setlen (&token_name, end - dbp);
                memcpy (token_name.buffer, dbp, end - dbp);
                token_name.buffer[end - dbp] = '\0';

                dbp = end;
                intag = false;  /* we found what we looked for */
                skiptag = true; /* skip to the end of the tag */
                getnext = true; /* then grab the text */
                continue;       /* look on the same line */
              }
            /* The letter did not start an attribute of interest.  */
            dbp += 1;
          }

        else if (getnext)       /* grab next tokens and tag them */
          {
            dbp = skip_spaces (dbp);
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '<')
              {
                /* Another HTML tag comes before any text: scan it for
                   attributes; GETNEXT stays set.  */
                intag = true;
                inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
                continue;       /* look on the same line */
              }

            /* The text up to the next '<' or the end of the line is
               recorded, under the name saved in token_name.  */
            for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
              continue;
            make_tag (token_name.buffer, token_name.len, true,
                      dbp, end - dbp, lineno, linecharno);
            linebuffer_setlen (&token_name, 0); /* no name in buffer */
            getnext = false;
            break;              /* go to next line */
          }

        else                    /* look for an interesting HTML tag */
          {
            while (*dbp != '\0' && *dbp != '<')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            intag = true;
            if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
              {
                inanchor = true;
                continue;       /* look on the same line */
              }
            else if (LOOKING_AT_NOCASE (dbp, "<title>")
                     || LOOKING_AT_NOCASE (dbp, "<h1>")
                     || LOOKING_AT_NOCASE (dbp, "<h2>")
                     || LOOKING_AT_NOCASE (dbp, "<h3>"))
              {
                intag = false;
                getnext = true;
                continue;       /* look on the same line */
              }
            /* Some other HTML tag: step past '<' with INTAG set.  */
            dbp += 1;
          }
      }
}
5575
5576 \f
5577 /*
5578  * Prolog support
5579  *
5580  * Assumes that the predicate or rule starts at column 0.
5581  * Only the first clause of a predicate or rule is added.
5582  * Original code by Sunichirou Sugou (1989)
5583  * Rewritten by Anders Lindgren (1996)
5584  */
5585 static size_t prolog_pr (char *, char *);
5586 static void prolog_skip_comment (linebuffer *, FILE *);
5587 static size_t prolog_atom (char *, size_t);
5588
5589 static void
5590 Prolog_functions (FILE *inf)
5591 {
5592   char *cp, *last;
5593   size_t len;
5594   size_t allocated;
5595
5596   allocated = 0;
5597   len = 0;
5598   last = NULL;
5599
5600   LOOP_ON_INPUT_LINES (inf, lb, cp)
5601     {
5602       if (cp[0] == '\0')        /* Empty line */
5603         continue;
5604       else if (c_isspace (cp[0])) /* Not a predicate */
5605         continue;
5606       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5607         prolog_skip_comment (&lb, inf);
5608       else if ((len = prolog_pr (cp, last)) > 0)
5609         {
5610           /* Predicate or rule.  Store the function name so that we
5611              only generate a tag for the first clause.  */
5612           if (last == NULL)
5613             last = xnew (len + 1, char);
5614           else if (len + 1 > allocated)
5615             xrnew (last, len + 1, char);
5616           allocated = len + 1;
5617           memcpy (last, cp, len);
5618           last[len] = '\0';
5619         }
5620     }
5621   free (last);
5622 }
5623
5624
5625 static void
5626 prolog_skip_comment (linebuffer *plb, FILE *inf)
5627 {
5628   char *cp;
5629
5630   do
5631     {
5632       for (cp = plb->buffer; *cp != '\0'; cp++)
5633         if (cp[0] == '*' && cp[1] == '/')
5634           return;
5635       readline (plb, inf);
5636     }
5637   while (perhaps_more_input (inf));
5638 }
5639
5640 /*
5641  * A predicate or rule definition is added if it matches:
5642  *     <beginning of line><Prolog Atom><whitespace>(
5643  * or  <beginning of line><Prolog Atom><whitespace>:-
5644  *
5645  * It is added to the tags database if it doesn't match the
5646  * name of the previous clause header.
5647  *
5648  * Return the size of the name of the predicate or rule, or 0 if no
5649  * header was found.
5650  */
5651 static size_t
5652 prolog_pr (char *s, char *last)
5653
5654                                 /* Name of last clause. */
5655 {
5656   size_t pos;
5657   size_t len;
5658
5659   pos = prolog_atom (s, 0);
5660   if (! pos)
5661     return 0;
5662
5663   len = pos;
5664   pos = skip_spaces (s + pos) - s;
5665
5666   if ((s[pos] == '.'
5667        || (s[pos] == '(' && (pos += 1))
5668        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5669       && (last == NULL          /* save only the first clause */
5670           || len != strlen (last)
5671           || !strneq (s, last, len)))
5672         {
5673           make_tag (s, len, true, s, pos, lineno, linecharno);
5674           return len;
5675         }
5676   else
5677     return 0;
5678 }
5679
/*
 * Consume a Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumeric characters and underscores, starting
 *   with a lower case letter or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.  The count includes both quotes.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  char *start = s + pos;
  char *p = start;

  if (c_islower (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (c_isalnum (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return 0;

  /* The atom is quoted: walk to the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        p++;
        if (*p != '\'')
          return p - start;     /* that was the closing quote */
        p++;                    /* A double quote */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored. */
        return 0;

      case '\\':
        if (p[1] == '\0')
          return 0;
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
5736
5737 \f
5738 /*
5739  * Support for Erlang
5740  *
5741  * Generates tags for functions, defines, and records.
5742  * Assumes that Erlang functions start at column 0.
5743  * Original code by Anders Lindgren (1996)
5744  */
5745 static int erlang_func (char *, char *);
5746 static void erlang_attribute (char *);
5747 static int erlang_atom (char *);
5748
5749 static void
5750 Erlang_functions (FILE *inf)
5751 {
5752   char *cp, *last;
5753   int len;
5754   int allocated;
5755
5756   allocated = 0;
5757   len = 0;
5758   last = NULL;
5759
5760   LOOP_ON_INPUT_LINES (inf, lb, cp)
5761     {
5762       if (cp[0] == '\0')        /* Empty line */
5763         continue;
5764       else if (c_isspace (cp[0])) /* Not function nor attribute */
5765         continue;
5766       else if (cp[0] == '%')    /* comment */
5767         continue;
5768       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5769         continue;
5770       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5771         {
5772           erlang_attribute (cp);
5773           if (last != NULL)
5774             {
5775               free (last);
5776               last = NULL;
5777             }
5778         }
5779       else if ((len = erlang_func (cp, last)) > 0)
5780         {
5781           /*
5782            * Function.  Store the function name so that we only
5783            * generates a tag for the first clause.
5784            */
5785           if (last == NULL)
5786             last = xnew (len + 1, char);
5787           else if (len + 1 > allocated)
5788             xrnew (last, len + 1, char);
5789           allocated = len + 1;
5790           memcpy (last, cp, len);
5791           last[len] = '\0';
5792         }
5793     }
5794   free (last);
5795 }
5796
5797
5798 /*
5799  * A function definition is added if it matches:
5800  *     <beginning of line><Erlang Atom><whitespace>(
5801  *
5802  * It is added to the tags database if it doesn't match the
5803  * name of the previous clause header.
5804  *
5805  * Return the size of the name of the function, or 0 if no function
5806  * was found.
5807  */
5808 static int
5809 erlang_func (char *s, char *last)
5810
5811                                 /* Name of last clause. */
5812 {
5813   int pos;
5814   int len;
5815
5816   pos = erlang_atom (s);
5817   if (pos < 1)
5818     return 0;
5819
5820   len = pos;
5821   pos = skip_spaces (s + pos) - s;
5822
5823   /* Save only the first clause. */
5824   if (s[pos++] == '('
5825       && (last == NULL
5826           || len != (int)strlen (last)
5827           || !strneq (s, last, len)))
5828         {
5829           make_tag (s, len, true, s, pos, lineno, linecharno);
5830           return len;
5831         }
5832
5833   return 0;
5834 }
5835
5836
5837 /*
5838  * Handle attributes.  Currently, tags are generated for defines
5839  * and records.
5840  *
5841  * They are on the form:
5842  * -define(foo, bar).
5843  * -define(Foo(M, N), M+N).
5844  * -record(graph, {vtab = notable, cyclic = true}).
5845  */
5846 static void
5847 erlang_attribute (char *s)
5848 {
5849   char *cp = s;
5850
5851   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5852       && *cp++ == '(')
5853     {
5854       int len = erlang_atom (skip_spaces (cp));
5855       if (len > 0)
5856         make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5857     }
5858   return;
5859 }
5860
5861
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with a letter, an underscore or a single quote, and also
 * when a quoted atom is not closed before the end of the string.
 * For a quoted atom the count includes both quotes.
 */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (s[0] == '\'')
    {
      /* The atom is quoted: look for the closing quote.  */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              /* A backslash protects the character after it.  */
              pos++;
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      return pos + 1;
    }

  if (c_isalpha (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      pos = 1;
      while (c_isalnum (s[pos]) || s[pos] == '_')
        pos++;
    }

  return pos;
}
5889
5890 \f
5891 static char *scan_separators (char *);
5892 static void add_regex (char *, language *);
5893 static char *substitute (char *, char *, struct re_registers *);
5894
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 *
 * The separator is whatever character NAME starts with.  A backslash
 * before the separator yields the separator itself; a backslash before
 * any character that has no special meaning here is kept together with
 * that character.  An empty NAME has no separator: it is left as it is
 * and NULL is returned.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;
  bool quoted = false;

  /* Nothing to scan, and nothing after the terminator may be read.  */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (quoted)
        {
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
          quoted = false;
        }
      else if (*name == '\\')
        quoted = true;
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string.  The write position always trails the
     read position, so the separator that NAME points at is left
     untouched.  */
  *copyto = '\0';
  return name;
}
5953
5954 /* Look at the argument of --regex or --no-regex and do the right
5955    thing.  Same for each line of a regexp file. */
5956 static void
5957 analyze_regex (char *regex_arg)
5958 {
5959   if (regex_arg == NULL)
5960     {
5961       free_regexps ();          /* --no-regex: remove existing regexps */
5962       return;
5963     }
5964
5965   /* A real --regexp option or a line in a regexp file. */
5966   switch (regex_arg[0])
5967     {
5968       /* Comments in regexp file or null arg to --regex. */
5969     case '\0':
5970     case ' ':
5971     case '\t':
5972       break;
5973
5974       /* Read a regex file.  This is recursive and may result in a
5975          loop, which will stop when the file descriptors are exhausted. */
5976     case '@':
5977       {
5978         FILE *regexfp;
5979         linebuffer regexbuf;
5980         char *regexfile = regex_arg + 1;
5981
5982         /* regexfile is a file containing regexps, one per line. */
5983         regexfp = fopen (regexfile, "r" FOPEN_BINARY);
5984         if (regexfp == NULL)
5985           pfatal (regexfile);
5986         linebuffer_init (&regexbuf);
5987         while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
5988           analyze_regex (regexbuf.buffer);
5989         free (regexbuf.buffer);
5990         if (fclose (regexfp) != 0)
5991           pfatal (regexfile);
5992       }
5993       break;
5994
5995       /* Regexp to be used for a specific language only. */
5996     case '{':
5997       {
5998         language *lang;
5999         char *lang_name = regex_arg + 1;
6000         char *cp;
6001
6002         for (cp = lang_name; *cp != '}'; cp++)
6003           if (*cp == '\0')
6004             {
6005               error ("unterminated language name in regex: %s", regex_arg);
6006               return;
6007             }
6008         *cp++ = '\0';
6009         lang = get_language_from_langname (lang_name);
6010         if (lang == NULL)
6011           return;
6012         add_regex (cp, lang);
6013       }
6014       break;
6015
6016       /* Regexp to be used for any language. */
6017     default:
6018       add_regex (regex_arg, NULL);
6019       break;
6020     }
6021 }
6022
6023 /* Separate the regexp pattern, compile it,
6024    and care for optional name and modifiers. */
6025 static void
6026 add_regex (char *regexp_pattern, language *lang)
6027 {
6028   static struct re_pattern_buffer zeropattern;
6029   char sep, *pat, *name, *modifiers;
6030   char empty = '\0';
6031   const char *err;
6032   struct re_pattern_buffer *patbuf;
6033   regexp *rp;
6034   bool
6035     force_explicit_name = true, /* do not use implicit tag names */
6036     ignore_case = false,        /* case is significant */
6037     multi_line = false,         /* matches are done one line at a time */
6038     single_line = false;        /* dot does not match newline */
6039
6040
6041   if (strlen (regexp_pattern) < 3)
6042     {
6043       error ("null regexp");
6044       return;
6045     }
6046   sep = regexp_pattern[0];
6047   name = scan_separators (regexp_pattern);
6048   if (name == NULL)
6049     {
6050       error ("%s: unterminated regexp", regexp_pattern);
6051       return;
6052     }
6053   if (name[1] == sep)
6054     {
6055       error ("null name for regexp \"%s\"", regexp_pattern);
6056       return;
6057     }
6058   modifiers = scan_separators (name);
6059   if (modifiers == NULL)        /* no terminating separator --> no name */
6060     {
6061       modifiers = name;
6062       name = &empty;
6063     }
6064   else
6065     modifiers += 1;             /* skip separator */
6066
6067   /* Parse regex modifiers. */
6068   for (; modifiers[0] != '\0'; modifiers++)
6069     switch (modifiers[0])
6070       {
6071       case 'N':
6072         if (modifiers == name)
6073           error ("forcing explicit tag name but no name, ignoring");
6074         force_explicit_name = true;
6075         break;
6076       case 'i':
6077         ignore_case = true;
6078         break;
6079       case 's':
6080         single_line = true;
6081         /* FALLTHRU */
6082       case 'm':
6083         multi_line = true;
6084         need_filebuf = true;
6085         break;
6086       default:
6087         error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
6088         break;
6089       }
6090
6091   patbuf = xnew (1, struct re_pattern_buffer);
6092   *patbuf = zeropattern;
6093   if (ignore_case)
6094     {
6095       static char lc_trans[UCHAR_MAX + 1];
6096       int i;
6097       for (i = 0; i < UCHAR_MAX + 1; i++)
6098         lc_trans[i] = c_tolower (i);
6099       patbuf->translate = lc_trans;     /* translation table to fold case  */
6100     }
6101
6102   if (multi_line)
6103     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6104   else
6105     pat = regexp_pattern;
6106
6107   if (single_line)
6108     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6109   else
6110     re_set_syntax (RE_SYNTAX_EMACS);
6111
6112   err = re_compile_pattern (pat, strlen (pat), patbuf);
6113   if (multi_line)
6114     free (pat);
6115   if (err != NULL)
6116     {
6117       error ("%s while compiling pattern", err);
6118       return;
6119     }
6120
6121   rp = p_head;
6122   p_head = xnew (1, regexp);
6123   p_head->pattern = savestr (regexp_pattern);
6124   p_head->p_next = rp;
6125   p_head->lang = lang;
6126   p_head->pat = patbuf;
6127   p_head->name = savestr (name);
6128   p_head->error_signaled = false;
6129   p_head->force_explicit_name = force_explicit_name;
6130   p_head->ignore_case = ignore_case;
6131   p_head->multi_line = multi_line;
6132 }
6133
6134 /*
6135  * Do the substitutions indicated by the regular expression and
6136  * arguments.
6137  */
6138 static char *
6139 substitute (char *in, char *out, struct re_registers *regs)
6140 {
6141   char *result, *t;
6142   int size, dig, diglen;
6143
6144   result = NULL;
6145   size = strlen (out);
6146
6147   /* Pass 1: figure out how much to allocate by finding all \N strings. */
6148   if (out[size - 1] == '\\')
6149     fatal ("pattern error in \"%s\"", out);
6150   for (t = strchr (out, '\\');
6151        t != NULL;
6152        t = strchr (t + 2, '\\'))
6153     if (c_isdigit (t[1]))
6154       {
6155         dig = t[1] - '0';
6156         diglen = regs->end[dig] - regs->start[dig];
6157         size += diglen - 2;
6158       }
6159     else
6160       size -= 1;
6161
6162   /* Allocate space and do the substitutions. */
6163   assert (size >= 0);
6164   result = xnew (size + 1, char);
6165
6166   for (t = result; *out != '\0'; out++)
6167     if (*out == '\\' && c_isdigit (*++out))
6168       {
6169         dig = *out - '0';
6170         diglen = regs->end[dig] - regs->start[dig];
6171         memcpy (t, in + regs->start[dig], diglen);
6172         t += diglen;
6173       }
6174     else
6175       *t++ = *out;
6176   *t = '\0';
6177
6178   assert (t <= result + size);
6179   assert (t - result == (int)strlen (result));
6180
6181   return result;
6182 }
6183
6184 /* Deallocate all regexps. */
6185 static void
6186 free_regexps (void)
6187 {
6188   regexp *rp;
6189   while (p_head != NULL)
6190     {
6191       rp = p_head->p_next;
6192       free (p_head->pattern);
6193       free (p_head->name);
6194       free (p_head);
6195       p_head = rp;
6196     }
6197   return;
6198 }
6199
6200 /*
6201  * Reads the whole file as a single string from `filebuf' and looks for
6202  * multi-line regular expressions, creating tags on matches.
6203  * readline already dealt with normal regexps.
6204  *
6205  * Idea by Ben Wing <ben@666.com> (2002).
6206  */
6207 static void
6208 regex_tag_multiline (void)
6209 {
6210   char *buffer = filebuf.buffer;
6211   regexp *rp;
6212   char *name;
6213
6214   for (rp = p_head; rp != NULL; rp = rp->p_next)
6215     {
6216       int match = 0;
6217
6218       if (!rp->multi_line)
6219         continue;               /* skip normal regexps */
6220
6221       /* Generic initializations before parsing file from memory. */
6222       lineno = 1;               /* reset global line number */
6223       charno = 0;               /* reset global char number */
6224       linecharno = 0;           /* reset global char number of line start */
6225
6226       /* Only use generic regexps or those for the current language. */
6227       if (rp->lang != NULL && rp->lang != curfdp->lang)
6228         continue;
6229
6230       while (match >= 0 && match < filebuf.len)
6231         {
6232           match = re_search (rp->pat, buffer, filebuf.len, charno,
6233                              filebuf.len - match, &rp->regs);
6234           switch (match)
6235             {
6236             case -2:
6237               /* Some error. */
6238               if (!rp->error_signaled)
6239                 {
6240                   error ("regexp stack overflow while matching \"%s\"",
6241                          rp->pattern);
6242                   rp->error_signaled = true;
6243                 }
6244               break;
6245             case -1:
6246               /* No match. */
6247               break;
6248             default:
6249               if (match == rp->regs.end[0])
6250                 {
6251                   if (!rp->error_signaled)
6252                     {
6253                       error ("regexp matches the empty string: \"%s\"",
6254                              rp->pattern);
6255                       rp->error_signaled = true;
6256                     }
6257                   match = -3;   /* exit from while loop */
6258                   break;
6259                 }
6260
6261               /* Match occurred.  Construct a tag. */
6262               while (charno < rp->regs.end[0])
6263                 if (buffer[charno++] == '\n')
6264                   lineno++, linecharno = charno;
6265               name = rp->name;
6266               if (name[0] == '\0')
6267                 name = NULL;
6268               else /* make a named tag */
6269                 name = substitute (buffer, rp->name, &rp->regs);
6270               if (rp->force_explicit_name)
6271                 /* Force explicit tag name, if a name is there. */
6272                 pfnote (name, true, buffer + linecharno,
6273                         charno - linecharno + 1, lineno, linecharno);
6274               else
6275                 make_tag (name, strlen (name), true, buffer + linecharno,
6276                           charno - linecharno + 1, lineno, linecharno);
6277               break;
6278             }
6279         }
6280     }
6281 }
6282
6283 \f
6284 static bool
6285 nocase_tail (const char *cp)
6286 {
6287   int len = 0;
6288
6289   while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6290     cp++, len++;
6291   if (*cp == '\0' && !intoken (dbp[len]))
6292     {
6293       dbp += len;
6294       return true;
6295     }
6296   return false;
6297 }
6298
6299 static void
6300 get_tag (register char *bp, char **namepp)
6301 {
6302   register char *cp = bp;
6303
6304   if (*bp != '\0')
6305     {
6306       /* Go till you get to white space or a syntactic break */
6307       for (cp = bp + 1; !notinname (*cp); cp++)
6308         continue;
6309       make_tag (bp, cp - bp, true,
6310                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6311     }
6312
6313   if (namepp != NULL)
6314     *namepp = savenstr (bp, cp - bp);
6315 }
6316
6317 /*
6318  * Read a line of text from `stream' into `lbp', excluding the
6319  * newline or CR-NL, if any.  Return the number of characters read from
6320  * `stream', which is the length of the line including the newline.
6321  *
6322  * On DOS or Windows we do not count the CR character, if any before the
6323  * NL, in the returned length; this mirrors the behavior of Emacs on those
6324  * platforms (for text files, it translates CR-NL to NL as it reads in the
6325  * file).
6326  *
6327  * If multi-line regular expressions are requested, each line read is
6328  * appended to `filebuf'.
6329  */
6330 static long
6331 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6332 {
6333   char *buffer = lbp->buffer;
6334   char *p = lbp->buffer;
6335   char *pend;
6336   int chars_deleted;
6337
6338   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6339
6340   for (;;)
6341     {
6342       register int c = getc (stream);
6343       if (p == pend)
6344         {
6345           /* We're at the end of linebuffer: expand it. */
6346           lbp->size *= 2;
6347           xrnew (buffer, lbp->size, char);
6348           p += buffer - lbp->buffer;
6349           pend = buffer + lbp->size;
6350           lbp->buffer = buffer;
6351         }
6352       if (c == EOF)
6353         {
6354           if (ferror (stream))
6355             perror (filename);
6356           *p = '\0';
6357           chars_deleted = 0;
6358           break;
6359         }
6360       if (c == '\n')
6361         {
6362           if (p > buffer && p[-1] == '\r')
6363             {
6364               p -= 1;
6365               chars_deleted = 2;
6366             }
6367           else
6368             {
6369               chars_deleted = 1;
6370             }
6371           *p = '\0';
6372           break;
6373         }
6374       *p++ = c;
6375     }
6376   lbp->len = p - buffer;
6377
6378   if (need_filebuf              /* we need filebuf for multi-line regexps */
6379       && chars_deleted > 0)     /* not at EOF */
6380     {
6381       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6382         {
6383           /* Expand filebuf. */
6384           filebuf.size *= 2;
6385           xrnew (filebuf.buffer, filebuf.size, char);
6386         }
6387       memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6388       filebuf.len += lbp->len;
6389       filebuf.buffer[filebuf.len++] = '\n';
6390       filebuf.buffer[filebuf.len] = '\0';
6391     }
6392
6393   return lbp->len + chars_deleted;
6394 }
6395
6396 /*
6397  * Like readline_internal, above, but in addition try to match the
6398  * input line against relevant regular expressions and manage #line
6399  * directives.
6400  */
6401 static void
6402 readline (linebuffer *lbp, FILE *stream)
6403 {
6404   long result;
6405
6406   linecharno = charno;          /* update global char number of line start */
6407   result = readline_internal (lbp, stream, infilename); /* read line */
6408   lineno += 1;                  /* increment global line number */
6409   charno += result;             /* increment global char number */
6410
6411   /* Honor #line directives. */
6412   if (!no_line_directive)
6413     {
6414       static bool discard_until_line_directive;
6415
6416       /* Check whether this is a #line directive. */
6417       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6418         {
6419           unsigned int lno;
6420           int start = 0;
6421
6422           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6423               && start > 0)     /* double quote character found */
6424             {
6425               char *endp = lbp->buffer + start;
6426
6427               while ((endp = strchr (endp, '"')) != NULL
6428                      && endp[-1] == '\\')
6429                 endp++;
6430               if (endp != NULL)
6431                 /* Ok, this is a real #line directive.  Let's deal with it. */
6432                 {
6433                   char *taggedabsname;  /* absolute name of original file */
6434                   char *taggedfname;    /* name of original file as given */
6435                   char *name;           /* temp var */
6436
6437                   discard_until_line_directive = false; /* found it */
6438                   name = lbp->buffer + start;
6439                   *endp = '\0';
6440                   canonicalize_filename (name);
6441                   taggedabsname = absolute_filename (name, tagfiledir);
6442                   if (filename_is_absolute (name)
6443                       || filename_is_absolute (curfdp->infname))
6444                     taggedfname = savestr (taggedabsname);
6445                   else
6446                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6447
6448                   if (streq (curfdp->taggedfname, taggedfname))
6449                     /* The #line directive is only a line number change.  We
6450                        deal with this afterwards. */
6451                     free (taggedfname);
6452                   else
6453                     /* The tags following this #line directive should be
6454                        attributed to taggedfname.  In order to do this, set
6455                        curfdp accordingly. */
6456                     {
6457                       fdesc *fdp; /* file description pointer */
6458
6459                       /* Go look for a file description already set up for the
6460                          file indicated in the #line directive.  If there is
6461                          one, use it from now until the next #line
6462                          directive. */
6463                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6464                         if (streq (fdp->infname, curfdp->infname)
6465                             && streq (fdp->taggedfname, taggedfname))
6466                           /* If we remove the second test above (after the &&)
6467                              then all entries pertaining to the same file are
6468                              coalesced in the tags file.  If we use it, then
6469                              entries pertaining to the same file but generated
6470                              from different files (via #line directives) will
6471                              go into separate sections in the tags file.  These
6472                              alternatives look equivalent.  The first one
6473                              destroys some apparently useless information. */
6474                           {
6475                             curfdp = fdp;
6476                             free (taggedfname);
6477                             break;
6478                           }
6479                       /* Else, if we already tagged the real file, skip all
6480                          input lines until the next #line directive. */
6481                       if (fdp == NULL) /* not found */
6482                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6483                           if (streq (fdp->infabsname, taggedabsname))
6484                             {
6485                               discard_until_line_directive = true;
6486                               free (taggedfname);
6487                               break;
6488                             }
6489                       /* Else create a new file description and use that from
6490                          now on, until the next #line directive. */
6491                       if (fdp == NULL) /* not found */
6492                         {
6493                           fdp = fdhead;
6494                           fdhead = xnew (1, fdesc);
6495                           *fdhead = *curfdp; /* copy curr. file description */
6496                           fdhead->next = fdp;
6497                           fdhead->infname = savestr (curfdp->infname);
6498                           fdhead->infabsname = savestr (curfdp->infabsname);
6499                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6500                           fdhead->taggedfname = taggedfname;
6501                           fdhead->usecharno = false;
6502                           fdhead->prop = NULL;
6503                           fdhead->written = false;
6504                           curfdp = fdhead;
6505                         }
6506                     }
6507                   free (taggedabsname);
6508                   lineno = lno - 1;
6509                   readline (lbp, stream);
6510                   return;
6511                 } /* if a real #line directive */
6512             } /* if #line is followed by a number */
6513         } /* if line begins with "#line " */
6514
6515       /* If we are here, no #line directive was found. */
6516       if (discard_until_line_directive)
6517         {
6518           if (result > 0)
6519             {
6520               /* Do a tail recursion on ourselves, thus discarding the contents
6521                  of the line buffer. */
6522               readline (lbp, stream);
6523               return;
6524             }
6525           /* End of file. */
6526           discard_until_line_directive = false;
6527           return;
6528         }
6529     } /* if #line directives should be considered */
6530
6531   {
6532     int match;
6533     regexp *rp;
6534     char *name;
6535
6536     /* Match against relevant regexps. */
6537     if (lbp->len > 0)
6538       for (rp = p_head; rp != NULL; rp = rp->p_next)
6539         {
6540           /* Only use generic regexps or those for the current language.
6541              Also do not use multiline regexps, which is the job of
6542              regex_tag_multiline. */
6543           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6544               || rp->multi_line)
6545             continue;
6546
6547           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6548           switch (match)
6549             {
6550             case -2:
6551               /* Some error. */
6552               if (!rp->error_signaled)
6553                 {
6554                   error ("regexp stack overflow while matching \"%s\"",
6555                          rp->pattern);
6556                   rp->error_signaled = true;
6557                 }
6558               break;
6559             case -1:
6560               /* No match. */
6561               break;
6562             case 0:
6563               /* Empty string matched. */
6564               if (!rp->error_signaled)
6565                 {
6566                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6567                   rp->error_signaled = true;
6568                 }
6569               break;
6570             default:
6571               /* Match occurred.  Construct a tag. */
6572               name = rp->name;
6573               if (name[0] == '\0')
6574                 name = NULL;
6575               else /* make a named tag */
6576                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6577               if (rp->force_explicit_name)
6578                 /* Force explicit tag name, if a name is there. */
6579                 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6580               else
6581                 make_tag (name, strlen (name), true,
6582                           lbp->buffer, match, lineno, linecharno);
6583               break;
6584             }
6585         }
6586   }
6587 }
6588
6589 \f
/*
 * Duplicate the NUL-terminated string CP: return a pointer to a copy
 * held in strlen(cp)+1 bytes obtained through savenstr.
 */
static char *
savestr (const char *cp)
{
  int whole_length = strlen (cp);

  return savenstr (cp, whole_length);
}
6599
6600 /*
6601  * Return a pointer to a space of size LEN+1 allocated with xnew where
6602  * the string CP has been copied for at most the first LEN characters.
6603  */
6604 static char *
6605 savenstr (const char *cp, int len)
6606 {
6607   char *dp = xnew (len + 1, char);
6608   dp[len] = '\0';
6609   return memcpy (dp, cp, len);
6610 }
6611
/* Return the address of the first character at or after CP that is not
   white space.  The terminating NUL is not white space, so the scan
   stops there at the latest. */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
6620
/* Return the address of the first white space character at or after
   CP, or of the terminating NUL if no white space is found. */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || c_isspace (*cp))
        return cp;
      cp++;
    }
}
6629
/* Return the address of the first character at or after CP that
   `notinname' rejects, i.e. skip a run of name characters. */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so the scan stops there too */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
6639
/* Report a printf-style diagnostic through verror, then terminate the
   program with EXIT_FAILURE.  Does not return. */
static void
fatal (char const *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);

  exit (EXIT_FAILURE);
}
6650
/* Print S1 together with the system message for the current errno
   (via perror), then terminate the program with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6657
/* Print on stderr a hint to run `PROGNAME --help', then terminate the
   program with EXIT_FAILURE.  */
static void
suggest_asking_for_help (void)
{
  fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
           progname);
  exit (EXIT_FAILURE);
}
6665
/* Report a diagnostic built from the printf-style FORMAT and the
   arguments that follow it; the actual output is done by verror.
   Unlike fatal, this returns to the caller. */
static void
error (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);
}
6675
/* Write one diagnostic line to stderr: the program name and ": ", then
   the message built from the printf-style FORMAT and the argument list
   AP, then a newline.  */
static void
verror (char const *format, va_list ap)
{
  fprintf (stderr, "%s: ", progname);
  vfprintf (stderr, format, ap);
  fprintf (stderr, "\n");
}
6683
6684 /* Return a newly-allocated string whose contents
6685    concatenate those of s1, s2, s3.  */
6686 static char *
6687 concat (const char *s1, const char *s2, const char *s3)
6688 {
6689   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6690   char *result = xnew (len1 + len2 + len3 + 1, char);
6691
6692   strcpy (result, s1);
6693   strcpy (result + len1, s2);
6694   strcpy (result + len1 + len2, s3);
6695
6696   return result;
6697 }
6698
6699 \f
6700 /* Does the same work as the system V getcwd, but does not need to
6701    guess the buffer size in advance. */
6702 static char *
6703 etags_getcwd (void)
6704 {
6705   int bufsize = 200;
6706   char *path = xnew (bufsize, char);
6707
6708   while (getcwd (path, bufsize) == NULL)
6709     {
6710       if (errno != ERANGE)
6711         pfatal ("getcwd");
6712       bufsize *= 2;
6713       free (path);
6714       path = xnew (bufsize, char);
6715     }
6716
6717   canonicalize_filename (path);
6718   return path;
6719 }
6720
6721 /* Return a newly allocated string containing a name of a temporary file.  */
6722 static char *
6723 etags_mktmp (void)
6724 {
6725   const char *tmpdir = getenv ("TMPDIR");
6726   const char *slash = "/";
6727
6728 #if MSDOS || defined (DOS_NT)
6729   if (!tmpdir)
6730     tmpdir = getenv ("TEMP");
6731   if (!tmpdir)
6732     tmpdir = getenv ("TMP");
6733   if (!tmpdir)
6734     tmpdir = ".";
6735   if (tmpdir[strlen (tmpdir) - 1] == '/'
6736       || tmpdir[strlen (tmpdir) - 1] == '\\')
6737     slash = "";
6738 #else
6739   if (!tmpdir)
6740     tmpdir = "/tmp";
6741   if (tmpdir[strlen (tmpdir) - 1] == '/')
6742     slash = "";
6743 #endif
6744
6745   char *templt = concat (tmpdir, slash, "etXXXXXX");
6746   int fd = mkostemp (templt, O_CLOEXEC);
6747   if (fd < 0 || close (fd) != 0)
6748     {
6749       int temp_errno = errno;
6750       free (templt);
6751       errno = temp_errno;
6752       templt = NULL;
6753     }
6754
6755 #if defined (DOS_NT)
6756   /* The file name will be used in shell redirection, so it needs to have
6757      DOS-style backslashes, or else the Windows shell will barf.  */
6758   char *p;
6759   for (p = templt; *p; p++)
6760     if (*p == '/')
6761       *p = '\\';
6762 #endif
6763
6764   return templt;
6765 }
6766
6767 /* Return a newly allocated string containing the file name of FILE
6768    relative to the absolute directory DIR (which should end with a slash). */
6769 static char *
6770 relative_filename (char *file, char *dir)
6771 {
6772   char *fp, *dp, *afn, *res;
6773   int i;
6774
6775   /* Find the common root of file and dir (with a trailing slash). */
6776   afn = absolute_filename (file, cwd);
6777   fp = afn;
6778   dp = dir;
6779   while (*fp++ == *dp++)
6780     continue;
6781   fp--, dp--;                   /* back to the first differing char */
6782 #ifdef DOS_NT
6783   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6784     return afn;
6785 #endif
6786   do                            /* look at the equal chars until '/' */
6787     fp--, dp--;
6788   while (*fp != '/');
6789
6790   /* Build a sequence of "../" strings for the resulting relative file name. */
6791   i = 0;
6792   while ((dp = strchr (dp + 1, '/')) != NULL)
6793     i += 1;
6794   res = xnew (3*i + strlen (fp + 1) + 1, char);
6795   char *z = res;
6796   while (i-- > 0)
6797     z = stpcpy (z, "../");
6798
6799   /* Add the file name relative to the common root of file and dir. */
6800   strcpy (z, fp + 1);
6801   free (afn);
6802
6803   return res;
6804 }
6805
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied; otherwise DIR and FILE are
   concatenated.  The "/dirname/.." and "/." components of the result
   are then removed in place.  If nothing is left, "/" is returned. */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* "/.." component: walk CP back to the start of the
                 component that precedes it. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the user could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Close the gap.  strlen (slashp + 2) is the length of
                 what follows "/.." plus one, so the terminating NUL is
                 moved too. */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              /* Re-examine from CP: the text moved there may itself
                 start with "/." or "/..". */
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* "/." component: drop those two characters (the count
                 again includes the terminating NUL) and look at the
                 same position again. */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6868
/* Return a newly allocated string containing the absolute name of the
   directory in which FILE resides, given DIR (which should end with a
   slash).  When FILE contains no slash the result is a copy of DIR.
   FILE is cut short after its last slash while the name is computed
   and is put back as it was before returning. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');
  char *cut, *dirname;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Temporarily end FILE right after its last slash. */
  cut = last_slash + 1;
  saved = *cut;
  *cut = '\0';
  dirname = absolute_filename (file, dir);
  *cut = saved;

  return dirname;
}
6888
/* Return true if FN is an absolute file name: one that starts with a
   slash or, under DOS_NT, with a drive letter followed by ":/".  FN
   must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
6900
/* Canonicalize the file name FN in place: every run of separators is
   collapsed into a single '/'.  Under DOS_NT an upper-case drive letter
   is also downcased, and backslashes count as separators. */
static void
canonicalize_filename (register char *fn)
{
  char *dst = fn;               /* where the next output char goes */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);
#endif

  for (const char *src = fn; *src != '\0'; )
    {
      char c = *src++;

#ifdef DOS_NT
      if (c == '\\')
        c = '/';
#endif
      if (c == '/')
        {
          /* Swallow the rest of this run of separators. */
          while (*src == '/'
#ifdef DOS_NT
                 || *src == '\\'
#endif
                 )
            src++;
        }
      *dst++ = c;
    }

  *dst = '\0';
}
6942
6943 \f
6944 /* Initialize a linebuffer for use. */
6945 static void
6946 linebuffer_init (linebuffer *lbp)
6947 {
6948   lbp->size = (DEBUG) ? 3 : 200;
6949   lbp->buffer = xnew (lbp->size, char);
6950   lbp->buffer[0] = '\0';
6951   lbp->len = 0;
6952 }
6953
6954 /* Set the minimum size of a string contained in a linebuffer. */
6955 static void
6956 linebuffer_setlen (linebuffer *lbp, int toksize)
6957 {
6958   while (lbp->size <= toksize)
6959     {
6960       lbp->size *= 2;
6961       xrnew (lbp->buffer, lbp->size, char);
6962     }
6963   lbp->len = toksize;
6964 }
6965
/* Allocate SIZE bytes with malloc and return the block.  If malloc
   returns NULL, report "virtual memory exhausted" through fatal. */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (!block)
    fatal ("virtual memory exhausted");

  return block;
}
6975
/* Resize the block PTR to SIZE bytes with realloc and return the
   resulting block.  If realloc returns NULL, report "virtual memory
   exhausted" through fatal. */
static void *
xrealloc (void *ptr, size_t size)
{
  void *block = realloc (ptr, size);

  if (!block)
    fatal ("virtual memory exhausted");

  return block;
}
6984
6985 /*
6986  * Local Variables:
6987  * indent-tabs-mode: t
6988  * tab-width: 8
6989  * fill-column: 79
6990  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6991  * c-file-style: "gnu"
6992  * End:
6993  */
6994
6995 /* etags.c ends here */