code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2016 Free Software
  32 Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or (at
  39 your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  * Francesco Potortì maintained and improved it for many years
  72    starting in 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
  81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  82
  83 #ifdef DEBUG
  84 #  undef DEBUG
  85 #  define DEBUG true
  86 #else
  87 #  define DEBUG  false
  88 #  define NDEBUG                /* disable assert */
  89 #endif
  90
  91 #include <config.h>
  92
  93 #ifndef _GNU_SOURCE
  94 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  95 #endif
  96
  97 /* WIN32_NATIVE is for XEmacs.
  98    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  99 #ifdef WIN32_NATIVE
 100 # undef MSDOS
 101 # undef  WINDOWSNT
 102 # define WINDOWSNT
 103 #endif /* WIN32_NATIVE */
 104
 105 #ifdef MSDOS
 106 # undef MSDOS
 107 # define MSDOS true
 108 # include <sys/param.h>
 109 #else
 110 # define MSDOS false
 111 #endif /* MSDOS */
 112
 113 #ifdef WINDOWSNT
 114 # include <direct.h>
 115 # undef HAVE_NTGUI
 116 # undef  DOS_NT
 117 # define DOS_NT
 118 # define O_CLOEXEC O_NOINHERIT
 119 #endif /* WINDOWSNT */
 120
 121 #include <limits.h>
 122 #include <unistd.h>
 123 #include <stdarg.h>
 124 #include <stdlib.h>
 125 #include <string.h>
 126 #include <sysstdio.h>
 127 #include <errno.h>
 128 #include <fcntl.h>
 129 #include <binary-io.h>
 130 #include <c-ctype.h>
 131 #include <c-strcase.h>
 132
 133 #include <assert.h>
 134 #ifdef NDEBUG
 135 # undef  assert                 /* some systems have a buggy assert.h */
 136 # define assert(x) ((void) 0)
 137 #endif
 138
 139 #include <getopt.h>
 140 #include <regex.h>
 141
 142 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 143  Leave it undefined to make the program "etags", which makes emacs-style
 144  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 145 #ifdef CTAGS
 146 # undef  CTAGS
 147 # define CTAGS true
 148 #else
 149 # define CTAGS false
 150 #endif
 151
 152 static bool
 153 streq (char const *s, char const *t)
 154 {
 155   return strcmp (s, t) == 0;
 156 }
 157
 158 static bool
 159 strcaseeq (char const *s, char const *t)
 160 {
 161   return c_strcasecmp (s, t) == 0;
 162 }
 163
 164 static bool
 165 strneq (char const *s, char const *t, size_t n)
 166 {
 167   return strncmp (s, t, n) == 0;
 168 }
 169
 170 static bool
 171 strncaseeq (char const *s, char const *t, size_t n)
 172 {
 173   return c_strncasecmp (s, t, n) == 0;
 174 }
 175
 176 /* C is not in a name.  */
 177 static bool
 178 notinname (unsigned char c)
 179 {
 180   /* Look at make_tag before modifying!  */
 181   static bool const table[UCHAR_MAX + 1] = {
 182     ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
 183     ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
 184   };
 185   return table[c];
 186 }
 187
 188 /* C can start a token.  */
 189 static bool
 190 begtoken (unsigned char c)
 191 {
 192   static bool const table[UCHAR_MAX + 1] = {
 193     ['$']=1, ['@']=1,
 194     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 195     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 196     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 197     ['Y']=1, ['Z']=1,
 198     ['_']=1,
 199     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 200     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 201     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 202     ['y']=1, ['z']=1,
 203     ['~']=1
 204   };
 205   return table[c];
 206 }
 207
 208 /* C can be in the middle of a token.  */
 209 static bool
 210 intoken (unsigned char c)
 211 {
 212   static bool const table[UCHAR_MAX + 1] = {
 213     ['$']=1,
 214     ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
 215     ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
 216     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 217     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 218     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 219     ['Y']=1, ['Z']=1,
 220     ['_']=1,
 221     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 222     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 223     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 224     ['y']=1, ['z']=1
 225   };
 226   return table[c];
 227 }
 228
 229 /* C can end a token.  */
 230 static bool
 231 endtoken (unsigned char c)
 232 {
 233   static bool const table[UCHAR_MAX + 1] = {
 234     ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
 235     ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
 236     ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
 237     ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
 238     ['{']=1, ['|']=1, ['}']=1, ['~']=1
 239   };
 240   return table[c];
 241 }
 242
/*
 *      xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:    Type *xnew (int n, Type);
 *              void xrnew (OldPointer, int n, Type);
 *
 * xnew yields the result of xmalloc for N objects of type Type, cast
 * to Type *.  xrnew hands OP to xrealloc together with the new size
 * and stores the cast result back into OP, so OP is evaluated twice
 * and should be an lvalue free of side effects.  Both macros compute
 * N * sizeof (Type) themselves, without an overflow check.
 */
#define xnew(n, Type)      ((Type *) xmalloc ((n) * sizeof (Type)))
#define xrnew(op, n, Type) ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
 251
/* Type of a language's parse function (the `function' member of
   `language'): it receives a FILE * and returns nothing.  */
typedef void Lang_function (FILE *);
 253
/* Describes one compressor: the file name suffix that identifies it
   and the command used to decompress such a file.  */
typedef struct
{
  const char *suffix;           /* file name suffix for this compressor */
  const char *command;          /* takes one arg and decompresses to stdout */
} compressor;
 259
/* Describes one language: its name and help text, the function that
   parses it, and the suffixes, file names and interpreters by which
   its files are recognized.  */
typedef struct
{
  const char *name;             /* language name */
  const char *help;             /* detailed help for the language */
  Lang_function *function;      /* parse function */
  const char **suffixes;        /* name suffixes of this language's files */
  const char **filenames;       /* names of this language's files */
  const char **interpreters;    /* interpreters for this language */
  bool metasource;              /* source used to generate other sources */
} language;
 270
/* Description of one input file: its names, its language and what is
   to be written about it in the tags file.  Descriptions are chained
   into a linked list through `next'.  */
typedef struct fdesc
{
  struct fdesc *next;           /* for the linked list */
  char *infname;                /* uncompressed input file name */
  char *infabsname;             /* absolute uncompressed input file name */
  char *infabsdir;              /* absolute dir of input file */
  char *taggedfname;            /* file name to write in tagfile */
  language *lang;               /* language of file */
  char *prop;                   /* file properties to write in tagfile */
  bool usecharno;               /* etags tags shall contain char number */
  bool written;                 /* entry written in the tags file */
} fdesc;
 283
/* One tag.  Nodes are linked through `left' and `right' into the
   binary tree of tags, and each one points at the description of the
   file it belongs to.  */
typedef struct node_st
{                               /* sorting structure */
  struct node_st *left, *right; /* left and right sons */
  fdesc *fdp;                   /* description of file to whom tag belongs */
  char *name;                   /* tag name */
  char *regex;                  /* search regexp */
  bool valid;                   /* write this tag on the tag file */
  bool is_func;                 /* function tag: use regexp in CTAGS mode */
  bool been_warned;             /* warning already given for duplicated tag */
  int lno;                      /* line number tag is on */
  long cno;                     /* character number line starts on */
} node;
 296
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;                    /* size of BUFFER */
  int len;                      /* length of the string held in BUFFER */
  char *buffer;                 /* the text itself */
} linebuffer;
 310
/* Used to support mixing of --lang and file names.  Each `argument'
   records what kind of argument it is, the language associated with
   it and the argument text itself.  */
typedef struct
{
  enum {
    at_language,                /* a language specification */
    at_regexp,                  /* a regular expression */
    at_filename,                /* a file name */
    at_stdin,                   /* read from stdin here */
    at_end                      /* stop parsing the list */
  } arg_type;                   /* argument type */
  language *lang;               /* language associated with the argument */
  char *what;                   /* the argument itself */
} argument;
 324
/* Structure defining a regular expression.  It holds both the source
   pattern and its compiled form, the tag name to use, and the flags
   that control matching.  Entries are chained through `p_next'.  */
typedef struct regexp
{
  struct regexp *p_next;        /* pointer to next in list */
  language *lang;               /* if set, use only for this language */
  char *pattern;                /* the regexp pattern */
  char *name;                   /* tag name */
  struct re_pattern_buffer *pat; /* the compiled pattern */
  struct re_registers regs;     /* re registers */
  bool error_signaled;          /* already signaled for this regexp */
  bool force_explicit_name;     /* do not allow implicit tag name */
  bool ignore_case;             /* ignore case when matching */
  bool multi_line;              /* do a multi-line match on the whole file */
} regexp;
 339
 340
 341 /* Many compilers barf on this:
 342         Lang_function Ada_funcs;
 343    so let's write it this way */
 344 static void Ada_funcs (FILE *);
 345 static void Asm_labels (FILE *);
 346 static void C_entries (int c_ext, FILE *);
 347 static void default_C_entries (FILE *);
 348 static void plain_C_entries (FILE *);
 349 static void Cjava_entries (FILE *);
 350 static void Cobol_paragraphs (FILE *);
 351 static void Cplusplus_entries (FILE *);
 352 static void Cstar_entries (FILE *);
 353 static void Erlang_functions (FILE *);
 354 static void Forth_words (FILE *);
 355 static void Fortran_functions (FILE *);
 356 static void Go_functions (FILE *);
 357 static void HTML_labels (FILE *);
 358 static void Lisp_functions (FILE *);
 359 static void Lua_functions (FILE *);
 360 static void Makefile_targets (FILE *);
 361 static void Pascal_functions (FILE *);
 362 static void Perl_functions (FILE *);
 363 static void PHP_functions (FILE *);
 364 static void PS_functions (FILE *);
 365 static void Prolog_functions (FILE *);
 366 static void Python_functions (FILE *);
 367 static void Ruby_functions (FILE *);
 368 static void Scheme_functions (FILE *);
 369 static void TeX_commands (FILE *);
 370 static void Texinfo_nodes (FILE *);
 371 static void Yacc_entries (FILE *);
 372 static void just_read_file (FILE *);
 373
 374 static language *get_language_from_langname (const char *);
 375 static void readline (linebuffer *, FILE *);
 376 static long readline_internal (linebuffer *, FILE *, char const *);
 377 static bool nocase_tail (const char *);
 378 static void get_tag (char *, char **);
 379
 380 static void analyze_regex (char *);
 381 static void free_regexps (void);
 382 static void regex_tag_multiline (void);
 383 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
 384 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
 385 static _Noreturn void suggest_asking_for_help (void);
 386 static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
 387 static _Noreturn void pfatal (const char *);
 388 static void add_node (node *, node **);
 389
 390 static void process_file_name (char *, language *);
 391 static void process_file (FILE *, char *, language *);
 392 static void find_entries (FILE *);
 393 static void free_tree (node *);
 394 static void free_fdesc (fdesc *);
 395 static void pfnote (char *, bool, char *, int, int, long);
 396 static void invalidate_nodes (fdesc *, node **);
 397 static void put_entries (node *);
 398
 399 static char *concat (const char *, const char *, const char *);
 400 static char *skip_spaces (char *);
 401 static char *skip_non_spaces (char *);
 402 static char *skip_name (char *);
 403 static char *savenstr (const char *, int);
 404 static char *savestr (const char *);
 405 static char *etags_getcwd (void);
 406 static char *relative_filename (char *, char *);
 407 static char *absolute_filename (char *, char *);
 408 static char *absolute_dirname (char *, char *);
 409 static bool filename_is_absolute (char *f);
 410 static void canonicalize_filename (char *);
 411 static char *etags_mktmp (void);
 412 static void linebuffer_init (linebuffer *);
 413 static void linebuffer_setlen (linebuffer *, int);
 414 static void *xmalloc (size_t);
 415 static void *xrealloc (void *, size_t);
 416
 417 \f
 418 static char searchar = '/';     /* use /.../ searches */
 419
 420 static char *tagfile;           /* output file */
 421 static char *progname;          /* name this program was invoked with */
 422 static char *cwd;               /* current working directory */
 423 static char *tagfiledir;        /* directory of tagfile */
 424 static FILE *tagf;              /* ioptr for tags file */
 425 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */
 426
 427 static fdesc *fdhead;           /* head of file description list */
 428 static fdesc *curfdp;           /* current file description */
 429 static char *infilename;        /* current input file name */
 430 static int lineno;              /* line number of current line */
 431 static long charno;             /* current character number */
 432 static long linecharno;         /* charno of start of current line */
 433 static char *dbp;               /* pointer to start of current tag */
 434
 435 static const int invalidcharno = -1;
 436
 437 static node *nodehead;          /* the head of the binary tree of tags */
 438 static node *last_node;         /* the last node created */
 439
 440 static linebuffer lb;           /* the current line */
 441 static linebuffer filebuf;      /* a buffer containing the whole file */
 442 static linebuffer token_name;   /* a buffer containing a tag name */
 443
 444 static bool append_to_tagfile;  /* -a: append to tags */
 445 /* The next five default to true in C and derived languages.  */
 446 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 447 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 448                                 /* 0 struct/enum/union decls, and C++ */
 449                                 /* member functions. */
 450 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 451                                 /* constants and variables. */
 452                                 /* -D: opposite of -d.  Default under ctags. */
 453 static int globals;             /* create tags for global variables */
 454 static int members;             /* create tags for C member variables */
 455 static int declarations;        /* --declarations: tag them and extern in C&Co*/
 456 static int no_line_directive;   /* ignore #line directives (undocumented) */
 457 static int no_duplicates;       /* no duplicate tags for ctags (undocumented) */
 458 static bool update;             /* -u: update tags */
 459 static bool vgrind_style;       /* -v: create vgrind style index output */
 460 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 461 static bool cxref_style;        /* -x: create cxref style output */
 462 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 463 static bool ignoreindent;       /* -I: ignore indentation in C */
 464 static int packages_only;       /* --packages-only: in Ada, only tag packages*/
 465 static int class_qualify;       /* -Q: produce class-qualified tags in C++/Java */
 466
 467 /* STDIN is defined in LynxOS system headers */
 468 #ifdef STDIN
 469 # undef STDIN
 470 #endif
 471
 472 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 473 static bool parsing_stdin;      /* --parse-stdin used */
 474
 475 static regexp *p_head;          /* list of all regexps */
 476 static bool need_filebuf;       /* some regexes are multi-line */
 477
/* Table of long options (presumably the one handed to getopt_long).
   Each entry gives the option name, whether it takes an argument, an
   optional pointer to an int flag variable, and a value.  Entries
   with a NULL flag carry an option character, or STDIN, as their
   value; entries with a flag pointer name the variable tied to the
   option and the value meant for it.  The preprocessor selects the
   ctags-only or the etags-only options, and a { NULL } entry ends
   the table.
   NOTE(review): "help" is listed twice, with values 'h' and 'H';
   confirm that the second entry is intentional and reachable.  */
static struct option longopts[] =
{
  { "append",             no_argument,       NULL,               'a'   },
  { "packages-only",      no_argument,       &packages_only,     1     },
  { "c++",                no_argument,       NULL,               'C'   },
  { "declarations",       no_argument,       &declarations,      1     },
  { "no-line-directive",  no_argument,       &no_line_directive, 1     },
  { "no-duplicates",      no_argument,       &no_duplicates,     1     },
  { "help",               no_argument,       NULL,               'h'   },
  { "help",               no_argument,       NULL,               'H'   },
  { "ignore-indentation", no_argument,       NULL,               'I'   },
  { "language",           required_argument, NULL,               'l'   },
  { "members",            no_argument,       &members,           1     },
  { "no-members",         no_argument,       &members,           0     },
  { "output",             required_argument, NULL,               'o'   },
  { "class-qualify",      no_argument,       &class_qualify,     'Q'   },
  { "regex",              required_argument, NULL,               'r'   },
  { "no-regex",           no_argument,       NULL,               'R'   },
  { "ignore-case-regex",  required_argument, NULL,               'c'   },
  { "parse-stdin",        required_argument, NULL,               STDIN },
  { "version",            no_argument,       NULL,               'V'   },

#if CTAGS /* Ctags options */
  { "backward-search",    no_argument,       NULL,               'B'   },
  { "cxref",              no_argument,       NULL,               'x'   },
  { "defines",            no_argument,       NULL,               'd'   },
  { "globals",            no_argument,       &globals,           1     },
  { "typedefs",           no_argument,       NULL,               't'   },
  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  { "update",             no_argument,       NULL,               'u'   },
  { "vgrind",             no_argument,       NULL,               'v'   },
  { "no-warn",            no_argument,       NULL,               'w'   },

#else /* Etags options */
  { "no-defines",         no_argument,       NULL,               'D'   },
  { "no-globals",         no_argument,       &globals,           0     },
  { "include",            required_argument, NULL,               'i'   },
#endif
  { NULL }
};
 518
/* Table of known compressors: each entry pairs a file name suffix
   (written without a leading dot) with the command that decompresses
   such a file to stdout.  An entry whose suffix is NULL ends the
   table.  */
static compressor compressors[] =
{
  { "z", "gzip -d -c"},
  { "Z", "gzip -d -c"},
  { "gz", "gzip -d -c"},
  { "GZ", "gzip -d -c"},
  { "bz2", "bzip2 -d -c" },
  { "xz", "xz -d -c" },
  { NULL }
};
 529
 530 /*
 531  * Language stuff.
 532  */
 533
 534 /* Ada code */
 535 static const char *Ada_suffixes [] =
 536   { "ads", "adb", "ada", NULL };
 537 static const char Ada_help [] =
 538 "In Ada code, functions, procedures, packages, tasks and types are\n\
 539 tags.  Use the '--packages-only' option to create tags for\n\
 540 packages only.\n\
 541 Ada tag names have suffixes indicating the type of entity:\n\
 542         Entity type:    Qualifier:\n\
 543         ------------    ----------\n\
 544         function        /f\n\
 545         procedure       /p\n\
 546         package spec    /s\n\
 547         package body    /b\n\
 548         type            /t\n\
 549         task            /k\n\
 550 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 551 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
 552 will just search for any tag 'bidule'.";
 553
 554 /* Assembly code */
 555 static const char *Asm_suffixes [] =
 556   { "a",        /* Unix assembler */
 557     "asm", /* Microcontroller assembly */
 558     "def", /* BSO/Tasking definition includes  */
 559     "inc", /* Microcontroller include files */
 560     "ins", /* Microcontroller include files */
 561     "s", "sa", /* Unix assembler */
 562     "S",   /* cpp-processed Unix assembler */
 563     "src", /* BSO/Tasking C compiler output */
 564     NULL
 565   };
 566 static const char Asm_help [] =
 567 "In assembler code, labels appearing at the beginning of a line,\n\
 568 followed by a colon, are tags.";
 569
 570
 571 /* Note that .c and .h can be considered C++, if the --c++ flag was
 572    given, or if the `class' or `template' keywords are met inside the file.
 573    That is why default_C_entries is called for these. */
 574 static const char *default_C_suffixes [] =
 575   { "c", "h", NULL };
 576 #if CTAGS                               /* C help for Ctags */
 577 static const char default_C_help [] =
 578 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 579 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
 580 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
 581 Use --globals to tag global variables.\n\
 582 You can tag function declarations and external variables by\n\
 583 using '--declarations', and struct members by using '--members'.";
 584 #else                                   /* C help for Etags */
 585 static const char default_C_help [] =
 586 "In C code, any C function or typedef is a tag, and so are\n\
 587 definitions of 'struct', 'union' and 'enum'.  '#define' macro\n\
 588 definitions and 'enum' constants are tags unless you specify\n\
 589 '--no-defines'.  Global variables are tags unless you specify\n\
 590 '--no-globals' and so are struct members unless you specify\n\
 591 '--no-members'.  Use of '--no-globals', '--no-defines' and\n\
 592 '--no-members' can make the tags table file much smaller.\n\
 593 You can tag function declarations and external variables by\n\
 594 using '--declarations'.";
 595 #endif  /* C help for Ctags and Etags */
 596
 597 static const char *Cplusplus_suffixes [] =
 598   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 599     "M",                        /* Objective C++ */
 600     "pdb",                      /* PostScript with C syntax */
 601     NULL };
 602 static const char Cplusplus_help [] =
 603 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 604 --help --lang=c --lang=c++ for full help.)\n\
 605 In addition to C tags, member functions are also recognized.  Member\n\
 606 variables are recognized unless you use the '--no-members' option.\n\
 607 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
 608 and 'CLASS::FUNCTION'.  'operator' definitions have tag names like\n\
 609 'operator+'.";
 610
 611 static const char *Cjava_suffixes [] =
 612   { "java", NULL };
 613 static char Cjava_help [] =
 614 "In Java code, all the tags constructs of C and C++ code are\n\
 615 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 616
 617
 618 static const char *Cobol_suffixes [] =
 619   { "COB", "cob", NULL };
 620 static char Cobol_help [] =
 621 "In Cobol code, tags are paragraph names; that is, any word\n\
 622 starting in column 8 and followed by a period.";
 623
 624 static const char *Cstar_suffixes [] =
 625   { "cs", "hs", NULL };
 626
 627 static const char *Erlang_suffixes [] =
 628   { "erl", "hrl", NULL };
 629 static const char Erlang_help [] =
 630 "In Erlang code, the tags are the functions, records and macros\n\
 631 defined in the file.";
 632
 633 const char *Forth_suffixes [] =
 634   { "fth", "tok", NULL };
 635 static const char Forth_help [] =
 636 "In Forth code, tags are words defined by ':',\n\
 637 constant, code, create, defer, value, variable, buffer:, field.";
 638
 639 static const char *Fortran_suffixes [] =
 640   { "F", "f", "f90", "for", NULL };
 641 static const char Fortran_help [] =
 642 "In Fortran code, functions, subroutines and block data are tags.";
 643
 644 static const char *Go_suffixes [] = {"go", NULL};
 645 static const char Go_help [] =
 646   "In Go code, functions, interfaces and packages are tags.";
 647
 648 static const char *HTML_suffixes [] =
 649   { "htm", "html", "shtml", NULL };
 650 static const char HTML_help [] =
 651 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
 652 'h3' headers.  Also, tags are 'name=' in anchors and all\n\
 653 occurrences of 'id='.";
 654
 655 static const char *Lisp_suffixes [] =
 656   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 657 static const char Lisp_help [] =
 658 "In Lisp code, any function defined with 'defun', any variable\n\
 659 defined with 'defvar' or 'defconst', and in general the first\n\
 660 argument of any expression that starts with '(def' in column zero\n\
 661 is a tag.\n\
 662 The '--declarations' option tags \"(defvar foo)\" constructs too.";
 663
 664 static const char *Lua_suffixes [] =
 665   { "lua", "LUA", NULL };
 666 static const char Lua_help [] =
 667 "In Lua scripts, all functions are tags.";
 668
 669 static const char *Makefile_filenames [] =
 670   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 671 static const char Makefile_help [] =
 672 "In makefiles, targets are tags; additionally, variables are tags\n\
 673 unless you specify '--no-globals'.";
 674
 675 static const char *Objc_suffixes [] =
 676   { "lm",                       /* Objective lex file */
 677     "m",                        /* Objective C file */
 678      NULL };
 679 static const char Objc_help [] =
 680 "In Objective C code, tags include Objective C definitions for classes,\n\
 681 class categories, methods and protocols.  Tags for variables and\n\
 682 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\
 683 \n(Use --help --lang=c --lang=objc --lang=java for full help.)";
 684
 685 static const char *Pascal_suffixes [] =
 686   { "p", "pas", NULL };
 687 static const char Pascal_help [] =
 688 "In Pascal code, the tags are the functions and procedures defined\n\
 689 in the file.";
 690 /* " // this is for working around an Emacs highlighting bug... */
 691
 692 static const char *Perl_suffixes [] =
 693   { "pl", "pm", NULL };
 694 static const char *Perl_interpreters [] =
 695   { "perl", "@PERL@", NULL };
 696 static const char Perl_help [] =
 697 "In Perl code, the tags are the packages, subroutines and variables\n\
 698 defined by the 'package', 'sub', 'my' and 'local' keywords.  Use\n\
 699 '--globals' if you want to tag global variables.  Tags for\n\
 700 subroutines are named 'PACKAGE::SUB'.  The name for subroutines\n\
 701 defined in the default package is 'main::SUB'.";
 702
 703 static const char *PHP_suffixes [] =
 704   { "php", "php3", "php4", NULL };
 705 static const char PHP_help [] =
 706 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 707 the '--no-members' option, vars are tags too.";
 708
 709 static const char *plain_C_suffixes [] =
 710   { "pc",                       /* Pro*C file */
 711      NULL };
 712
 713 static const char *PS_suffixes [] =
 714   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 715 static const char PS_help [] =
 716 "In PostScript code, the tags are the functions.";
 717
 718 static const char *Prolog_suffixes [] =
 719   { "prolog", NULL };
 720 static const char Prolog_help [] =
 721 "In Prolog code, tags are predicates and rules at the beginning of\n\
 722 line.";
 723
 724 static const char *Python_suffixes [] =
 725   { "py", NULL };
 726 static const char Python_help [] =
 727 "In Python code, 'def' or 'class' at the beginning of a line\n\
 728 generate a tag.";
 729
 730 static const char *Ruby_suffixes [] =
 731   { "rb", "ru", "rbw", NULL };
 732 static const char *Ruby_filenames [] =
 733   { "Rakefile", "Thorfile", NULL };
 734 static const char Ruby_help [] =
 735   "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
 736 a line generate a tag.  Constants also generate a tag.";
 737
 738 /* Can't do the `SCM' or `scm' prefix with a version number. */
 739 static const char *Scheme_suffixes [] =
 740   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 741 static const char Scheme_help [] =
 742 "In Scheme code, tags include anything defined with 'def' or with a\n\
 743 construct whose name starts with 'def'.  They also include\n\
 744 variables set with 'set!' at top level in the file.";
 745
 746 static const char *TeX_suffixes [] =
 747   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 748 static const char TeX_help [] =
 749 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
 750 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
 751 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
 752 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
 753 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
 754 \n\
 755 Other commands can be specified by setting the environment variable\n\
 756 'TEXTAGS' to a colon-separated list like, for example,\n\
 757      TEXTAGS=\"mycommand:myothercommand\".";
 758
 759
 760 static const char *Texinfo_suffixes [] =
 761   { "texi", "texinfo", "txi", NULL };
 762 static const char Texinfo_help [] =
 763 "for texinfo files, lines starting with @node are tagged.";
 764
 765 static const char *Yacc_suffixes [] =
 766   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 767 static const char Yacc_help [] =
 768 "In Bison or Yacc input files, each rule defines as a tag the\n\
 769 nonterminal it constructs.  The portions of the file that contain\n\
 770 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 771 for full help).";
 772
 773 static const char auto_help [] =
 774 "'auto' is not a real language, it indicates to use\n\
 775 a default language for files base on file name suffix and file contents.";
 776
 777 static const char none_help [] =
 778 "'none' is not a real language, it indicates to only do\n\
 779 regexp processing on files.";
 780
 781 static const char no_lang_help [] =
 782 "No detailed help available for this language.";
 783
 784
 785 /*
 786  * Table of languages.
 787  *
 788  * It is ok for a given function to be listed under more than one
 789  * name.  I just didn't.
 790  */
 791
 792 static language lang_names [] =
 793 {
 794   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 795   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 796   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 797   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 798   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 799   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 800   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 801   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 802   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 803   { "go",        Go_help,        Go_functions,      Go_suffixes        },
 804   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 805   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 806   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 807   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 808   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 809   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 810   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 811   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 812   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 813   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 814   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 815   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 816   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 817   { "ruby",      Ruby_help,Ruby_functions,Ruby_suffixes,Ruby_filenames },
 818   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 819   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 820   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 821   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
 822   { "auto",      auto_help },                      /* default guessing scheme */
 823   { "none",      none_help,      just_read_file }, /* regexp matching only */
 824   { NULL }                /* end of list */
 825 };
 826
 827 \f
 828 static void
 829 print_language_names (void)
 830 {
 831   language *lang;
 832   const char **name, **ext;
 833
 834   puts ("\nThese are the currently supported languages, along with the\n\
 835 default file names and dot suffixes:");
 836   for (lang = lang_names; lang->name != NULL; lang++)
 837     {
 838       printf ("  %-*s", 10, lang->name);
 839       if (lang->filenames != NULL)
 840         for (name = lang->filenames; *name != NULL; name++)
 841           printf (" %s", *name);
 842       if (lang->suffixes != NULL)
 843         for (ext = lang->suffixes; *ext != NULL; ext++)
 844           printf (" .%s", *ext);
 845       puts ("");
 846     }
 847   puts ("where 'auto' means use default language for files based on file\n\
 848 name suffix, and 'none' means only do regexp processing on files.\n\
 849 If no language is specified and no matching suffix is found,\n\
 850 the first line of the file is read for a sharp-bang (#!) sequence\n\
 851 followed by the name of an interpreter.  If no such sequence is found,\n\
 852 Fortran is tried first; if no tags are found, C is tried next.\n\
 853 When parsing any C file, a \"class\" or \"template\" keyword\n\
 854 switches to C++.");
 855   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 856 \n\
 857 For detailed help on a given language use, for example,\n\
 858 etags --help --lang=ada.");
 859 }
 860
 861 #ifndef EMACS_NAME
 862 # define EMACS_NAME "standalone"
 863 #endif
 864 #ifndef VERSION
 865 # define VERSION "17.38.1.4"
 866 #endif
 867 static _Noreturn void
 868 print_version (void)
 869 {
 870   char emacs_copyright[] = COPYRIGHT;
 871
 872   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 873   puts (emacs_copyright);
 874   puts ("This program is distributed under the terms in ETAGS.README");
 875
 876   exit (EXIT_SUCCESS);
 877 }
 878
 879 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 880 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
 881 #endif
 882
 883 static _Noreturn void
 884 print_help (argument *argbuffer)
 885 {
 886   bool help_for_lang = false;
 887
 888   for (; argbuffer->arg_type != at_end; argbuffer++)
 889     if (argbuffer->arg_type == at_language)
 890       {
 891         if (help_for_lang)
 892           puts ("");
 893         puts (argbuffer->lang->help);
 894         help_for_lang = true;
 895       }
 896
 897   if (help_for_lang)
 898     exit (EXIT_SUCCESS);
 899
 900   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 901 \n\
 902 These are the options accepted by %s.\n", progname, progname);
 903   puts ("You may use unambiguous abbreviations for the long option names.");
 904   puts ("  A - as file name means read names from stdin (one per line).\n\
 905 Absolute names are stored in the output file as they are.\n\
 906 Relative ones are stored relative to the output file's directory.\n");
 907
 908   puts ("-a, --append\n\
 909         Append tag entries to existing tags file.");
 910
 911   puts ("--packages-only\n\
 912         For Ada files, only generate tags for packages.");
 913
 914   if (CTAGS)
 915     puts ("-B, --backward-search\n\
 916         Write the search commands for the tag entries using '?', the\n\
 917         backward-search command instead of '/', the forward-search command.");
 918
 919   /* This option is mostly obsolete, because etags can now automatically
 920      detect C++.  Retained for backward compatibility and for debugging and
 921      experimentation.  In principle, we could want to tag as C++ even
 922      before any "class" or "template" keyword.
 923   puts ("-C, --c++\n\
 924         Treat files whose name suffix defaults to C language as C++ files.");
 925   */
 926
 927   puts ("--declarations\n\
 928         In C and derived languages, create tags for function declarations,");
 929   if (CTAGS)
 930     puts ("\tand create tags for extern variables if --globals is used.");
 931   else
 932     puts
 933       ("\tand create tags for extern variables unless --no-globals is used.");
 934
 935   if (CTAGS)
 936     puts ("-d, --defines\n\
 937         Create tag entries for C #define constants and enum constants, too.");
 938   else
 939     puts ("-D, --no-defines\n\
 940         Don't create tag entries for C #define constants and enum constants.\n\
 941         This makes the tags file smaller.");
 942
 943   if (!CTAGS)
 944     puts ("-i FILE, --include=FILE\n\
 945         Include a note in tag file indicating that, when searching for\n\
 946         a tag, one should also consult the tags file FILE after\n\
 947         checking the current file.");
 948
 949   puts ("-l LANG, --language=LANG\n\
 950         Force the following files to be considered as written in the\n\
 951         named language up to the next --language=LANG option.");
 952
 953   if (CTAGS)
 954     puts ("--globals\n\
 955         Create tag entries for global variables in some languages.");
 956   else
 957     puts ("--no-globals\n\
 958         Do not create tag entries for global variables in some\n\
 959         languages.  This makes the tags file smaller.");
 960
 961   puts ("--no-line-directive\n\
 962         Ignore #line preprocessor directives in C and derived languages.");
 963
 964   if (CTAGS)
 965     puts ("--members\n\
 966         Create tag entries for members of structures in some languages.");
 967   else
 968     puts ("--no-members\n\
 969         Do not create tag entries for members of structures\n\
 970         in some languages.");
 971
 972   puts ("-Q, --class-qualify\n\
 973         Qualify tag names with their class name in C++, ObjC, Java, and Perl.\n\
 974         This produces tag names of the form \"class::member\" for C++,\n\
 975         \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
 976         For Objective C, this also produces class methods qualified with\n\
 977         their arguments, as in \"foo:bar:baz:more\".\n\
 978         For Perl, this produces \"package::member\".");
 979   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 980         Make a tag for each line matching a regular expression pattern\n\
 981         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 982         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 983         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 984         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 985   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 986         For example Tcl named tags can be created with:\n\
 987           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 988         MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
 989         'm' means to allow multi-line matches, 's' implies 'm' and\n\
 990         causes dot to match any character, including newline.");
 991
 992   puts ("-R, --no-regex\n\
 993         Don't create tags from regexps for the following files.");
 994
 995   puts ("-I, --ignore-indentation\n\
 996         In C and C++ do not assume that a closing brace in the first\n\
 997         column is the final brace of a function or structure definition.");
 998
 999   puts ("-o FILE, --output=FILE\n\
1000         Write the tags to FILE.");
1001
1002   puts ("--parse-stdin=NAME\n\
1003         Read from standard input and record tags as belonging to file NAME.");
1004
1005   if (CTAGS)
1006     {
1007       puts ("-t, --typedefs\n\
1008         Generate tag entries for C and Ada typedefs.");
1009       puts ("-T, --typedefs-and-c++\n\
1010         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1011         and C++ member functions.");
1012     }
1013
1014   if (CTAGS)
1015     puts ("-u, --update\n\
1016         Update the tag entries for the given files, leaving tag\n\
1017         entries for other files in place.  Currently, this is\n\
1018         implemented by deleting the existing entries for the given\n\
1019         files and then rewriting the new entries at the end of the\n\
1020         tags file.  It is often faster to simply rebuild the entire\n\
1021         tag file than to use this.");
1022
1023   if (CTAGS)
1024     {
1025       puts ("-v, --vgrind\n\
1026         Print on the standard output an index of items intended for\n\
1027         human consumption, similar to the output of vgrind.  The index\n\
1028         is sorted, and gives the page number of each item.");
1029
1030       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1031         puts ("-w, --no-duplicates\n\
1032         Do not create duplicate tag entries, for compatibility with\n\
1033         traditional ctags.");
1034
1035       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1036         puts ("-w, --no-warn\n\
1037         Suppress warning messages about duplicate tag entries.");
1038
1039       puts ("-x, --cxref\n\
1040         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1041         The output uses line numbers instead of page numbers, but\n\
1042         beyond that the differences are cosmetic; try both to see\n\
1043         which you like.");
1044     }
1045
1046   puts ("-V, --version\n\
1047         Print the version of the program.\n\
1048 -h, --help\n\
1049         Print this help message.\n\
1050         Followed by one or more '--language' options prints detailed\n\
1051         help about tag generation for the specified languages.");
1052
1053   print_language_names ();
1054
1055   puts ("");
1056   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1057
1058   exit (EXIT_SUCCESS);
1059 }
1060
1061 \f
1062 int
1063 main (int argc, char **argv)
1064 {
1065   int i;
1066   unsigned int nincluded_files;
1067   char **included_files;
1068   argument *argbuffer;
1069   int current_arg, file_count;
1070   linebuffer filename_lb;
1071   bool help_asked = false;
1072   ptrdiff_t len;
1073   char *optstring;
1074   int opt;
1075
1076   progname = argv[0];
1077   nincluded_files = 0;
1078   included_files = xnew (argc, char *);
1079   current_arg = 0;
1080   file_count = 0;
1081
1082   /* Allocate enough no matter what happens.  Overkill, but each one
1083      is small. */
1084   argbuffer = xnew (argc, argument);
1085
1086   /*
1087    * Always find typedefs and structure tags.
1088    * Also default to find macro constants, enum constants, struct
1089    * members and global variables.  Do it for both etags and ctags.
1090    */
1091   typedefs = typedefs_or_cplusplus = constantypedefs = true;
1092   globals = members = true;
1093
1094   /* When the optstring begins with a '-' getopt_long does not rearrange the
1095      non-options arguments to be at the end, but leaves them alone. */
1096   optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1097                       (CTAGS) ? "BxdtTuvw" : "Di:",
1098                       "");
1099
1100   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1101     switch (opt)
1102       {
1103       case 0:
1104         /* If getopt returns 0, then it has already processed a
1105            long-named option.  We should do nothing.  */
1106         break;
1107
1108       case 1:
1109         /* This means that a file name has been seen.  Record it. */
1110         argbuffer[current_arg].arg_type = at_filename;
1111         argbuffer[current_arg].what     = optarg;
1112         len = strlen (optarg);
1113         if (whatlen_max < len)
1114           whatlen_max = len;
1115         ++current_arg;
1116         ++file_count;
1117         break;
1118
1119       case STDIN:
1120         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1121         argbuffer[current_arg].arg_type = at_stdin;
1122         argbuffer[current_arg].what     = optarg;
1123         len = strlen (optarg);
1124         if (whatlen_max < len)
1125           whatlen_max = len;
1126         ++current_arg;
1127         ++file_count;
1128         if (parsing_stdin)
1129           fatal ("cannot parse standard input more than once");
1130         parsing_stdin = true;
1131         break;
1132
1133         /* Common options. */
1134       case 'a': append_to_tagfile = true;       break;
1135       case 'C': cplusplus = true;               break;
1136       case 'f':         /* for compatibility with old makefiles */
1137       case 'o':
1138         if (tagfile)
1139           {
1140             error ("-o option may only be given once.");
1141             suggest_asking_for_help ();
1142             /* NOTREACHED */
1143           }
1144         tagfile = optarg;
1145         break;
1146       case 'I':
1147       case 'S':         /* for backward compatibility */
1148         ignoreindent = true;
1149         break;
1150       case 'l':
1151         {
1152           language *lang = get_language_from_langname (optarg);
1153           if (lang != NULL)
1154             {
1155               argbuffer[current_arg].lang = lang;
1156               argbuffer[current_arg].arg_type = at_language;
1157               ++current_arg;
1158             }
1159         }
1160         break;
1161       case 'c':
1162         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1163         optarg = concat (optarg, "i", ""); /* memory leak here */
1164         /* FALLTHRU */
1165       case 'r':
1166         argbuffer[current_arg].arg_type = at_regexp;
1167         argbuffer[current_arg].what = optarg;
1168         len = strlen (optarg);
1169         if (whatlen_max < len)
1170           whatlen_max = len;
1171         ++current_arg;
1172         break;
1173       case 'R':
1174         argbuffer[current_arg].arg_type = at_regexp;
1175         argbuffer[current_arg].what = NULL;
1176         ++current_arg;
1177         break;
1178       case 'V':
1179         print_version ();
1180         break;
1181       case 'h':
1182       case 'H':
1183         help_asked = true;
1184         break;
1185       case 'Q':
1186         class_qualify = 1;
1187         break;
1188
1189         /* Etags options */
1190       case 'D': constantypedefs = false;                        break;
1191       case 'i': included_files[nincluded_files++] = optarg;     break;
1192
1193         /* Ctags options. */
1194       case 'B': searchar = '?';                                 break;
1195       case 'd': constantypedefs = true;                         break;
1196       case 't': typedefs = true;                                break;
1197       case 'T': typedefs = typedefs_or_cplusplus = true;        break;
1198       case 'u': update = true;                                  break;
1199       case 'v': vgrind_style = true;                      /*FALLTHRU*/
1200       case 'x': cxref_style = true;                             break;
1201       case 'w': no_warnings = true;                             break;
1202       default:
1203         suggest_asking_for_help ();
1204         /* NOTREACHED */
1205       }
1206
1207   /* No more options.  Store the rest of arguments. */
1208   for (; optind < argc; optind++)
1209     {
1210       argbuffer[current_arg].arg_type = at_filename;
1211       argbuffer[current_arg].what = argv[optind];
1212       len = strlen (argv[optind]);
1213       if (whatlen_max < len)
1214         whatlen_max = len;
1215       ++current_arg;
1216       ++file_count;
1217     }
1218
1219   argbuffer[current_arg].arg_type = at_end;
1220
1221   if (help_asked)
1222     print_help (argbuffer);
1223     /* NOTREACHED */
1224
1225   if (nincluded_files == 0 && file_count == 0)
1226     {
1227       error ("no input files specified.");
1228       suggest_asking_for_help ();
1229       /* NOTREACHED */
1230     }
1231
1232   if (tagfile == NULL)
1233     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1234   cwd = etags_getcwd ();        /* the current working directory */
1235   if (cwd[strlen (cwd) - 1] != '/')
1236     {
1237       char *oldcwd = cwd;
1238       cwd = concat (oldcwd, "/", "");
1239       free (oldcwd);
1240     }
1241
1242   /* Compute base directory for relative file names. */
1243   if (streq (tagfile, "-")
1244       || strneq (tagfile, "/dev/", 5))
1245     tagfiledir = cwd;            /* relative file names are relative to cwd */
1246   else
1247     {
1248       canonicalize_filename (tagfile);
1249       tagfiledir = absolute_dirname (tagfile, cwd);
1250     }
1251
1252   linebuffer_init (&lb);
1253   linebuffer_init (&filename_lb);
1254   linebuffer_init (&filebuf);
1255   linebuffer_init (&token_name);
1256
1257   if (!CTAGS)
1258     {
1259       if (streq (tagfile, "-"))
1260         {
1261           tagf = stdout;
1262           SET_BINARY (fileno (stdout));
1263         }
1264       else
1265         tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1266       if (tagf == NULL)
1267         pfatal (tagfile);
1268     }
1269
1270   /*
1271    * Loop through files finding functions.
1272    */
1273   for (i = 0; i < current_arg; i++)
1274     {
1275       static language *lang;    /* non-NULL if language is forced */
1276       char *this_file;
1277
1278       switch (argbuffer[i].arg_type)
1279         {
1280         case at_language:
1281           lang = argbuffer[i].lang;
1282           break;
1283         case at_regexp:
1284           analyze_regex (argbuffer[i].what);
1285           break;
1286         case at_filename:
1287               this_file = argbuffer[i].what;
1288               /* Input file named "-" means read file names from stdin
1289                  (one per line) and use them. */
1290               if (streq (this_file, "-"))
1291                 {
1292                   if (parsing_stdin)
1293                     fatal ("cannot parse standard input "
1294                            "AND read file names from it");
1295                   while (readline_internal (&filename_lb, stdin, "-") > 0)
1296                     process_file_name (filename_lb.buffer, lang);
1297                 }
1298               else
1299                 process_file_name (this_file, lang);
1300           break;
1301         case at_stdin:
1302           this_file = argbuffer[i].what;
1303           process_file (stdin, this_file, lang);
1304           break;
1305         default:
1306           error ("internal error: arg_type");
1307         }
1308     }
1309
1310   free_regexps ();
1311   free (lb.buffer);
1312   free (filebuf.buffer);
1313   free (token_name.buffer);
1314
1315   if (!CTAGS || cxref_style)
1316     {
1317       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1318       put_entries (nodehead);
1319       free_tree (nodehead);
1320       nodehead = NULL;
1321       if (!CTAGS)
1322         {
1323           fdesc *fdp;
1324
1325           /* Output file entries that have no tags. */
1326           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1327             if (!fdp->written)
1328               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1329
1330           while (nincluded_files-- > 0)
1331             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1332
1333           if (fclose (tagf) == EOF)
1334             pfatal (tagfile);
1335         }
1336
1337       exit (EXIT_SUCCESS);
1338     }
1339
1340   /* From here on, we are in (CTAGS && !cxref_style) */
1341   if (update)
1342     {
1343       char *cmd =
1344         xmalloc (strlen (tagfile) + whatlen_max +
1345                  sizeof "mv..OTAGS;grep -Fv '\t\t' OTAGS >;rm OTAGS");
1346       for (i = 0; i < current_arg; ++i)
1347         {
1348           switch (argbuffer[i].arg_type)
1349             {
1350             case at_filename:
1351             case at_stdin:
1352               break;
1353             default:
1354               continue;         /* the for loop */
1355             }
1356           char *z = stpcpy (cmd, "mv ");
1357           z = stpcpy (z, tagfile);
1358           z = stpcpy (z, " OTAGS;grep -Fv '\t");
1359           z = stpcpy (z, argbuffer[i].what);
1360           z = stpcpy (z, "\t' OTAGS >");
1361           z = stpcpy (z, tagfile);
1362           strcpy (z, ";rm OTAGS");
1363           if (system (cmd) != EXIT_SUCCESS)
1364             fatal ("failed to execute shell command");
1365         }
1366       free (cmd);
1367       append_to_tagfile = true;
1368     }
1369
1370   tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1371   if (tagf == NULL)
1372     pfatal (tagfile);
1373   put_entries (nodehead);       /* write all the tags (CTAGS) */
1374   free_tree (nodehead);
1375   nodehead = NULL;
1376   if (fclose (tagf) == EOF)
1377     pfatal (tagfile);
1378
1379   if (CTAGS)
1380     if (append_to_tagfile || update)
1381       {
1382         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1383         /* Maybe these should be used:
1384            setenv ("LC_COLLATE", "C", 1);
1385            setenv ("LC_ALL", "C", 1); */
1386         char *z = stpcpy (cmd, "sort -u -o ");
1387         z = stpcpy (z, tagfile);
1388         *z++ = ' ';
1389         strcpy (z, tagfile);
1390         exit (system (cmd));
1391       }
1392   return EXIT_SUCCESS;
1393 }
1394
1395
1396 /*
1397  * Return a compressor given the file name.  If EXTPTR is non-zero,
1398  * return a pointer into FILE where the compressor-specific
1399  * extension begins.  If no compressor is found, NULL is returned
1400  * and EXTPTR is not significant.
1401  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1402  */
1403 static compressor *
1404 get_compressor_from_suffix (char *file, char **extptr)
1405 {
1406   compressor *compr;
1407   char *slash, *suffix;
1408
1409   /* File has been processed by canonicalize_filename,
1410      so we don't need to consider backslashes on DOS_NT.  */
1411   slash = strrchr (file, '/');
1412   suffix = strrchr (file, '.');
1413   if (suffix == NULL || suffix < slash)
1414     return NULL;
1415   if (extptr != NULL)
1416     *extptr = suffix;
1417   suffix += 1;
1418   /* Let those poor souls who live with DOS 8+3 file name limits get
1419      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1420      Only the first do loop is run if not MSDOS */
1421   do
1422     {
1423       for (compr = compressors; compr->suffix != NULL; compr++)
1424         if (streq (compr->suffix, suffix))
1425           return compr;
1426       if (!MSDOS)
1427         break;                  /* do it only once: not really a loop */
1428       if (extptr != NULL)
1429         *extptr = ++suffix;
1430     } while (*suffix != '\0');
1431   return NULL;
1432 }
1433
1434
1435
1436 /*
1437  * Return a language given the name.
1438  */
1439 static language *
1440 get_language_from_langname (const char *name)
1441 {
1442   language *lang;
1443
1444   if (name == NULL)
1445     error ("empty language name");
1446   else
1447     {
1448       for (lang = lang_names; lang->name != NULL; lang++)
1449         if (streq (name, lang->name))
1450           return lang;
1451       error ("unknown language \"%s\"", name);
1452     }
1453
1454   return NULL;
1455 }
1456
1457
1458 /*
1459  * Return a language given the interpreter name.
1460  */
1461 static language *
1462 get_language_from_interpreter (char *interpreter)
1463 {
1464   language *lang;
1465   const char **iname;
1466
1467   if (interpreter == NULL)
1468     return NULL;
1469   for (lang = lang_names; lang->name != NULL; lang++)
1470     if (lang->interpreters != NULL)
1471       for (iname = lang->interpreters; *iname != NULL; iname++)
1472         if (streq (*iname, interpreter))
1473             return lang;
1474
1475   return NULL;
1476 }
1477
1478
1479
1480 /*
1481  * Return a language given the file name.
1482  */
1483 static language *
1484 get_language_from_filename (char *file, int case_sensitive)
1485 {
1486   language *lang;
1487   const char **name, **ext, *suffix;
1488   char *slash;
1489
1490   /* Try whole file name first. */
1491   slash = strrchr (file, '/');
1492   if (slash != NULL)
1493     file = slash + 1;
1494 #ifdef DOS_NT
1495   else if (file[0] && file[1] == ':')
1496     file += 2;
1497 #endif
1498   for (lang = lang_names; lang->name != NULL; lang++)
1499     if (lang->filenames != NULL)
1500       for (name = lang->filenames; *name != NULL; name++)
1501         if ((case_sensitive)
1502             ? streq (*name, file)
1503             : strcaseeq (*name, file))
1504           return lang;
1505
1506   /* If not found, try suffix after last dot. */
1507   suffix = strrchr (file, '.');
1508   if (suffix == NULL)
1509     return NULL;
1510   suffix += 1;
1511   for (lang = lang_names; lang->name != NULL; lang++)
1512     if (lang->suffixes != NULL)
1513       for (ext = lang->suffixes; *ext != NULL; ext++)
1514         if ((case_sensitive)
1515             ? streq (*ext, suffix)
1516             : strcaseeq (*ext, suffix))
1517           return lang;
1518   return NULL;
1519 }
1520
1521 \f
1522 /*
1523  * This routine is called on each file argument.
1524  */
1525 static void
1526 process_file_name (char *file, language *lang)
1527 {
1528   FILE *inf;
1529   fdesc *fdp;
1530   compressor *compr;
1531   char *compressed_name, *uncompressed_name;
1532   char *ext, *real_name, *tmp_name;
1533   int retval;
1534
1535   canonicalize_filename (file);
1536   if (streq (file, tagfile) && !streq (tagfile, "-"))
1537     {
1538       error ("skipping inclusion of %s in self.", file);
1539       return;
1540     }
1541   compr = get_compressor_from_suffix (file, &ext);
1542   if (compr)
1543     {
1544       compressed_name = file;
1545       uncompressed_name = savenstr (file, ext - file);
1546     }
1547   else
1548     {
1549       compressed_name = NULL;
1550       uncompressed_name = file;
1551     }
1552
1553   /* If the canonicalized uncompressed name
1554      has already been dealt with, skip it silently. */
1555   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1556     {
1557       assert (fdp->infname != NULL);
1558       if (streq (uncompressed_name, fdp->infname))
1559         goto cleanup;
1560     }
1561
1562   inf = fopen (file, "r" FOPEN_BINARY);
1563   if (inf)
1564     real_name = file;
1565   else
1566     {
1567       int file_errno = errno;
1568       if (compressed_name)
1569         {
1570           /* Try with the given suffix.  */
1571           inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1572           if (inf)
1573             real_name = uncompressed_name;
1574         }
1575       else
1576         {
1577           /* Try all possible suffixes.  */
1578           for (compr = compressors; compr->suffix != NULL; compr++)
1579             {
1580               compressed_name = concat (file, ".", compr->suffix);
1581               inf = fopen (compressed_name, "r" FOPEN_BINARY);
1582               if (inf)
1583                 {
1584                   real_name = compressed_name;
1585                   break;
1586                 }
1587               if (MSDOS)
1588                 {
1589                   char *suf = compressed_name + strlen (file);
1590                   size_t suflen = strlen (compr->suffix) + 1;
1591                   for ( ; suf[1]; suf++, suflen--)
1592                     {
1593                       memmove (suf, suf + 1, suflen);
1594                       inf = fopen (compressed_name, "r" FOPEN_BINARY);
1595                       if (inf)
1596                         {
1597                           real_name = compressed_name;
1598                           break;
1599                         }
1600                     }
1601                   if (inf)
1602                     break;
1603                 }
1604               free (compressed_name);
1605               compressed_name = NULL;
1606             }
1607         }
1608       if (! inf)
1609         {
1610           errno = file_errno;
1611           perror (file);
1612           goto cleanup;
1613         }
1614     }
1615
1616   if (real_name == compressed_name)
1617     {
1618       fclose (inf);
1619       tmp_name = etags_mktmp ();
1620       if (!tmp_name)
1621         inf = NULL;
1622       else
1623         {
1624 #if MSDOS || defined (DOS_NT)
1625           char *cmd1 = concat (compr->command, " \"", real_name);
1626           char *cmd = concat (cmd1, "\" > ", tmp_name);
1627 #else
1628           char *cmd1 = concat (compr->command, " '", real_name);
1629           char *cmd = concat (cmd1, "' > ", tmp_name);
1630 #endif
1631           free (cmd1);
1632           int tmp_errno;
1633           if (system (cmd) == -1)
1634             {
1635               inf = NULL;
1636               tmp_errno = EINVAL;
1637             }
1638           else
1639             {
1640               inf = fopen (tmp_name, "r" FOPEN_BINARY);
1641               tmp_errno = errno;
1642             }
1643           free (cmd);
1644           errno = tmp_errno;
1645         }
1646
1647       if (!inf)
1648         {
1649           perror (real_name);
1650           goto cleanup;
1651         }
1652     }
1653
1654   process_file (inf, uncompressed_name, lang);
1655
1656   retval = fclose (inf);
1657   if (real_name == compressed_name)
1658     {
1659       remove (tmp_name);
1660       free (tmp_name);
1661     }
1662   if (retval < 0)
1663     pfatal (file);
1664
1665  cleanup:
1666   if (compressed_name != file)
1667     free (compressed_name);
1668   if (uncompressed_name != file)
1669     free (uncompressed_name);
1670   last_node = NULL;
1671   curfdp = NULL;
1672   return;
1673 }
1674
1675 static void
1676 process_file (FILE *fh, char *fn, language *lang)
1677 {
1678   static const fdesc emptyfdesc;
1679   fdesc *fdp;
1680
1681   infilename = fn;
1682   /* Create a new input file description entry. */
1683   fdp = xnew (1, fdesc);
1684   *fdp = emptyfdesc;
1685   fdp->next = fdhead;
1686   fdp->infname = savestr (fn);
1687   fdp->lang = lang;
1688   fdp->infabsname = absolute_filename (fn, cwd);
1689   fdp->infabsdir = absolute_dirname (fn, cwd);
1690   if (filename_is_absolute (fn))
1691     {
1692       /* An absolute file name.  Canonicalize it. */
1693       fdp->taggedfname = absolute_filename (fn, NULL);
1694     }
1695   else
1696     {
1697       /* A file name relative to cwd.  Make it relative
1698          to the directory of the tags file. */
1699       fdp->taggedfname = relative_filename (fn, tagfiledir);
1700     }
1701   fdp->usecharno = true;        /* use char position when making tags */
1702   fdp->prop = NULL;
1703   fdp->written = false;         /* not written on tags file yet */
1704
1705   fdhead = fdp;
1706   curfdp = fdhead;              /* the current file description */
1707
1708   find_entries (fh);
1709
1710   /* If not Ctags, and if this is not metasource and if it contained no #line
1711      directives, we can write the tags and free all nodes pointing to
1712      curfdp. */
1713   if (!CTAGS
1714       && curfdp->usecharno      /* no #line directives in this file */
1715       && !curfdp->lang->metasource)
1716     {
1717       node *np, *prev;
1718
1719       /* Look for the head of the sublist relative to this file.  See add_node
1720          for the structure of the node tree. */
1721       prev = NULL;
1722       for (np = nodehead; np != NULL; prev = np, np = np->left)
1723         if (np->fdp == curfdp)
1724           break;
1725
1726       /* If we generated tags for this file, write and delete them. */
1727       if (np != NULL)
1728         {
1729           /* This is the head of the last sublist, if any.  The following
1730              instructions depend on this being true. */
1731           assert (np->left == NULL);
1732
1733           assert (fdhead == curfdp);
1734           assert (last_node->fdp == curfdp);
1735           put_entries (np);     /* write tags for file curfdp->taggedfname */
1736           free_tree (np);       /* remove the written nodes */
1737           if (prev == NULL)
1738             nodehead = NULL;    /* no nodes left */
1739           else
1740             prev->left = NULL;  /* delete the pointer to the sublist */
1741         }
1742     }
1743 }
1744
1745 static void
1746 reset_input (FILE *inf)
1747 {
1748   if (fseek (inf, 0, SEEK_SET) != 0)
1749     perror (infilename);
1750 }
1751
1752 /*
1753  * This routine opens the specified file and calls the function
1754  * which finds the function and type definitions.
1755  */
1756 static void
1757 find_entries (FILE *inf)
1758 {
1759   char *cp;
1760   language *lang = curfdp->lang;
1761   Lang_function *parser = NULL;
1762
1763   /* If user specified a language, use it. */
1764   if (lang != NULL && lang->function != NULL)
1765     {
1766       parser = lang->function;
1767     }
1768
1769   /* Else try to guess the language given the file name. */
1770   if (parser == NULL)
1771     {
1772       lang = get_language_from_filename (curfdp->infname, true);
1773       if (lang != NULL && lang->function != NULL)
1774         {
1775           curfdp->lang = lang;
1776           parser = lang->function;
1777         }
1778     }
1779
1780   /* Else look for sharp-bang as the first two characters. */
1781   if (parser == NULL
1782       && readline_internal (&lb, inf, infilename) > 0
1783       && lb.len >= 2
1784       && lb.buffer[0] == '#'
1785       && lb.buffer[1] == '!')
1786     {
1787       char *lp;
1788
1789       /* Set lp to point at the first char after the last slash in the
1790          line or, if no slashes, at the first nonblank.  Then set cp to
1791          the first successive blank and terminate the string. */
1792       lp = strrchr (lb.buffer+2, '/');
1793       if (lp != NULL)
1794         lp += 1;
1795       else
1796         lp = skip_spaces (lb.buffer + 2);
1797       cp = skip_non_spaces (lp);
1798       *cp = '\0';
1799
1800       if (strlen (lp) > 0)
1801         {
1802           lang = get_language_from_interpreter (lp);
1803           if (lang != NULL && lang->function != NULL)
1804             {
1805               curfdp->lang = lang;
1806               parser = lang->function;
1807             }
1808         }
1809     }
1810
1811   reset_input (inf);
1812
1813   /* Else try to guess the language given the case insensitive file name. */
1814   if (parser == NULL)
1815     {
1816       lang = get_language_from_filename (curfdp->infname, false);
1817       if (lang != NULL && lang->function != NULL)
1818         {
1819           curfdp->lang = lang;
1820           parser = lang->function;
1821         }
1822     }
1823
1824   /* Else try Fortran or C. */
1825   if (parser == NULL)
1826     {
1827       node *old_last_node = last_node;
1828
1829       curfdp->lang = get_language_from_langname ("fortran");
1830       find_entries (inf);
1831
1832       if (old_last_node == last_node)
1833         /* No Fortran entries found.  Try C. */
1834         {
1835           reset_input (inf);
1836           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1837           find_entries (inf);
1838         }
1839       return;
1840     }
1841
1842   if (!no_line_directive
1843       && curfdp->lang != NULL && curfdp->lang->metasource)
1844     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1845        file, or anyway we parsed a file that is automatically generated from
1846        this one.  If this is the case, the bingo.c file contained #line
1847        directives that generated tags pointing to this file.  Let's delete
1848        them all before parsing this file, which is the real source. */
1849     {
1850       fdesc **fdpp = &fdhead;
1851       while (*fdpp != NULL)
1852         if (*fdpp != curfdp
1853             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1854           /* We found one of those!  We must delete both the file description
1855              and all tags referring to it. */
1856           {
1857             fdesc *badfdp = *fdpp;
1858
1859             /* Delete the tags referring to badfdp->taggedfname
1860                that were obtained from badfdp->infname. */
1861             invalidate_nodes (badfdp, &nodehead);
1862
1863             *fdpp = badfdp->next; /* remove the bad description from the list */
1864             free_fdesc (badfdp);
1865           }
1866         else
1867           fdpp = &(*fdpp)->next; /* advance the list pointer */
1868     }
1869
1870   assert (parser != NULL);
1871
1872   /* Generic initializations before reading from file. */
1873   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1874
1875   /* Generic initializations before parsing file with readline. */
1876   lineno = 0;                  /* reset global line number */
1877   charno = 0;                  /* reset global char number */
1878   linecharno = 0;              /* reset global char number of line start */
1879
1880   parser (inf);
1881
1882   regex_tag_multiline ();
1883 }
1884
1885 \f
1886 /*
1887  * Check whether an implicitly named tag should be created,
1888  * then call `pfnote'.
1889  * NAME is a string that is internally copied by this function.
1890  *
1891  * TAGS format specification
1892  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1893  * The following is explained in some more detail in etc/ETAGS.EBNF.
1894  *
1895  * make_tag creates tags with "implicit tag names" (unnamed tags)
1896  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1897  *  1. NAME does not contain any of the characters in NONAM;
1898  *  2. LINESTART contains name as either a rightmost, or rightmost but
1899  *     one character, substring;
1900  *  3. the character, if any, immediately before NAME in LINESTART must
1901  *     be a character in NONAM;
1902  *  4. the character, if any, immediately after NAME in LINESTART must
1903  *     also be a character in NONAM.
1904  *
1905  * The implementation uses the notinname() macro, which recognizes the
1906  * characters stored in the string `nonam'.
1907  * etags.el needs to use the same characters that are in NONAM.
1908  */
1909 static void
1910 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1911           int namelen,          /* tag length */
1912           bool is_func,         /* tag is a function */
1913           char *linestart,      /* start of the line where tag is */
1914           int linelen,          /* length of the line where tag is */
1915           int lno,              /* line number */
1916           long int cno)         /* character number */
1917 {
1918   bool named = (name != NULL && namelen > 0);
1919   char *nname = NULL;
1920
1921   if (!CTAGS && named)          /* maybe set named to false */
1922     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1923        such that etags.el can guess a name from it. */
1924     {
1925       int i;
1926       register const char *cp = name;
1927
1928       for (i = 0; i < namelen; i++)
1929         if (notinname (*cp++))
1930           break;
1931       if (i == namelen)                         /* rule #1 */
1932         {
1933           cp = linestart + linelen - namelen;
1934           if (notinname (linestart[linelen-1]))
1935             cp -= 1;                            /* rule #4 */
1936           if (cp >= linestart                   /* rule #2 */
1937               && (cp == linestart
1938                   || notinname (cp[-1]))        /* rule #3 */
1939               && strneq (name, cp, namelen))    /* rule #2 */
1940             named = false;      /* use implicit tag name */
1941         }
1942     }
1943
1944   if (named)
1945     nname = savenstr (name, namelen);
1946
1947   pfnote (nname, is_func, linestart, linelen, lno, cno);
1948 }
1949
1950 /* Record a tag. */
1951 static void
1952 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1953         long int cno)
1954                                 /* tag name, or NULL if unnamed */
1955                                 /* tag is a function */
1956                                 /* start of the line where tag is */
1957                                 /* length of the line where tag is */
1958                                 /* line number */
1959                                 /* character number */
1960 {
1961   register node *np;
1962
1963   assert (name == NULL || name[0] != '\0');
1964   if (CTAGS && name == NULL)
1965     return;
1966
1967   np = xnew (1, node);
1968
1969   /* If ctags mode, change name "main" to M<thisfilename>. */
1970   if (CTAGS && !cxref_style && streq (name, "main"))
1971     {
1972       char *fp = strrchr (curfdp->taggedfname, '/');
1973       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1974       fp = strrchr (np->name, '.');
1975       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1976         fp[0] = '\0';
1977     }
1978   else
1979     np->name = name;
1980   np->valid = true;
1981   np->been_warned = false;
1982   np->fdp = curfdp;
1983   np->is_func = is_func;
1984   np->lno = lno;
1985   if (np->fdp->usecharno)
1986     /* Our char numbers are 0-base, because of C language tradition?
1987        ctags compatibility?  old versions compatibility?   I don't know.
1988        Anyway, since emacs's are 1-base we expect etags.el to take care
1989        of the difference.  If we wanted to have 1-based numbers, we would
1990        uncomment the +1 below. */
1991     np->cno = cno /* + 1 */ ;
1992   else
1993     np->cno = invalidcharno;
1994   np->left = np->right = NULL;
1995   if (CTAGS && !cxref_style)
1996     {
1997       if (strlen (linestart) < 50)
1998         np->regex = concat (linestart, "$", "");
1999       else
2000         np->regex = savenstr (linestart, 50);
2001     }
2002   else
2003     np->regex = savenstr (linestart, linelen);
2004
2005   add_node (np, &nodehead);
2006 }
2007
2008 /*
2009  * free_tree ()
2010  *      recurse on left children, iterate on right children.
2011  */
2012 static void
2013 free_tree (register node *np)
2014 {
2015   while (np)
2016     {
2017       register node *node_right = np->right;
2018       free_tree (np->left);
2019       free (np->name);
2020       free (np->regex);
2021       free (np);
2022       np = node_right;
2023     }
2024 }
2025
2026 /*
2027  * free_fdesc ()
2028  *      delete a file description
2029  */
2030 static void
2031 free_fdesc (register fdesc *fdp)
2032 {
2033   free (fdp->infname);
2034   free (fdp->infabsname);
2035   free (fdp->infabsdir);
2036   free (fdp->taggedfname);
2037   free (fdp->prop);
2038   free (fdp);
2039 }
2040
2041 /*
2042  * add_node ()
2043  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2044  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2045  *      balancing.
2046  *
2047  *      add_node is the only function allowed to add nodes, so it can
2048  *      maintain state.
2049  */
2050 static void
2051 add_node (node *np, node **cur_node_p)
2052 {
2053   register int dif;
2054   register node *cur_node = *cur_node_p;
2055
2056   if (cur_node == NULL)
2057     {
2058       *cur_node_p = np;
2059       last_node = np;
2060       return;
2061     }
2062
2063   if (!CTAGS)
2064     /* Etags Mode */
2065     {
2066       /* For each file name, tags are in a linked sublist on the right
2067          pointer.  The first tags of different files are a linked list
2068          on the left pointer.  last_node points to the end of the last
2069          used sublist. */
2070       if (last_node != NULL && last_node->fdp == np->fdp)
2071         {
2072           /* Let's use the same sublist as the last added node. */
2073           assert (last_node->right == NULL);
2074           last_node->right = np;
2075           last_node = np;
2076         }
2077       else if (cur_node->fdp == np->fdp)
2078         {
2079           /* Scanning the list we found the head of a sublist which is
2080              good for us.  Let's scan this sublist. */
2081           add_node (np, &cur_node->right);
2082         }
2083       else
2084         /* The head of this sublist is not good for us.  Let's try the
2085            next one. */
2086         add_node (np, &cur_node->left);
2087     } /* if ETAGS mode */
2088
2089   else
2090     {
2091       /* Ctags Mode */
2092       dif = strcmp (np->name, cur_node->name);
2093
2094       /*
2095        * If this tag name matches an existing one, then
2096        * do not add the node, but maybe print a warning.
2097        */
2098       if (no_duplicates && !dif)
2099         {
2100           if (np->fdp == cur_node->fdp)
2101             {
2102               if (!no_warnings)
2103                 {
2104                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2105                            np->fdp->infname, lineno, np->name);
2106                   fprintf (stderr, "Second entry ignored\n");
2107                 }
2108             }
2109           else if (!cur_node->been_warned && !no_warnings)
2110             {
2111               fprintf
2112                 (stderr,
2113                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2114                  np->fdp->infname, cur_node->fdp->infname, np->name);
2115               cur_node->been_warned = true;
2116             }
2117           return;
2118         }
2119
2120       /* Actually add the node */
2121       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2122     } /* if CTAGS mode */
2123 }
2124
2125 /*
2126  * invalidate_nodes ()
2127  *      Scan the node tree and invalidate all nodes pointing to the
2128  *      given file description (CTAGS case) or free them (ETAGS case).
2129  */
2130 static void
2131 invalidate_nodes (fdesc *badfdp, node **npp)
2132 {
2133   node *np = *npp;
2134
2135   if (np == NULL)
2136     return;
2137
2138   if (CTAGS)
2139     {
2140       if (np->left != NULL)
2141         invalidate_nodes (badfdp, &np->left);
2142       if (np->fdp == badfdp)
2143         np->valid = false;
2144       if (np->right != NULL)
2145         invalidate_nodes (badfdp, &np->right);
2146     }
2147   else
2148     {
2149       assert (np->fdp != NULL);
2150       if (np->fdp == badfdp)
2151         {
2152           *npp = np->left;      /* detach the sublist from the list */
2153           np->left = NULL;      /* isolate it */
2154           free_tree (np);       /* free it */
2155           invalidate_nodes (badfdp, npp);
2156         }
2157       else
2158         invalidate_nodes (badfdp, &np->left);
2159     }
2160 }
2161
2162 \f
2163 static int total_size_of_entries (node *);
2164 static int number_len (long) ATTRIBUTE_CONST;
2165
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is at least 1.  */
static int
number_len (long int num)
{
  int digits = 1;

  /* Each division by ten that leaves something behind is one more digit.  */
  for (num /= 10; num > 0; num /= 10)
    digits++;
  return digits;
}
2175
2176 /*
2177  * Return total number of characters that put_entries will output for
2178  * the nodes in the linked list at the right of the specified node.
2179  * This count is irrelevant with etags.el since emacs 19.34 at least,
2180  * but is still supplied for backward compatibility.
2181  */
2182 static int
2183 total_size_of_entries (register node *np)
2184 {
2185   register int total = 0;
2186
2187   for (; np != NULL; np = np->right)
2188     if (np->valid)
2189       {
2190         total += strlen (np->regex) + 1;                /* pat\177 */
2191         if (np->name != NULL)
2192           total += strlen (np->name) + 1;               /* name\001 */
2193         total += number_len ((long) np->lno) + 1;       /* lno, */
2194         if (np->cno != invalidcharno)                   /* cno */
2195           total += number_len (np->cno);
2196         total += 1;                                     /* newline */
2197       }
2198
2199   return total;
2200 }
2201
2202 static void
2203 put_entries (register node *np)
2204 {
2205   register char *sp;
2206   static fdesc *fdp = NULL;
2207
2208   if (np == NULL)
2209     return;
2210
2211   /* Output subentries that precede this one */
2212   if (CTAGS)
2213     put_entries (np->left);
2214
2215   /* Output this entry */
2216   if (np->valid)
2217     {
2218       if (!CTAGS)
2219         {
2220           /* Etags mode */
2221           if (fdp != np->fdp)
2222             {
2223               fdp = np->fdp;
2224               fprintf (tagf, "\f\n%s,%d\n",
2225                        fdp->taggedfname, total_size_of_entries (np));
2226               fdp->written = true;
2227             }
2228           fputs (np->regex, tagf);
2229           fputc ('\177', tagf);
2230           if (np->name != NULL)
2231             {
2232               fputs (np->name, tagf);
2233               fputc ('\001', tagf);
2234             }
2235           fprintf (tagf, "%d,", np->lno);
2236           if (np->cno != invalidcharno)
2237             fprintf (tagf, "%ld", np->cno);
2238           fputs ("\n", tagf);
2239         }
2240       else
2241         {
2242           /* Ctags mode */
2243           if (np->name == NULL)
2244             error ("internal error: NULL name in ctags mode.");
2245
2246           if (cxref_style)
2247             {
2248               if (vgrind_style)
2249                 fprintf (stdout, "%s %s %d\n",
2250                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2251               else
2252                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2253                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2254             }
2255           else
2256             {
2257               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2258
2259               if (np->is_func)
2260                 {               /* function or #define macro with args */
2261                   putc (searchar, tagf);
2262                   putc ('^', tagf);
2263
2264                   for (sp = np->regex; *sp; sp++)
2265                     {
2266                       if (*sp == '\\' || *sp == searchar)
2267                         putc ('\\', tagf);
2268                       putc (*sp, tagf);
2269                     }
2270                   putc (searchar, tagf);
2271                 }
2272               else
2273                 {               /* anything else; text pattern inadequate */
2274                   fprintf (tagf, "%d", np->lno);
2275                 }
2276               putc ('\n', tagf);
2277             }
2278         }
2279     } /* if this node contains a valid tag */
2280
2281   /* Output subentries that follow this one */
2282   put_entries (np->right);
2283   if (!CTAGS)
2284     put_entries (np->left);
2285 }
2286
2287 \f
/* Flags describing which C dialect is being parsed.

   C_EXT masks the dialect bits proper.  C_STAR and C_JAVA each contain
   the C_PLPL bit.  C_AUTO and YACC lie outside the C_EXT mask.  */
#define C_EXT   0x00fff         /* mask covering the C extensions */
#define C_PLAIN 0x00000         /* plain C, no extension */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* Java */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2296
/*
 * The C symbol tables.
 *
 * Classification of the reserved words listed in the gperf keyword
 * table; st_none stands for a word that is not in the table.
 */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
2311
2312 /* Feed stuff between (but not including) %[ and %] lines to:
2313      gperf -m 5
2314 %[
2315 %compare-strncmp
2316 %enum
2317 %struct-type
2318 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2319 %%
2320 if,             0,                      st_C_ignore
2321 for,            0,                      st_C_ignore
2322 while,          0,                      st_C_ignore
2323 switch,         0,                      st_C_ignore
2324 return,         0,                      st_C_ignore
2325 __attribute__,  0,                      st_C_attribute
2326 GTY,            0,                      st_C_attribute
2327 @interface,     0,                      st_C_objprot
2328 @protocol,      0,                      st_C_objprot
2329 @implementation,0,                      st_C_objimpl
2330 @end,           0,                      st_C_objend
2331 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2332 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2333 friend,         C_PLPL,                 st_C_ignore
2334 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2335 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2336 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2337 class,          0,                      st_C_class
2338 namespace,      C_PLPL,                 st_C_struct
2339 domain,         C_STAR,                 st_C_struct
2340 union,          0,                      st_C_struct
2341 struct,         0,                      st_C_struct
2342 extern,         0,                      st_C_extern
2343 enum,           0,                      st_C_enum
2344 typedef,        0,                      st_C_typedef
2345 define,         0,                      st_C_define
2346 undef,          0,                      st_C_define
2347 operator,       C_PLPL,                 st_C_operator
2348 template,       0,                      st_C_template
2349 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2350 DEFUN,          0,                      st_C_gnumacro
2351 SYSCALL,        0,                      st_C_gnumacro
2352 ENTRY,          0,                      st_C_gnumacro
2353 PSEUDO,         0,                      st_C_gnumacro
2354 # These are defined inside C functions, so currently they are not met.
2355 # EXFUN used in glibc, DEFVAR_* in emacs.
2356 #EXFUN,         0,                      st_C_gnumacro
2357 #DEFVAR_,       0,                      st_C_gnumacro
2358 %]
2359 and replace lines between %< and %> with its output, then:
2360  - remove the #if characterset check
2361  - make in_word_set static and not inline. */
2362 /*%<*/
2363 /* C code produced by gperf version 3.0.1 */
2364 /* Command-line: gperf -m 5  */
2365 /* Computed positions: -k'2-3' */
2366
2367 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2368 /* maximum key range = 33, duplicates = 0 */
2369
/* Perfect-hash function for the keyword table: LEN plus the values
   associated with the second and, unless LEN is exactly 2, the third
   character of STR.  STR must therefore hold at least two characters
   (three when LEN is not 2).  */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  /* The third character takes part for every length except 2.  */
  if (len != 2)
    hval += asso_values[(unsigned char) str[2]];
  /* The second character always takes part.  */
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2415
2416 static struct C_stab_entry *
2417 in_word_set (register const char *str, register unsigned int len)
2418 {
2419   enum
2420     {
2421       TOTAL_KEYWORDS = 33,
2422       MIN_WORD_LENGTH = 2,
2423       MAX_WORD_LENGTH = 15,
2424       MIN_HASH_VALUE = 2,
2425       MAX_HASH_VALUE = 34
2426     };
2427
2428   static struct C_stab_entry wordlist[] =
2429     {
2430       {""}, {""},
2431       {"if",            0,                      st_C_ignore},
2432       {"GTY",           0,                      st_C_attribute},
2433       {"@end",          0,                      st_C_objend},
2434       {"union",         0,                      st_C_struct},
2435       {"define",                0,                      st_C_define},
2436       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2437       {"template",      0,                      st_C_template},
2438       {"operator",      C_PLPL,                 st_C_operator},
2439       {"@interface",    0,                      st_C_objprot},
2440       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2441       {"friend",                C_PLPL,                 st_C_ignore},
2442       {"typedef",       0,                      st_C_typedef},
2443       {"return",                0,                      st_C_ignore},
2444       {"@implementation",0,                     st_C_objimpl},
2445       {"@protocol",     0,                      st_C_objprot},
2446       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2447       {"extern",                0,                      st_C_extern},
2448       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2449       {"struct",                0,                      st_C_struct},
2450       {"domain",                C_STAR,                 st_C_struct},
2451       {"switch",                0,                      st_C_ignore},
2452       {"enum",          0,                      st_C_enum},
2453       {"for",           0,                      st_C_ignore},
2454       {"namespace",     C_PLPL,                 st_C_struct},
2455       {"class",         0,                      st_C_class},
2456       {"while",         0,                      st_C_ignore},
2457       {"undef",         0,                      st_C_define},
2458       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2459       {"__attribute__", 0,                      st_C_attribute},
2460       {"SYSCALL",       0,                      st_C_gnumacro},
2461       {"ENTRY",         0,                      st_C_gnumacro},
2462       {"PSEUDO",                0,                      st_C_gnumacro},
2463       {"DEFUN",         0,                      st_C_gnumacro}
2464     };
2465
2466   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2467     {
2468       int key = hash (str, len);
2469
2470       if (key <= MAX_HASH_VALUE && key >= 0)
2471         {
2472           const char *s = wordlist[key].name;
2473
2474           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2475             return &wordlist[key];
2476         }
2477     }
2478   return 0;
2479 }
2480 /*%>*/
2481
2482 static enum sym_type
2483 C_symtype (char *str, int len, int c_ext)
2484 {
2485   register struct C_stab_entry *se = in_word_set (str, len);
2486
2487   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2488     return st_none;
2489   return se->type;
2490 }
2491
2492 \f
2493 /*
2494  * Ignoring __attribute__ ((list))
 *
 * consider_token sets this flag when it meets the __attribute__
 * keyword.  While the flag is set, C_entries does not hand tokens
 * to consider_token.
2495  */
2496 static bool inattribute;        /* looking at an __attribute__ construct */
2497
2498 /*
2499  * C functions and variables are recognized using a simple
2500  * finite automaton.  fvdef is its state variable.
 * consider_token advances it as tokens are classified, and
 * C_entries resets it to fvnone before scanning a file.
2501  */
2502 static enum
2503 {
2504   fvnone,                       /* nothing seen */
2505   fdefunkey,                    /* Emacs DEFUN keyword seen */
2506   fdefunname,                   /* Emacs DEFUN name seen */
2507   foperator,                    /* func: operator keyword seen (cplpl) */
2508   fvnameseen,                   /* function or variable name seen */
2509   fstartlist,                   /* func: just after open parenthesis */
2510   finlist,                      /* func: in parameter list */
2511   flistseen,                    /* func: after parameter list */
2512   fignore,                      /* func: before open brace */
2513   vignore                       /* var-like: ignore until ';' */
2514 } fvdef;
2515
2516 static bool fvextern;           /* func or var: extern keyword seen */
2517
2518 /*
2519  * typedefs are recognized using a simple finite automaton.
2520  * typdef is its state variable.
 * consider_token leaves tnone for tkeyseen on a `typedef' keyword
 * only when the `typedefs' option is set.
2521  */
2522 static enum
2523 {
2524   tnone,                        /* nothing seen */
2525   tkeyseen,                     /* typedef keyword seen */
2526   ttypeseen,                    /* defined type seen */
2527   tinbody,                      /* inside typedef body */
2528   tend,                         /* just before typedef tag */
2529   tignore                       /* junk after typedef tag */
2530 } typdef;
2531
2532 /*
2533  * struct-like structures (enum, struct and union) are recognized
2534  * using another simple finite automaton.  `structdef' is its state
2535  * variable.
 *
 * consider_token moves it to skeyseen on a struct-like keyword and
 * to stagseen on the token that follows.  A keyword of type
 * st_C_javastruct (`extends', `implements') seen in state stagseen
 * moves it to scolonseen.
2536  */
2537 static enum
2538 {
2539   snone,                        /* nothing seen yet,
2540                                    or in struct body if bracelev > 0 */
2541   skeyseen,                     /* struct-like keyword seen */
2542   stagseen,                     /* struct-like tag seen */
2543   scolonseen                    /* colon seen after struct-like tag */
2544 } structdef;
2545
2546 /*
2547  * When objdef is different from onone, objtag is the name of the class.
 * consider_token points it at a copy of the class name made with
 * savenstr.  That copy is deliberately never freed (see the oignore
 * case in consider_token).
2548  */
2549 static const char *objtag = "<uninited>";
2550
2551 /*
2552  * Yet another little state machine to deal with preprocessor lines.
 * The CNL macro resets it to dnone when a new line is read;
 * CNL_SAVE_DEFINEDEF reads a new line without touching it.
2553  */
2554 static enum
2555 {
2556   dnone,                        /* nothing seen */
2557   dsharpseen,                   /* '#' seen as first char on line */
2558   ddefineseen,                  /* '#' and 'define' seen */
2559   dignorerest                   /* ignore rest of line */
2560 } definedef;
2561
2562 /*
2563  * State machine for Objective C protocols and implementations.
2564  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2565  */
2566 static enum
2567 {
2568   onone,                        /* nothing seen */
2569   oprotocol,                    /* @interface or @protocol seen */
2570   oimplementation,              /* @implementation seen */
2571   otagseen,                     /* class name seen */
2572   oparenseen,                   /* parenthesis before category seen */
2573   ocatseen,                     /* category name seen */
2574   oinbody,                      /* in @implementation body */
2575   omethodsign,                  /* in @implementation body, after +/- */
2576   omethodtag,                   /* after method name */
2577   omethodcolon,                 /* after method colon */
2578   omethodparm,                  /* after method parameter */
2579   oignore                       /* wait for @end */
2580 } objdef;
2581
2582
2583 /*
2584  * Use this structure to keep info about the token read, and how it
2585  * should be tagged.  Used by the make_C_tag function to build a tag.
2586  */
2587 static struct tok
2588 {
2589   char *line;                   /* string containing the token */
2590   int offset;                   /* where the token starts in LINE */
2591   int length;                   /* token length */
2592   /*
2593     The previous members can be used to pass strings around for generic
2594     purposes.  The following ones specifically refer to creating tags.  In this
2595     case the token contained here is the pattern that will be used to create a
2596     tag.
2597   */
2598   bool valid;                   /* false means do not create a tag; the
2599                                    token should be invalidated whenever a
2600                                    state machine is reset prematurely */
2601   bool named;                   /* create a named tag */
2602   int lineno;                   /* source line number of tag */
2603   long linepos;                 /* source char number of tag */
2604 } token;                        /* latest token read */
2605
2606 /*
2607  * Variables and functions for dealing with nested structures.
2608  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
 *
 * cstack is a stack kept in two parallel arrays: cname[i] is the name
 * of the i-th enclosing declaration (NULL when it has none) and
 * bracelev[i] is the brace level it was pushed with.  pushclass_above
 * doubles both arrays when the stack is full.
2609  */
2610 static void pushclass_above (int, char *, int);
2611 static void popclass_above (int);
2612 static void write_classname (linebuffer *, const char *qualifier);
2613
2614 static struct {
2615   char **cname;                 /* nested class names */
2616   int *bracelev;                /* nested class brace level */
2617   int nl;                       /* class nesting level (elements used) */
2618   int size;                     /* length of the array */
2619 } cstack;                       /* stack for nested declaration tags */
2620 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2621 #define nestlev         (cstack.nl)
2622 /* After struct keyword or in struct body, not inside a nested function.
   Note that the expansion reads a variable named `bracelev' from the
   scope in which the macro is used.  */
2623 #define instruct        (structdef == snone && nestlev > 0                      \
2624                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2625
2626 static void
2627 pushclass_above (int bracelev, char *str, int len)
2628 {
2629   int nl;
2630
2631   popclass_above (bracelev);
2632   nl = cstack.nl;
2633   if (nl >= cstack.size)
2634     {
2635       int size = cstack.size *= 2;
2636       xrnew (cstack.cname, size, char *);
2637       xrnew (cstack.bracelev, size, int);
2638     }
2639   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2640   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2641   cstack.bracelev[nl] = bracelev;
2642   cstack.nl = nl + 1;
2643 }
2644
2645 static void
2646 popclass_above (int bracelev)
2647 {
2648   int nl;
2649
2650   for (nl = cstack.nl - 1;
2651        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2652        nl--)
2653     {
2654       free (cstack.cname[nl]);
2655       cstack.nl = nl;
2656     }
2657 }
2658
2659 static void
2660 write_classname (linebuffer *cn, const char *qualifier)
2661 {
2662   int i, len;
2663   int qlen = strlen (qualifier);
2664
2665   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2666     {
2667       len = 0;
2668       cn->len = 0;
2669       cn->buffer[0] = '\0';
2670     }
2671   else
2672     {
2673       len = strlen (cstack.cname[0]);
2674       linebuffer_setlen (cn, len);
2675       strcpy (cn->buffer, cstack.cname[0]);
2676     }
2677   for (i = 1; i < cstack.nl; i++)
2678     {
2679       char *s = cstack.cname[i];
2680       if (s == NULL)
2681         continue;
2682       linebuffer_setlen (cn, len + qlen + strlen (s));
2683       len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2684     }
2685 }
2686
2687 \f
2688 static bool consider_token (char *, int, int, int *, int, int, bool *);
2689 static void make_C_tag (bool);
2690
2691 /*
2692  * consider_token ()
2693  *      checks to see if the current token is at the start of a
2694  *      function or variable, or corresponds to a typedef, or
2695  *      is a struct/union/enum tag, or #define, or an enum constant.
 *      Returns true when the token is one of those; returns false for
 *      keywords and for tokens that merely advance a state machine.
2696  *
2697  *      *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2698  *      with args.  C_EXTP points to which language we are looking at.
 *      *IS_FUNC_OR_VAR is also set for Objective C class and category
 *      names.  When the C_AUTO bit is set in *C_EXTP, seeing a `class'
 *      or `template' keyword at brace level 0, or a token ending in
 *      "::operator", replaces C_AUTO with C_PLPL in *C_EXTP.
 *
 *      The checks run in a fixed order: __attribute__, preprocessor
 *      state, typedef state, struct-like keywords, Objective C state,
 *      and finally functions, variables and enum constants.  Most
 *      stages return early, so later stages only see what earlier
 *      ones let through.
2699  *
2700  * Globals
2701  *      fvdef                   IN OUT
2702  *      structdef               IN OUT
2703  *      definedef               IN OUT
2704  *      typdef                  IN OUT
2705  *      objdef                  IN OUT
 *      inattribute             OUT
 *      fvextern                OUT
 *      objtag                  OUT
 *      token_name              OUT (Objective C method names)
2706  */
2707
2708 static bool
2709 consider_token (char *str, int len, int c, int *c_extp,
2710                 int bracelev, int parlev, bool *is_func_or_var)
2711                                 /* IN: token pointer */
2712                                 /* IN: token length */
2713                                 /* IN: first char after the token */
2714                                 /* IN, OUT: C extensions mask */
2715                                 /* IN: brace level */
2716                                 /* IN: parenthesis level */
2717                                 /* OUT: function or variable found */
2718 {
2719   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2720      structtype is the type of the preceding struct-like keyword, and
2721      structbracelev is the brace level where it has been seen. */
2722   static enum sym_type structtype;
2723   static int structbracelev;
2724   static enum sym_type toktype;
2725
2726
2727   toktype = C_symtype (str, len, *c_extp);
2728
2729   /*
2730    * Skip __attribute__
2731    */
2732   if (toktype == st_C_attribute)
2733     {
2734       inattribute = true;
2735       return false;
2736      }
2737
2738    /*
2739     * Advance the definedef state machine.
2740     */
2741    switch (definedef)
2742      {
2743      case dnone:
2744        /* We're not on a preprocessor line. */
2745        if (toktype == st_C_gnumacro)
2746          {
2747            fvdef = fdefunkey;
2748            return false;
2749          }
2750        break;
2751      case dsharpseen:
2752        if (toktype == st_C_define)
2753          {
2754            definedef = ddefineseen;
2755          }
2756        else
2757          {
2758            definedef = dignorerest;
2759          }
2760        return false;
2761      case ddefineseen:
2762        /*
2763         * Make a tag for any macro, unless it is a constant
2764         * and constantypedefs is false.
2765         */
2766        definedef = dignorerest;
2767        *is_func_or_var = (c == '(');
2768        if (!*is_func_or_var && !constantypedefs)
2769          return false;
2770        else
2771          return true;
2772      case dignorerest:
2773        return false;
2774      default:
2775        error ("internal error: definedef value.");
2776      }
2777
2778    /*
2779     * Now typedefs
2780     */
2781    switch (typdef)
2782      {
2783      case tnone:
2784        if (toktype == st_C_typedef)
2785          {
2786            if (typedefs)
2787              typdef = tkeyseen;
2788            fvextern = false;
2789            fvdef = fvnone;
2790            return false;
2791          }
2792        break;
2793      case tkeyseen:
2794        switch (toktype)
2795          {
2796          case st_none:
2797          case st_C_class:
2798          case st_C_struct:
2799          case st_C_enum:
2800            typdef = ttypeseen;
2801            break;
2802          default:
2803            break;
2804          }
2805        break;
2806      case ttypeseen:
2807        if (structdef == snone && fvdef == fvnone)
2808          {
2809            fvdef = fvnameseen;
2810            return true;
2811          }
2812        break;
2813      case tend:
2814        switch (toktype)
2815          {
2816          case st_C_class:
2817          case st_C_struct:
2818          case st_C_enum:
2819            return false;
2820          default:
2821            return true;
2822          }
2823      default:
2824        break;
2825      }
2826
   /*
    * Keywords that introduce struct-like constructs.  These are
    * never tagged themselves; they only update structdef and the
    * static structtype / structbracelev.
    */
2827    switch (toktype)
2828      {
2829      case st_C_javastruct:
2830        if (structdef == stagseen)
2831          structdef = scolonseen;
2832        return false;
2833      case st_C_template:
2834      case st_C_class:
2835        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2836            && bracelev == 0
2837            && definedef == dnone && structdef == snone
2838            && typdef == tnone && fvdef == fvnone)
2839          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2840        if (toktype == st_C_template)
2841          break;
2842        /* FALLTHRU */
2843      case st_C_struct:
2844      case st_C_enum:
2845        if (parlev == 0
2846            && fvdef != vignore
2847            && (typdef == tkeyseen
2848                || (typedefs_or_cplusplus && structdef == snone)))
2849          {
2850            structdef = skeyseen;
2851            structtype = toktype;
2852            structbracelev = bracelev;
2853            if (fvdef == fvnameseen)
2854              fvdef = fvnone;
2855          }
2856        return false;
2857      default:
2858        break;
2859      }
2860
   /* The token that follows a struct-like keyword is taken as its tag. */
2861    if (structdef == skeyseen)
2862      {
2863        structdef = stagseen;
2864        return true;
2865      }
2866
2867    if (typdef != tnone)
2868      definedef = dnone;
2869
2870    /* Detect Objective C constructs. */
2871    switch (objdef)
2872      {
2873      case onone:
2874        switch (toktype)
2875          {
2876          case st_C_objprot:
2877            objdef = oprotocol;
2878            return false;
2879          case st_C_objimpl:
2880            objdef = oimplementation;
2881            return false;
2882          default:
2883            break;
2884          }
2885        break;
2886      case oimplementation:
2887        /* Save the class tag for functions or variables defined inside. */
2888        objtag = savenstr (str, len);
2889        objdef = oinbody;
2890        return false;
2891      case oprotocol:
2892        /* Save the class tag for categories. */
2893        objtag = savenstr (str, len);
2894        objdef = otagseen;
2895        *is_func_or_var = true;
2896        return true;
2897      case oparenseen:
2898        objdef = ocatseen;
2899        *is_func_or_var = true;
2900        return true;
2901      case oinbody:
2902        break;
2903      case omethodsign:
        /* First part of a method name: start token_name afresh. */
2904        if (parlev == 0)
2905          {
2906            fvdef = fvnone;
2907            objdef = omethodtag;
2908            linebuffer_setlen (&token_name, len);
2909            memcpy (token_name.buffer, str, len);
2910            token_name.buffer[len] = '\0';
2911            return true;
2912          }
2913        return false;
2914      case omethodcolon:
2915        if (parlev == 0)
2916          objdef = omethodparm;
2917        return false;
2918      case omethodparm:
        /* A further part of a method name: when class_qualify is
           set, append it to what token_name already holds. */
2919        if (parlev == 0)
2920          {
2921            objdef = omethodtag;
2922            if (class_qualify)
2923              {
2924                int oldlen = token_name.len;
2925                fvdef = fvnone;
2926                linebuffer_setlen (&token_name, oldlen + len);
2927                memcpy (token_name.buffer + oldlen, str, len);
2928                token_name.buffer[oldlen + len] = '\0';
2929              }
2930            return true;
2931          }
2932        return false;
2933      case oignore:
2934        if (toktype == st_C_objend)
2935          {
2936            /* Memory leakage here: the string pointed by objtag is
2937               never released, because many tests would be needed to
2938               avoid breaking on incorrect input code.  The amount of
2939               memory leaked here is the sum of the lengths of the
2940               class tags.
2941            free (objtag); */
2942            objdef = onone;
2943          }
2944        return false;
2945      default:
2946        break;
2947      }
2948
2949    /* A function, variable or enum constant? */
2950    switch (toktype)
2951      {
2952      case st_C_extern:
2953        fvextern = true;
2954        switch  (fvdef)
2955          {
2956          case finlist:
2957          case flistseen:
2958          case fignore:
2959          case vignore:
2960            break;
2961          default:
2962            fvdef = fvnone;
2963          }
2964        return false;
2965      case st_C_ignore:
2966        fvextern = false;
2967        fvdef = vignore;
2968        return false;
2969      case st_C_operator:
2970        fvdef = foperator;
2971        *is_func_or_var = true;
2972        return true;
2973      case st_none:
2974        if (constantypedefs
2975            && structdef == snone
2976            && structtype == st_C_enum && bracelev > structbracelev
2977            /* Don't tag tokens in expressions that assign values to enum
2978               constants.  */
2979            && fvdef != vignore)
2980          return true;           /* enum constant */
2981        switch (fvdef)
2982          {
2983          case fdefunkey:
2984            if (bracelev > 0)
2985              break;
2986            fvdef = fdefunname;  /* GNU macro */
2987            *is_func_or_var = true;
2988            return true;
2989          case fvnone:
2990            switch (typdef)
2991              {
2992              case ttypeseen:
2993                return false;
2994              case tnone:
2995                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2996                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2997                  {
2998                    fvdef = vignore;
2999                    return false;
3000                  }
3001                break;
3002              default:
3003                break;
3004              }
3005           /* FALLTHRU */
3006           case fvnameseen:
3007           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3008             {
3009               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3010                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3011               fvdef = foperator;
3012               *is_func_or_var = true;
3013               return true;
3014             }
           /* Inside braces, only names directly in a struct body count. */
3015           if (bracelev > 0 && !instruct)
3016             break;
3017           fvdef = fvnameseen;   /* function or variable */
3018           *is_func_or_var = true;
3019           return true;
3020          default:
3021            break;
3022         }
3023       break;
3024      default:
3025        break;
3026     }
3027
3028   return false;
3029 }
3030
3031 \f
3032 /*
3033  * C_entries often keeps pointers to tokens or lines which are older than
3034  * the line currently read.  By keeping two line buffers, and switching
3035  * them at end of line, it is possible to use those pointers.
 *
 * The macros below are meant for use inside C_entries: they refer to
 * its locals curndx, newndx, c_ext, lp, quotednl, inf and savetoken,
 * and to a `charno' variable that is not defined in this part of the
 * file.
3036  */
3037 static struct
3038 {
3039   long linepos;
3040   linebuffer lb;
3041 } lbs[2];
3042
3043 #define current_lb_is_new (newndx == curndx)
3044 #define switch_line_buffers() (curndx = 1 - curndx)
3045
3046 #define curlb (lbs[curndx].lb)
3047 #define newlb (lbs[newndx].lb)
3048 #define curlinepos (lbs[curndx].linepos)
3049 #define newlinepos (lbs[newndx].linepos)
3050
/* Tests on the C_EXT language mask of the file being scanned. */
3051 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3052 #define cplpl (c_ext & C_PLPL)
3053 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3054
/* Read the next input line into the current line buffer: record charno
   as its position, point lp at its start, clear quotednl and make the
   current buffer the new one.  definedef is left untouched. */
3055 #define CNL_SAVE_DEFINEDEF()                                            \
3056 do {                                                                    \
3057   curlinepos = charno;                                                  \
3058   readline (&curlb, inf);                                               \
3059   lp = curlb.buffer;                                                    \
3060   quotednl = false;                                                     \
3061   newndx = curndx;                                                      \
3062 } while (0)
3063
/* Like CNL_SAVE_DEFINEDEF, but also restore a token saved in savetoken
   (if it is valid) and reset definedef to dnone. */
3064 #define CNL()                                                           \
3065 do {                                                                    \
3066   CNL_SAVE_DEFINEDEF ();                                                \
3067   if (savetoken.valid)                                                  \
3068     {                                                                   \
3069       token = savetoken;                                                \
3070       savetoken.valid = false;                                          \
3071     }                                                                   \
3072   definedef = dnone;                                                    \
3073 } while (0)
3074
3075
3076 static void
3077 make_C_tag (bool isfun)
3078 {
3079   /* This function is never called when token.valid is false, but
3080      we must protect against invalid input or internal errors. */
3081   if (token.valid)
3082     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3083               token.offset+token.length+1, token.lineno, token.linepos);
3084   else if (DEBUG)
3085     {                             /* this branch is optimized away if !DEBUG */
3086       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3087                 token_name.len + 17, isfun, token.line,
3088                 token.offset+token.length+1, token.lineno, token.linepos);
3089       error ("INVALID TOKEN");
3090     }
3091
3092   token.valid = false;
3093 }
3094
/* Return true unless the end-of-file or the error indicator of INF
   is set.  */
static bool
perhaps_more_input (FILE *inf)
{
  if (feof (inf))
    return false;
  return ferror (inf) == 0;
}
3100
3101
3102 /*
3103  * C_entries ()
3104  *      This routine finds functions, variables, typedefs,
3105  *      #define's, enum constants and struct/union/enum definitions in
3106  *      C syntax and adds them to the list.
3107  */
3108 static void
3109 C_entries (int c_ext, FILE *inf)
3110                                 /* extension of C */
3111                                 /* input file */
3112 {
3113   register char c;              /* latest char read; '\0' for end of line */
3114   register char *lp;            /* pointer one beyond the character `c' */
3115   int curndx, newndx;           /* indices for current and new lb */
3116   register int tokoff;          /* offset in line of start of current token */
3117   register int toklen;          /* length of current token */
3118   const char *qualifier;        /* string used to qualify names */
3119   int qlen;                     /* length of qualifier */
3120   int bracelev;                 /* current brace level */
3121   int bracketlev;               /* current bracket level */
3122   int parlev;                   /* current parenthesis level */
3123   int attrparlev;               /* __attribute__ parenthesis level */
3124   int templatelev;              /* current template level */
3125   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3126   bool incomm, inquote, inchar, quotednl, midtoken;
3127   bool yacc_rules;              /* in the rules part of a yacc file */
3128   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3129
3130
3131   linebuffer_init (&lbs[0].lb);
3132   linebuffer_init (&lbs[1].lb);
3133   if (cstack.size == 0)
3134     {
3135       cstack.size = (DEBUG) ? 1 : 4;
3136       cstack.nl = 0;
3137       cstack.cname = xnew (cstack.size, char *);
3138       cstack.bracelev = xnew (cstack.size, int);
3139     }
3140
3141   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3142   curndx = newndx = 0;
3143   lp = curlb.buffer;
3144   *lp = 0;
3145
3146   fvdef = fvnone; fvextern = false; typdef = tnone;
3147   structdef = snone; definedef = dnone; objdef = onone;
3148   yacc_rules = false;
3149   midtoken = inquote = inchar = incomm = quotednl = false;
3150   token.valid = savetoken.valid = false;
3151   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3152   if (cjava)
3153     { qualifier = "."; qlen = 1; }
3154   else
3155     { qualifier = "::"; qlen = 2; }
3156
3157
3158   while (perhaps_more_input (inf))
3159     {
3160       c = *lp++;
3161       if (c == '\\')
3162         {
3163           /* If we are at the end of the line, the next character is a
3164              '\0'; do not skip it, because it is what tells us
3165              to read the next line.  */
3166           if (*lp == '\0')
3167             {
3168               quotednl = true;
3169               continue;
3170             }
3171           lp++;
3172           c = ' ';
3173         }
3174       else if (incomm)
3175         {
3176           switch (c)
3177             {
3178             case '*':
3179               if (*lp == '/')
3180                 {
3181                   c = *lp++;
3182                   incomm = false;
3183                 }
3184               break;
3185             case '\0':
3186               /* Newlines inside comments do not end macro definitions in
3187                  traditional cpp. */
3188               CNL_SAVE_DEFINEDEF ();
3189               break;
3190             }
3191           continue;
3192         }
3193       else if (inquote)
3194         {
3195           switch (c)
3196             {
3197             case '"':
3198               inquote = false;
3199               break;
3200             case '\0':
3201               /* Newlines inside strings do not end macro definitions
3202                  in traditional cpp, even though compilers don't
3203                  usually accept them. */
3204               CNL_SAVE_DEFINEDEF ();
3205               break;
3206             }
3207           continue;
3208         }
3209       else if (inchar)
3210         {
3211           switch (c)
3212             {
3213             case '\0':
3214               /* Hmmm, something went wrong. */
3215               CNL ();
3216               /* FALLTHRU */
3217             case '\'':
3218               inchar = false;
3219               break;
3220             }
3221           continue;
3222         }
3223       else switch (c)
3224         {
3225         case '"':
3226           inquote = true;
3227           if (bracketlev > 0)
3228             continue;
3229           if (inattribute)
3230             break;
3231           switch (fvdef)
3232             {
3233             case fdefunkey:
3234             case fstartlist:
3235             case finlist:
3236             case fignore:
3237             case vignore:
3238               break;
3239             default:
3240               fvextern = false;
3241               fvdef = fvnone;
3242             }
3243           continue;
3244         case '\'':
3245           inchar = true;
3246           if (bracketlev > 0)
3247             continue;
3248           if (inattribute)
3249             break;
3250           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3251             {
3252               fvextern = false;
3253               fvdef = fvnone;
3254             }
3255           continue;
3256         case '/':
3257           if (*lp == '*')
3258             {
3259               incomm = true;
3260               lp++;
3261               c = ' ';
3262               if (bracketlev > 0)
3263                 continue;
3264             }
3265           else if (/* cplpl && */ *lp == '/')
3266             {
3267               c = '\0';
3268             }
3269           break;
3270         case '%':
3271           if ((c_ext & YACC) && *lp == '%')
3272             {
3273               /* Entering or exiting rules section in yacc file. */
3274               lp++;
3275               definedef = dnone; fvdef = fvnone; fvextern = false;
3276               typdef = tnone; structdef = snone;
3277               midtoken = inquote = inchar = incomm = quotednl = false;
3278               bracelev = 0;
3279               yacc_rules = !yacc_rules;
3280               continue;
3281             }
3282           else
3283             break;
3284         case '#':
3285           if (definedef == dnone)
3286             {
3287               char *cp;
3288               bool cpptoken = true;
3289
3290               /* Look back on this line.  If all blanks, or nonblanks
3291                  followed by an end of comment, this is a preprocessor
3292                  token. */
3293               for (cp = newlb.buffer; cp < lp-1; cp++)
3294                 if (!c_isspace (*cp))
3295                   {
3296                     if (*cp == '*' && cp[1] == '/')
3297                       {
3298                         cp++;
3299                         cpptoken = true;
3300                       }
3301                     else
3302                       cpptoken = false;
3303                   }
3304               if (cpptoken)
3305                 {
3306                   definedef = dsharpseen;
3307                   /* This is needed for tagging enum values: when there are
3308                      preprocessor conditionals inside the enum, we need to
3309                      reset the value of fvdef so that the next enum value is
3310                      tagged even though the one before it did not end in a
3311                      comma.  */
3312                   if (fvdef == vignore && instruct && parlev == 0)
3313                     {
3314                       if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3315                         fvdef = fvnone;
3316                     }
3317                 }
3318             } /* if (definedef == dnone) */
3319           continue;
3320         case '[':
3321           bracketlev++;
3322           continue;
3323         default:
3324           if (bracketlev > 0)
3325             {
3326               if (c == ']')
3327                 --bracketlev;
3328               else if (c == '\0')
3329                 CNL_SAVE_DEFINEDEF ();
3330               continue;
3331             }
3332           break;
3333         } /* switch (c) */
3334
3335
3336       /* Consider token only if some involved conditions are satisfied. */
3337       if (typdef != tignore
3338           && definedef != dignorerest
3339           && fvdef != finlist
3340           && templatelev == 0
3341           && (definedef != dnone
3342               || structdef != scolonseen)
3343           && !inattribute)
3344         {
3345           if (midtoken)
3346             {
3347               if (endtoken (c))
3348                 {
3349                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3350                     /* This handles :: in the middle,
3351                        but not at the beginning of an identifier.
3352                        Also, space-separated :: is not recognized. */
3353                     {
3354                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3355                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3356                       lp += 2;
3357                       toklen += 2;
3358                       c = lp[-1];
3359                       goto still_in_token;
3360                     }
3361                   else
3362                     {
3363                       bool funorvar = false;
3364
3365                       if (yacc_rules
3366                           || consider_token (newlb.buffer + tokoff, toklen, c,
3367                                              &c_ext, bracelev, parlev,
3368                                              &funorvar))
3369                         {
3370                           if (fvdef == foperator)
3371                             {
3372                               char *oldlp = lp;
3373                               lp = skip_spaces (lp-1);
3374                               if (*lp != '\0')
3375                                 lp += 1;
3376                               while (*lp != '\0'
3377                                      && !c_isspace (*lp) && *lp != '(')
3378                                 lp += 1;
3379                               c = *lp++;
3380                               toklen += lp - oldlp;
3381                             }
3382                           token.named = false;
3383                           if (!plainc
3384                               && nestlev > 0 && definedef == dnone)
3385                             /* in struct body */
3386                             {
3387                               if (class_qualify)
3388                                 {
3389                                   int len;
3390                                   write_classname (&token_name, qualifier);
3391                                   len = token_name.len;
3392                                   linebuffer_setlen (&token_name,
3393                                                      len + qlen + toklen);
3394                                   sprintf (token_name.buffer + len, "%s%.*s",
3395                                            qualifier, toklen,
3396                                            newlb.buffer + tokoff);
3397                                 }
3398                               else
3399                                 {
3400                                   linebuffer_setlen (&token_name, toklen);
3401                                   sprintf (token_name.buffer, "%.*s",
3402                                            toklen, newlb.buffer + tokoff);
3403                                 }
3404                               token.named = true;
3405                             }
3406                           else if (objdef == ocatseen)
3407                             /* Objective C category */
3408                             {
3409                               if (class_qualify)
3410                                 {
3411                                   int len = strlen (objtag) + 2 + toklen;
3412                                   linebuffer_setlen (&token_name, len);
3413                                   sprintf (token_name.buffer, "%s(%.*s)",
3414                                            objtag, toklen,
3415                                            newlb.buffer + tokoff);
3416                                 }
3417                               else
3418                                 {
3419                                   linebuffer_setlen (&token_name, toklen);
3420                                   sprintf (token_name.buffer, "%.*s",
3421                                            toklen, newlb.buffer + tokoff);
3422                                 }
3423                               token.named = true;
3424                             }
3425                           else if (objdef == omethodtag
3426                                    || objdef == omethodparm)
3427                             /* Objective C method */
3428                             {
3429                               token.named = true;
3430                             }
3431                           else if (fvdef == fdefunname)
3432                             /* GNU DEFUN and similar macros */
3433                             {
3434                               bool defun = (newlb.buffer[tokoff] == 'F');
3435                               int off = tokoff;
3436                               int len = toklen;
3437
3438                               /* Rewrite the tag so that emacs lisp DEFUNs
3439                                  can be found by their elisp name */
3440                               if (defun)
3441                                 {
3442                                   off += 1;
3443                                   len -= 1;
3444                                 }
3445                               linebuffer_setlen (&token_name, len);
3446                               memcpy (token_name.buffer,
3447                                       newlb.buffer + off, len);
3448                               token_name.buffer[len] = '\0';
3449                               if (defun)
3450                                 while (--len >= 0)
3451                                   if (token_name.buffer[len] == '_')
3452                                     token_name.buffer[len] = '-';
3453                               token.named = defun;
3454                             }
3455                           else
3456                             {
3457                               linebuffer_setlen (&token_name, toklen);
3458                               memcpy (token_name.buffer,
3459                                       newlb.buffer + tokoff, toklen);
3460                               token_name.buffer[toklen] = '\0';
3461                               /* Name macros and members. */
3462                               token.named = (structdef == stagseen
3463                                              || typdef == ttypeseen
3464                                              || typdef == tend
3465                                              || (funorvar
3466                                                  && definedef == dignorerest)
3467                                              || (funorvar
3468                                                  && definedef == dnone
3469                                                  && structdef == snone
3470                                                  && bracelev > 0));
3471                             }
3472                           token.lineno = lineno;
3473                           token.offset = tokoff;
3474                           token.length = toklen;
3475                           token.line = newlb.buffer;
3476                           token.linepos = newlinepos;
3477                           token.valid = true;
3478
3479                           if (definedef == dnone
3480                               && (fvdef == fvnameseen
3481                                   || fvdef == foperator
3482                                   || structdef == stagseen
3483                                   || typdef == tend
3484                                   || typdef == ttypeseen
3485                                   || objdef != onone))
3486                             {
3487                               if (current_lb_is_new)
3488                                 switch_line_buffers ();
3489                             }
3490                           else if (definedef != dnone
3491                                    || fvdef == fdefunname
3492                                    || instruct)
3493                             make_C_tag (funorvar);
3494                         }
3495                       else /* not yacc and consider_token failed */
3496                         {
3497                           if (inattribute && fvdef == fignore)
3498                             {
3499                               /* We have just met __attribute__ after a
3500                                  function parameter list: do not tag the
3501                                  function again. */
3502                               fvdef = fvnone;
3503                             }
3504                         }
3505                       midtoken = false;
3506                     }
3507                 } /* if (endtoken (c)) */
3508               else if (intoken (c))
3509                 still_in_token:
3510                 {
3511                   toklen++;
3512                   continue;
3513                 }
3514             } /* if (midtoken) */
3515           else if (begtoken (c))
3516             {
3517               switch (definedef)
3518                 {
3519                 case dnone:
3520                   switch (fvdef)
3521                     {
3522                     case fstartlist:
3523                       /* This prevents tagging fb in
3524                          void (__attribute__((noreturn)) *fb) (void);
3525                          Fixing this is not easy and not very important. */
3526                       fvdef = finlist;
3527                       continue;
3528                     case flistseen:
3529                       if (plainc || declarations)
3530                         {
3531                           make_C_tag (true); /* a function */
3532                           fvdef = fignore;
3533                         }
3534                       break;
3535                     default:
3536                       break;
3537                     }
3538                   if (structdef == stagseen && !cjava)
3539                     {
3540                       popclass_above (bracelev);
3541                       structdef = snone;
3542                     }
3543                   break;
3544                 case dsharpseen:
3545                   savetoken = token;
3546                   break;
3547                 default:
3548                   break;
3549                 }
3550               if (!yacc_rules || lp == newlb.buffer + 1)
3551                 {
3552                   tokoff = lp - 1 - newlb.buffer;
3553                   toklen = 1;
3554                   midtoken = true;
3555                 }
3556               continue;
3557             } /* if (begtoken) */
3558         } /* if must look at token */
3559
3560
3561       /* Detect end of line, colon, comma, semicolon and various braces
3562          after having handled a token.*/
3563       switch (c)
3564         {
3565         case ':':
3566           if (inattribute)
3567             break;
3568           if (yacc_rules && token.offset == 0 && token.valid)
3569             {
3570               make_C_tag (false); /* a yacc function */
3571               break;
3572             }
3573           if (definedef != dnone)
3574             break;
3575           switch (objdef)
3576             {
3577             case otagseen:
3578               objdef = oignore;
3579               make_C_tag (true); /* an Objective C class */
3580               break;
3581             case omethodtag:
3582             case omethodparm:
3583               objdef = omethodcolon;
3584               if (class_qualify)
3585                 {
3586                   int toklen = token_name.len;
3587                   linebuffer_setlen (&token_name, toklen + 1);
3588                   strcpy (token_name.buffer + toklen, ":");
3589                 }
3590               break;
3591             default:
3592               break;
3593             }
3594           if (structdef == stagseen)
3595             {
3596               structdef = scolonseen;
3597               break;
3598             }
3599           /* Should be useless, but may work as a safety net. */
3600           if (cplpl && fvdef == flistseen)
3601             {
3602               make_C_tag (true); /* a function */
3603               fvdef = fignore;
3604               break;
3605             }
3606           break;
3607         case ';':
3608           if (definedef != dnone || inattribute)
3609             break;
3610           switch (typdef)
3611             {
3612             case tend:
3613             case ttypeseen:
3614               make_C_tag (false); /* a typedef */
3615               typdef = tnone;
3616               fvdef = fvnone;
3617               break;
3618             case tnone:
3619             case tinbody:
3620             case tignore:
3621               switch (fvdef)
3622                 {
3623                 case fignore:
3624                   if (typdef == tignore || cplpl)
3625                     fvdef = fvnone;
3626                   break;
3627                 case fvnameseen:
3628                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3629                       || (members && instruct))
3630                     make_C_tag (false); /* a variable */
3631                   fvextern = false;
3632                   fvdef = fvnone;
3633                   token.valid = false;
3634                   break;
3635                 case flistseen:
3636                   if ((declarations
3637                        && (cplpl || !instruct)
3638                        && (typdef == tnone || (typdef != tignore && instruct)))
3639                       || (members
3640                           && plainc && instruct))
3641                     make_C_tag (true);  /* a function */
3642                   /* FALLTHRU */
3643                 default:
3644                   fvextern = false;
3645                   fvdef = fvnone;
3646                   if (declarations
3647                        && cplpl && structdef == stagseen)
3648                     make_C_tag (false); /* forward declaration */
3649                   else
3650                     token.valid = false;
3651                 } /* switch (fvdef) */
3652               /* FALLTHRU */
3653             default:
3654               if (!instruct)
3655                 typdef = tnone;
3656             }
3657           if (structdef == stagseen)
3658             structdef = snone;
3659           break;
3660         case ',':
3661           if (definedef != dnone || inattribute)
3662             break;
3663           switch (objdef)
3664             {
3665             case omethodtag:
3666             case omethodparm:
3667               make_C_tag (true); /* an Objective C method */
3668               objdef = oinbody;
3669               break;
3670             default:
3671               break;
3672             }
3673           switch (fvdef)
3674             {
3675             case fdefunkey:
3676             case foperator:
3677             case fstartlist:
3678             case finlist:
3679             case fignore:
3680               break;
3681             case vignore:
3682               if (instruct && parlev == 0)
3683                 fvdef = fvnone;
3684               break;
3685             case fdefunname:
3686               fvdef = fignore;
3687               break;
3688             case fvnameseen:
3689               if (parlev == 0
3690                   && ((globals
3691                        && bracelev == 0
3692                        && templatelev == 0
3693                        && (!fvextern || declarations))
3694                       || (members && instruct)))
3695                   make_C_tag (false); /* a variable */
3696               break;
3697             case flistseen:
3698               if ((declarations && typdef == tnone && !instruct)
3699                   || (members && typdef != tignore && instruct))
3700                 {
3701                   make_C_tag (true); /* a function */
3702                   fvdef = fvnameseen;
3703                 }
3704               else if (!declarations)
3705                 fvdef = fvnone;
3706               token.valid = false;
3707               break;
3708             default:
3709               fvdef = fvnone;
3710             }
3711           if (structdef == stagseen)
3712             structdef = snone;
3713           break;
3714         case ']':
3715           if (definedef != dnone || inattribute)
3716             break;
3717           if (structdef == stagseen)
3718             structdef = snone;
3719           switch (typdef)
3720             {
3721             case ttypeseen:
3722             case tend:
3723               typdef = tignore;
3724               make_C_tag (false);       /* a typedef */
3725               break;
3726             case tnone:
3727             case tinbody:
3728               switch (fvdef)
3729                 {
3730                 case foperator:
3731                 case finlist:
3732                 case fignore:
3733                 case vignore:
3734                   break;
3735                 case fvnameseen:
3736                   if ((members && bracelev == 1)
3737                       || (globals && bracelev == 0
3738                           && (!fvextern || declarations)))
3739                     make_C_tag (false); /* a variable */
3740                   /* FALLTHRU */
3741                 default:
3742                   fvdef = fvnone;
3743                 }
3744               break;
3745             default:
3746               break;
3747             }
3748           break;
3749         case '(':
3750           if (inattribute)
3751             {
3752               attrparlev++;
3753               break;
3754             }
3755           if (definedef != dnone)
3756             break;
3757           if (objdef == otagseen && parlev == 0)
3758             objdef = oparenseen;
3759           switch (fvdef)
3760             {
3761             case fvnameseen:
3762               if (typdef == ttypeseen
3763                   && *lp != '*'
3764                   && !instruct)
3765                 {
3766                   /* This handles constructs like:
3767                      typedef void OperatorFun (int fun); */
3768                   make_C_tag (false);
3769                   typdef = tignore;
3770                   fvdef = fignore;
3771                   break;
3772                 }
3773               /* FALLTHRU */
3774             case foperator:
3775               fvdef = fstartlist;
3776               break;
3777             case flistseen:
3778               fvdef = finlist;
3779               break;
3780             default:
3781               break;
3782             }
3783           parlev++;
3784           break;
3785         case ')':
3786           if (inattribute)
3787             {
3788               if (--attrparlev == 0)
3789                 inattribute = false;
3790               break;
3791             }
3792           if (definedef != dnone)
3793             break;
3794           if (objdef == ocatseen && parlev == 1)
3795             {
3796               make_C_tag (true); /* an Objective C category */
3797               objdef = oignore;
3798             }
3799           if (--parlev == 0)
3800             {
3801               switch (fvdef)
3802                 {
3803                 case fstartlist:
3804                 case finlist:
3805                   fvdef = flistseen;
3806                   break;
3807                 default:
3808                   break;
3809                 }
3810               if (!instruct
3811                   && (typdef == tend
3812                       || typdef == ttypeseen))
3813                 {
3814                   typdef = tignore;
3815                   make_C_tag (false); /* a typedef */
3816                 }
3817             }
3818           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3819             parlev = 0;
3820           break;
3821         case '{':
3822           if (definedef != dnone)
3823             break;
3824           if (typdef == ttypeseen)
3825             {
3826               /* Whenever typdef is set to tinbody (currently only
3827                  here), typdefbracelev should be set to bracelev. */
3828               typdef = tinbody;
3829               typdefbracelev = bracelev;
3830             }
3831           switch (fvdef)
3832             {
3833             case flistseen:
3834               if (cplpl && !class_qualify)
3835                 {
3836                   /* Remove class and namespace qualifiers from the token,
3837                      leaving only the method/member name.  */
3838                   char *cc, *uqname = token_name.buffer;
3839                   char *tok_end = token_name.buffer + token_name.len;
3840
3841                   for (cc = token_name.buffer; cc < tok_end; cc++)
3842                     {
3843                       if (*cc == ':' && cc[1] == ':')
3844                         {
3845                           uqname = cc + 2;
3846                           cc++;
3847                         }
3848                     }
3849                   if (uqname > token_name.buffer)
3850                     {
3851                       int uqlen = strlen (uqname);
3852                       linebuffer_setlen (&token_name, uqlen);
3853                       memmove (token_name.buffer, uqname, uqlen + 1);
3854                     }
3855                 }
3856               make_C_tag (true);    /* a function */
3857               /* FALLTHRU */
3858             case fignore:
3859               fvdef = fvnone;
3860               break;
3861             case fvnone:
3862               switch (objdef)
3863                 {
3864                 case otagseen:
3865                   make_C_tag (true); /* an Objective C class */
3866                   objdef = oignore;
3867                   break;
3868                 case omethodtag:
3869                 case omethodparm:
3870                   make_C_tag (true); /* an Objective C method */
3871                   objdef = oinbody;
3872                   break;
3873                 default:
3874                   /* Neutralize `extern "C" {' grot. */
3875                   if (bracelev == 0 && structdef == snone && nestlev == 0
3876                       && typdef == tnone)
3877                     bracelev = -1;
3878                 }
3879               break;
3880             default:
3881               break;
3882             }
3883           switch (structdef)
3884             {
3885             case skeyseen:         /* unnamed struct */
3886               pushclass_above (bracelev, NULL, 0);
3887               structdef = snone;
3888               break;
3889             case stagseen:         /* named struct or enum */
3890             case scolonseen:       /* a class */
3891               pushclass_above (bracelev,token.line+token.offset, token.length);
3892               structdef = snone;
3893               make_C_tag (false);  /* a struct or enum */
3894               break;
3895             default:
3896               break;
3897             }
3898           bracelev += 1;
3899           break;
3900         case '*':
3901           if (definedef != dnone)
3902             break;
3903           if (fvdef == fstartlist)
3904             {
3905               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3906               token.valid = false;
3907             }
3908           break;
3909         case '}':
3910           if (definedef != dnone)
3911             break;
3912           bracelev -= 1;
3913           if (!ignoreindent && lp == newlb.buffer + 1)
3914             {
3915               if (bracelev != 0)
3916                 token.valid = false; /* unexpected value, token unreliable */
3917               bracelev = 0;     /* reset brace level if first column */
3918               parlev = 0;       /* also reset paren level, just in case... */
3919             }
3920           else if (bracelev < 0)
3921             {
3922               token.valid = false; /* something gone amiss, token unreliable */
3923               bracelev = 0;
3924             }
3925           if (bracelev == 0 && fvdef == vignore)
3926             fvdef = fvnone;             /* end of function */
3927           popclass_above (bracelev);
3928           structdef = snone;
3929           /* Only if typdef == tinbody is typdefbracelev significant. */
3930           if (typdef == tinbody && bracelev <= typdefbracelev)
3931             {
3932               assert (bracelev == typdefbracelev);
3933               typdef = tend;
3934             }
3935           break;
3936         case '=':
3937           if (definedef != dnone)
3938             break;
3939           switch (fvdef)
3940             {
3941             case foperator:
3942             case finlist:
3943             case fignore:
3944             case vignore:
3945               break;
3946             case fvnameseen:
3947               if ((members && bracelev == 1)
3948                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3949                 make_C_tag (false); /* a variable */
3950               /* FALLTHRU */
3951             default:
3952               fvdef = vignore;
3953             }
3954           break;
3955         case '<':
3956           if (cplpl
3957               && (structdef == stagseen || fvdef == fvnameseen))
3958             {
3959               templatelev++;
3960               break;
3961             }
3962           goto resetfvdef;
3963         case '>':
3964           if (templatelev > 0)
3965             {
3966               templatelev--;
3967               break;
3968             }
3969           goto resetfvdef;
3970         case '+':
3971         case '-':
3972           if (objdef == oinbody && bracelev == 0)
3973             {
3974               objdef = omethodsign;
3975               break;
3976             }
3977           /* FALLTHRU */
3978         resetfvdef:
3979         case '#': case '~': case '&': case '%': case '/':
3980         case '|': case '^': case '!': case '.': case '?':
3981           if (definedef != dnone)
3982             break;
3983           /* These surely cannot follow a function tag in C. */
3984           switch (fvdef)
3985             {
3986             case foperator:
3987             case finlist:
3988             case fignore:
3989             case vignore:
3990               break;
3991             default:
3992               fvdef = fvnone;
3993             }
3994           break;
3995         case '\0':
3996           if (objdef == otagseen)
3997             {
3998               make_C_tag (true); /* an Objective C class */
3999               objdef = oignore;
4000             }
4001           /* If a macro spans multiple lines don't reset its state. */
4002           if (quotednl)
4003             CNL_SAVE_DEFINEDEF ();
4004           else
4005             CNL ();
4006           break;
4007         } /* switch (c) */
4008
4009     } /* while not eof */
4010
4011   free (lbs[0].lb.buffer);
4012   free (lbs[1].lb.buffer);
4013 }
4014
4015 /*
4016  * Process either a C++ file or a C file depending on the setting
4017  * of a global flag.
4018  */
4019 static void
4020 default_C_entries (FILE *inf)
4021 {
4022   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4023 }
4024
/* Tag INF as plain C: no language-extension flag is handed to C_entries. */
static void
plain_C_entries (FILE *inf)
{
  const int no_extensions = 0;

  C_entries (no_extensions, inf);
}
4031
4032 /* Always do C++. */
4033 static void
4034 Cplusplus_entries (FILE *inf)
4035 {
4036   C_entries (C_PLPL, inf);
4037 }
4038
4039 /* Always do Java. */
4040 static void
4041 Cjava_entries (FILE *inf)
4042 {
4043   C_entries (C_JAVA, inf);
4044 }
4045
4046 /* Always do C*. */
4047 static void
4048 Cstar_entries (FILE *inf)
4049 {
4050   C_entries (C_STAR, inf);
4051 }
4052
4053 /* Always do Yacc. */
4054 static void
4055 Yacc_entries (FILE *inf)
4056 {
4057   C_entries (YACC, inf);
4058 }
4059
4060 \f
4061 /* Useful macros.

   LOOP_ON_INPUT_LINES expands to the head of a `while' loop; the
   statement written after the macro invocation becomes the loop body.
   On every iteration it first asks perhaps_more_input (file_pointer),
   then calls readline to fill line_buffer from file_pointer, and
   finally points char_pointer at line_buffer.buffer.

   LOOKING_AT (cp, kw) is true when the text at cp begins with the
   literal string kw and the character right after it satisfies
   notinname.  On success cp is moved past kw and past whatever
   skip_spaces skips; on failure cp is left untouched.  cp must be a
   modifiable lvalue and is evaluated more than once.  */
4062 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
4063   while (perhaps_more_input (file_pointer)                              \
4064          && (readline (&(line_buffer), file_pointer),                   \
4065              (char_pointer) = (line_buffer).buffer,                     \
4066              true))                                                     \
4067
4068 #define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
4069   ((assert ("" kw), true)   /* syntax error if not a literal string */  \
4070    && strneq ((cp), kw, sizeof (kw)-1)          /* cp points at kw */   \
4071    && notinname ((cp)[sizeof (kw)-1])           /* end of kw */         \
4072    && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1), true)) /* skip spaces */
4073
4074 /* Similar to LOOKING_AT, but the comparison is done with strncaseeq,
   the character following the keyword is not tested with notinname,
   and on success cp is advanced by the length of kw only: no
   skip_spaces call is made, so the caller must skip any blanks.  */
4075 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4076   ((assert ("" kw), true) /* syntax error if not a literal string */    \
4077    && strncaseeq ((cp), kw, sizeof (kw)-1)      /* cp points at kw */   \
4078    && ((cp) += sizeof (kw) - 1, true))          /* step over kw only */
4079
4080 /*
4081  * Read a file, but do no processing.  This is used to do regexp
4082  * matching on files that have no language defined.
4083  */
4084 static void
4085 just_read_file (FILE *inf)
4086 {
4087   while (perhaps_more_input (inf))
4088     readline (&lb, inf);
4089 }
4090
4091 \f
4092 /* Fortran parsing */
4093
4094 static void F_takeprec (void);
4095 static void F_getit (FILE *);
4096
4097 static void
4098 F_takeprec (void)
4099 {
4100   dbp = skip_spaces (dbp);
4101   if (*dbp != '*')
4102     return;
4103   dbp++;
4104   dbp = skip_spaces (dbp);
4105   if (strneq (dbp, "(*)", 3))
4106     {
4107       dbp += 3;
4108       return;
4109     }
4110   if (!c_isdigit (*dbp))
4111     {
4112       --dbp;                    /* force failure */
4113       return;
4114     }
4115   do
4116     dbp++;
4117   while (c_isdigit (*dbp));
4118 }
4119
4120 static void
4121 F_getit (FILE *inf)
4122 {
4123   register char *cp;
4124
4125   dbp = skip_spaces (dbp);
4126   if (*dbp == '\0')
4127     {
4128       readline (&lb, inf);
4129       dbp = lb.buffer;
4130       if (dbp[5] != '&')
4131         return;
4132       dbp += 6;
4133       dbp = skip_spaces (dbp);
4134     }
4135   if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4136     return;
4137   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4138     continue;
4139   make_tag (dbp, cp-dbp, true,
4140             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4141 }
4142
4143
4144 static void
4145 Fortran_functions (FILE *inf)
4146 {
4147   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4148     {
4149       if (*dbp == '%')
4150         dbp++;                  /* Ratfor escape to fortran */
4151       dbp = skip_spaces (dbp);
4152       if (*dbp == '\0')
4153         continue;
4154
4155       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4156         dbp = skip_spaces (dbp);
4157
4158       if (LOOKING_AT_NOCASE (dbp, "pure"))
4159         dbp = skip_spaces (dbp);
4160
4161       if (LOOKING_AT_NOCASE (dbp, "elemental"))
4162         dbp = skip_spaces (dbp);
4163
4164       switch (c_tolower (*dbp))
4165         {
4166         case 'i':
4167           if (nocase_tail ("integer"))
4168             F_takeprec ();
4169           break;
4170         case 'r':
4171           if (nocase_tail ("real"))
4172             F_takeprec ();
4173           break;
4174         case 'l':
4175           if (nocase_tail ("logical"))
4176             F_takeprec ();
4177           break;
4178         case 'c':
4179           if (nocase_tail ("complex") || nocase_tail ("character"))
4180             F_takeprec ();
4181           break;
4182         case 'd':
4183           if (nocase_tail ("double"))
4184             {
4185               dbp = skip_spaces (dbp);
4186               if (*dbp == '\0')
4187                 continue;
4188               if (nocase_tail ("precision"))
4189                 break;
4190               continue;
4191             }
4192           break;
4193         }
4194       dbp = skip_spaces (dbp);
4195       if (*dbp == '\0')
4196         continue;
4197       switch (c_tolower (*dbp))
4198         {
4199         case 'f':
4200           if (nocase_tail ("function"))
4201             F_getit (inf);
4202           continue;
4203         case 's':
4204           if (nocase_tail ("subroutine"))
4205             F_getit (inf);
4206           continue;
4207         case 'e':
4208           if (nocase_tail ("entry"))
4209             F_getit (inf);
4210           continue;
4211         case 'b':
4212           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4213             {
4214               dbp = skip_spaces (dbp);
4215               if (*dbp == '\0') /* assume un-named */
4216                 make_tag ("blockdata", 9, true,
4217                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4218               else
4219                 F_getit (inf);  /* look for name */
4220             }
4221           continue;
4222         }
4223     }
4224 }
4225
4226 \f
4227 /*
4228  * Go language support
4229  * Original code by Xi Lu <lx@shellcodes.org> (2016)
4230  */
4231 static void
4232 Go_functions(FILE *inf)
4233 {
4234   char *cp, *name;
4235
4236   LOOP_ON_INPUT_LINES(inf, lb, cp)
4237     {
4238       cp = skip_spaces (cp);
4239
4240       if (LOOKING_AT (cp, "package"))
4241         {
4242           name = cp;
4243           while (!notinname (*cp) && *cp != '\0')
4244             cp++;
4245           make_tag (name, cp - name, false, lb.buffer,
4246                     cp - lb.buffer + 1, lineno, linecharno);
4247         }
4248       else if (LOOKING_AT (cp, "func"))
4249         {
4250           /* Go implementation of interface, such as:
4251              func (n *Integer) Add(m Integer) ...
4252              skip `(n *Integer)` part.
4253           */
4254           if (*cp == '(')
4255             {
4256               while (*cp != ')')
4257                 cp++;
4258               cp = skip_spaces (cp+1);
4259             }
4260
4261           if (*cp)
4262             {
4263               name = cp;
4264
4265               while (!notinname (*cp))
4266                 cp++;
4267
4268               make_tag (name, cp - name, true, lb.buffer,
4269                         cp - lb.buffer + 1, lineno, linecharno);
4270             }
4271         }
4272       else if (members && LOOKING_AT (cp, "type"))
4273         {
4274           name = cp;
4275
4276           /* Ignore the likes of the following:
4277              type (
4278                     A
4279              )
4280            */
4281           if (*cp == '(')
4282             return;
4283
4284           while (!notinname (*cp) && *cp != '\0')
4285             cp++;
4286
4287           make_tag (name, cp - name, false, lb.buffer,
4288                     cp - lb.buffer + 1, lineno, linecharno);
4289         }
4290     }
4291 }
4292
4293 \f
4294 /*
4295  * Ada parsing
4296  * Original code by
4297  * Philippe Waroquiers (1998)
4298  */
4299
4300 /* Once we are positioned after an "interesting" keyword, let's get
4301    the real tag value necessary. */
4302 static void
4303 Ada_getit (FILE *inf, const char *name_qualifier)
4304 {
4305   register char *cp;
4306   char *name;
4307   char c;
4308
4309   while (perhaps_more_input (inf))
4310     {
4311       dbp = skip_spaces (dbp);
4312       if (*dbp == '\0'
4313           || (dbp[0] == '-' && dbp[1] == '-'))
4314         {
4315           readline (&lb, inf);
4316           dbp = lb.buffer;
4317         }
4318       switch (c_tolower (*dbp))
4319         {
4320         case 'b':
4321           if (nocase_tail ("body"))
4322             {
4323               /* Skipping body of   procedure body   or   package body or ....
4324                  resetting qualifier to body instead of spec. */
4325               name_qualifier = "/b";
4326               continue;
4327             }
4328           break;
4329         case 't':
4330           /* Skipping type of   task type   or   protected type ... */
4331           if (nocase_tail ("type"))
4332             continue;
4333           break;
4334         }
4335       if (*dbp == '"')
4336         {
4337           dbp += 1;
4338           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4339             continue;
4340         }
4341       else
4342         {
4343           dbp = skip_spaces (dbp);
4344           for (cp = dbp;
4345                c_isalnum (*cp) || *cp == '_' || *cp == '.';
4346                cp++)
4347             continue;
4348           if (cp == dbp)
4349             return;
4350         }
4351       c = *cp;
4352       *cp = '\0';
4353       name = concat (dbp, name_qualifier, "");
4354       *cp = c;
4355       make_tag (name, strlen (name), true,
4356                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4357       free (name);
4358       if (c == '"')
4359         dbp = cp + 1;
4360       return;
4361     }
4362 }
4363
4364 static void
4365 Ada_funcs (FILE *inf)
4366 {
4367   bool inquote = false;
4368   bool skip_till_semicolumn = false;
4369
4370   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4371     {
4372       while (*dbp != '\0')
4373         {
4374           /* Skip a string i.e. "abcd". */
4375           if (inquote || (*dbp == '"'))
4376             {
4377               dbp = strchr (dbp + !inquote, '"');
4378               if (dbp != NULL)
4379                 {
4380                   inquote = false;
4381                   dbp += 1;
4382                   continue;     /* advance char */
4383                 }
4384               else
4385                 {
4386                   inquote = true;
4387                   break;        /* advance line */
4388                 }
4389             }
4390
4391           /* Skip comments. */
4392           if (dbp[0] == '-' && dbp[1] == '-')
4393             break;              /* advance line */
4394
4395           /* Skip character enclosed in single quote i.e. 'a'
4396              and skip single quote starting an attribute i.e. 'Image. */
4397           if (*dbp == '\'')
4398             {
4399               dbp++ ;
4400               if (*dbp != '\0')
4401                 dbp++;
4402               continue;
4403             }
4404
4405           if (skip_till_semicolumn)
4406             {
4407               if (*dbp == ';')
4408                 skip_till_semicolumn = false;
4409               dbp++;
4410               continue;         /* advance char */
4411             }
4412
4413           /* Search for beginning of a token.  */
4414           if (!begtoken (*dbp))
4415             {
4416               dbp++;
4417               continue;         /* advance char */
4418             }
4419
4420           /* We are at the beginning of a token. */
4421           switch (c_tolower (*dbp))
4422             {
4423             case 'f':
4424               if (!packages_only && nocase_tail ("function"))
4425                 Ada_getit (inf, "/f");
4426               else
4427                 break;          /* from switch */
4428               continue;         /* advance char */
4429             case 'p':
4430               if (!packages_only && nocase_tail ("procedure"))
4431                 Ada_getit (inf, "/p");
4432               else if (nocase_tail ("package"))
4433                 Ada_getit (inf, "/s");
4434               else if (nocase_tail ("protected")) /* protected type */
4435                 Ada_getit (inf, "/t");
4436               else
4437                 break;          /* from switch */
4438               continue;         /* advance char */
4439
4440             case 'u':
4441               if (typedefs && !packages_only && nocase_tail ("use"))
4442                 {
4443                   /* when tagging types, avoid tagging  use type Pack.Typename;
4444                      for this, we will skip everything till a ; */
4445                   skip_till_semicolumn = true;
4446                   continue;     /* advance char */
4447                 }
4448
4449             case 't':
4450               if (!packages_only && nocase_tail ("task"))
4451                 Ada_getit (inf, "/k");
4452               else if (typedefs && !packages_only && nocase_tail ("type"))
4453                 {
4454                   Ada_getit (inf, "/t");
4455                   while (*dbp != '\0')
4456                     dbp += 1;
4457                 }
4458               else
4459                 break;          /* from switch */
4460               continue;         /* advance char */
4461             }
4462
4463           /* Look for the end of the token. */
4464           while (!endtoken (*dbp))
4465             dbp++;
4466
4467         } /* advance char */
4468     } /* advance line */
4469 }
4470
4471 \f
4472 /*
4473  * Unix and microcontroller assembly tag handling
4474  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4475  * Idea by Bob Weiner, Motorola Inc. (1994)
4476  */
4477 static void
4478 Asm_labels (FILE *inf)
4479 {
4480   register char *cp;
4481
4482   LOOP_ON_INPUT_LINES (inf, lb, cp)
4483     {
4484       /* If first char is alphabetic or one of [_.$], test for colon
4485          following identifier. */
4486       if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4487         {
4488           /* Read past label. */
4489           cp++;
4490           while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4491             cp++;
4492           if (*cp == ':' || c_isspace (*cp))
4493             /* Found end of label, so copy it and add it to the table. */
4494             make_tag (lb.buffer, cp - lb.buffer, true,
4495                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4496         }
4497     }
4498 }
4499
4500 \f
4501 /*
4502  * Perl support
4503  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4504  *                 /^use constant[ \t\n]+[^ \t\n{=,;]+/
4505  * Perl variable names: /^(my|local).../
4506  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4507  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4508  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4509  */
4510 static void
4511 Perl_functions (FILE *inf)
4512 {
4513   char *package = savestr ("main"); /* current package name */
4514   register char *cp;
4515
4516   LOOP_ON_INPUT_LINES (inf, lb, cp)
4517     {
4518       cp = skip_spaces (cp);
4519
4520       if (LOOKING_AT (cp, "package"))
4521         {
4522           free (package);
4523           get_tag (cp, &package);
4524         }
4525       else if (LOOKING_AT (cp, "sub"))
4526         {
4527           char *pos, *sp;
4528
4529         subr:
4530           sp = cp;
4531           while (!notinname (*cp))
4532             cp++;
4533           if (cp == sp)
4534             continue;           /* nothing found */
4535           pos = strchr (sp, ':');
4536           if (pos && pos < cp && pos[1] == ':')
4537             {
4538               /* The name is already qualified. */
4539               if (!class_qualify)
4540                 {
4541                   char *q = pos + 2, *qpos;
4542                   while ((qpos = strchr (q, ':')) != NULL
4543                          && qpos < cp
4544                          && qpos[1] == ':')
4545                     q = qpos + 2;
4546                   sp = q;
4547                 }
4548               make_tag (sp, cp - sp, true,
4549                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4550             }
4551           else if (class_qualify)
4552             /* Qualify it. */
4553             {
4554               char savechar, *name;
4555
4556               savechar = *cp;
4557               *cp = '\0';
4558               name = concat (package, "::", sp);
4559               *cp = savechar;
4560               make_tag (name, strlen (name), true,
4561                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4562               free (name);
4563             }
4564           else
4565             make_tag (sp, cp - sp, true,
4566                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4567         }
4568       else if (LOOKING_AT (cp, "use constant")
4569                || LOOKING_AT (cp, "use constant::defer"))
4570         {
4571           /* For hash style multi-constant like
4572                 use constant { FOO => 123,
4573                                BAR => 456 };
4574              only the first FOO is picked up.  Parsing across the value
4575              expressions would be difficult in general, due to possible nested
4576              hashes, here-documents, etc.  */
4577           if (*cp == '{')
4578             cp = skip_spaces (cp+1);
4579           goto subr;
4580         }
4581       else if (globals) /* only if we are tagging global vars */
4582         {
4583           /* Skip a qualifier, if any. */
4584           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4585           /* After "my" or "local", but before any following paren or space. */
4586           char *varstart = cp;
4587
4588           if (qual              /* should this be removed?  If yes, how? */
4589               && (*cp == '$' || *cp == '@' || *cp == '%'))
4590             {
4591               varstart += 1;
4592               do
4593                 cp++;
4594               while (c_isalnum (*cp) || *cp == '_');
4595             }
4596           else if (qual)
4597             {
4598               /* Should be examining a variable list at this point;
4599                  could insist on seeing an open parenthesis. */
4600               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4601                 cp++;
4602             }
4603           else
4604             continue;
4605
4606           make_tag (varstart, cp - varstart, false,
4607                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4608         }
4609     }
4610   free (package);
4611 }
4612
4613
4614 /*
4615  * Python support
4616  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4617  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4618  * More ideas by seb bacon <seb@jamkit.com> (2002)
4619  */
4620 static void
4621 Python_functions (FILE *inf)
4622 {
4623   register char *cp;
4624
4625   LOOP_ON_INPUT_LINES (inf, lb, cp)
4626     {
4627       cp = skip_spaces (cp);
4628       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4629         {
4630           char *name = cp;
4631           while (!notinname (*cp) && *cp != ':')
4632             cp++;
4633           make_tag (name, cp - name, true,
4634                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4635         }
4636     }
4637 }
4638
4639 /*
4640  * Ruby support
4641  * Original code by Xi Lu <lx@shellcodes.org> (2015)
4642  */
4643 static void
4644 Ruby_functions (FILE *inf)
4645 {
4646   char *cp = NULL;
4647   bool reader = false, writer = false, alias = false, continuation = false;
4648
4649   LOOP_ON_INPUT_LINES (inf, lb, cp)
4650     {
4651       bool is_class = false;
4652       bool is_method = false;
4653       char *name;
4654
4655       cp = skip_spaces (cp);
4656       if (!continuation
4657           /* Constants.  */
4658           && c_isalpha (*cp) && c_isupper (*cp))
4659         {
4660           char *bp, *colon = NULL;
4661
4662           name = cp;
4663
4664           for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
4665             {
4666               if (*cp == ':')
4667                 colon = cp;
4668             }
4669           if (cp > name + 1)
4670             {
4671               bp = skip_spaces (cp);
4672               if (*bp == '=' && !(bp[1] == '=' || bp[1] == '>'))
4673                 {
4674                   if (colon && !c_isspace (colon[1]))
4675                     name = colon + 1;
4676                   make_tag (name, cp - name, false,
4677                             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4678                 }
4679             }
4680         }
4681       else if (!continuation
4682                /* Modules, classes, methods.  */
4683                && ((is_method = LOOKING_AT (cp, "def"))
4684                    || (is_class = LOOKING_AT (cp, "class"))
4685                    || LOOKING_AT (cp, "module")))
4686         {
4687           const char self_name[] = "self.";
4688           const size_t self_size1 = sizeof (self_name) - 1;
4689
4690           name = cp;
4691
4692          /* Ruby method names can end in a '='.  Also, operator overloading can
4693             define operators whose names include '='.  */
4694           while (!notinname (*cp) || *cp == '=')
4695             cp++;
4696
4697           /* Remove "self." from the method name.  */
4698           if (cp - name > self_size1
4699               && strneq (name, self_name, self_size1))
4700             name += self_size1;
4701
4702           /* Remove the class/module qualifiers from method names.  */
4703           if (is_method)
4704             {
4705               char *q;
4706
4707               for (q = name; q < cp && *q != '.'; q++)
4708                 ;
4709               if (q < cp - 1)   /* punt if we see just "FOO." */
4710                 name = q + 1;
4711             }
4712
4713           /* Don't tag singleton classes.  */
4714           if (is_class && strneq (name, "<<", 2) && cp == name + 2)
4715             continue;
4716
4717           make_tag (name, cp - name, true,
4718                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4719         }
4720       else
4721         {
4722           /* Tag accessors and aliases.  */
4723
4724           if (!continuation)
4725             reader = writer = alias = false;
4726
4727           while (*cp && *cp != '#')
4728             {
4729               if (!continuation)
4730                 {
4731                   reader = writer = alias = false;
4732                   if (LOOKING_AT (cp, "attr_reader"))
4733                     reader = true;
4734                   else if (LOOKING_AT (cp, "attr_writer"))
4735                     writer = true;
4736                   else if (LOOKING_AT (cp, "attr_accessor"))
4737                     {
4738                       reader = true;
4739                       writer = true;
4740                     }
4741                   else if (LOOKING_AT (cp, "alias_method"))
4742                     alias = true;
4743                 }
4744               if (reader || writer || alias)
4745                 {
4746                   do {
4747                     char *np;
4748
4749                     cp = skip_spaces (cp);
4750                     if (*cp == '(')
4751                       cp = skip_spaces (cp + 1);
4752                     np = cp;
4753                     cp = skip_name (cp);
4754                     if (*np != ':')
4755                       continue;
4756                     np++;
4757                     if (reader)
4758                       {
4759                         make_tag (np, cp - np, true,
4760                                   lb.buffer, cp - lb.buffer + 1,
4761                                   lineno, linecharno);
4762                         continuation = false;
4763                       }
4764                     if (writer)
4765                       {
4766                         size_t name_len = cp - np + 1;
4767                         char *wr_name = xnew (name_len + 1, char);
4768
4769                         memcpy (wr_name, np, name_len - 1);
4770                         memcpy (wr_name + name_len - 1, "=", 2);
4771                         pfnote (wr_name, true, lb.buffer, cp - lb.buffer + 1,
4772                                 lineno, linecharno);
4773                         continuation = false;
4774                       }
4775                     if (alias)
4776                       {
4777                         if (!continuation)
4778                           make_tag (np, cp - np, true,
4779                                     lb.buffer, cp - lb.buffer + 1,
4780                                     lineno, linecharno);
4781                         continuation = false;
4782                         while (*cp && *cp != '#' && *cp != ';')
4783                           {
4784                             if (*cp == ',')
4785                               continuation = true;
4786                             else if (!c_isspace (*cp))
4787                               continuation = false;
4788                             cp++;
4789                           }
4790                         if (*cp == ';')
4791                           continuation = false;
4792                       }
4793                     cp = skip_spaces (cp);
4794                   } while ((alias
4795                             ? (*cp == ',')
4796                             : (continuation = (*cp == ',')))
4797                            && (cp = skip_spaces (cp + 1), *cp && *cp != '#'));
4798                 }
4799               if (*cp != '#')
4800                 cp = skip_name (cp);
4801               while (*cp && *cp != '#' && notinname (*cp))
4802                 cp++;
4803             }
4804         }
4805     }
4806 }
4807
4808 \f
4809 /*
4810  * PHP support
4811  * Look for:
4812  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4813  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4814  *  - /^[ \t]*define\(\"[^\"]+/
4815  * Only with --members:
4816  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4817  * Idea by Diez B. Roggisch (2001)
4818  */
4819 static void
4820 PHP_functions (FILE *inf)
4821 {
4822   char *cp, *name;
4823   bool search_identifier = false;
4824
4825   LOOP_ON_INPUT_LINES (inf, lb, cp)
4826     {
4827       cp = skip_spaces (cp);
4828       name = cp;
4829       if (search_identifier
4830           && *cp != '\0')
4831         {
4832           while (!notinname (*cp))
4833             cp++;
4834           make_tag (name, cp - name, true,
4835                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4836           search_identifier = false;
4837         }
4838       else if (LOOKING_AT (cp, "function"))
4839         {
4840           if (*cp == '&')
4841             cp = skip_spaces (cp+1);
4842           if (*cp != '\0')
4843             {
4844               name = cp;
4845               while (!notinname (*cp))
4846                 cp++;
4847               make_tag (name, cp - name, true,
4848                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4849             }
4850           else
4851             search_identifier = true;
4852         }
4853       else if (LOOKING_AT (cp, "class"))
4854         {
4855           if (*cp != '\0')
4856             {
4857               name = cp;
4858               while (*cp != '\0' && !c_isspace (*cp))
4859                 cp++;
4860               make_tag (name, cp - name, false,
4861                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4862             }
4863           else
4864             search_identifier = true;
4865         }
4866       else if (strneq (cp, "define", 6)
4867                && (cp = skip_spaces (cp+6))
4868                && *cp++ == '('
4869                && (*cp == '"' || *cp == '\''))
4870         {
4871           char quote = *cp++;
4872           name = cp;
4873           while (*cp != quote && *cp != '\0')
4874             cp++;
4875           make_tag (name, cp - name, false,
4876                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4877         }
4878       else if (members
4879                && LOOKING_AT (cp, "var")
4880                && *cp == '$')
4881         {
4882           name = cp;
4883           while (!notinname (*cp))
4884             cp++;
4885           make_tag (name, cp - name, false,
4886                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4887         }
4888     }
4889 }
4890
4891 \f
4892 /*
4893  * Cobol tag functions
4894  * We could look for anything that could be a paragraph name.
4895  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4896  * Idea by Corny de Souza (1993)
4897  */
4898 static void
4899 Cobol_paragraphs (FILE *inf)
4900 {
4901   register char *bp, *ep;
4902
4903   LOOP_ON_INPUT_LINES (inf, lb, bp)
4904     {
4905       if (lb.len < 9)
4906         continue;
4907       bp += 8;
4908
4909       /* If eoln, compiler option or comment ignore whole line. */
4910       if (bp[-1] != ' ' || !c_isalnum (bp[0]))
4911         continue;
4912
4913       for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
4914         continue;
4915       if (*ep++ == '.')
4916         make_tag (bp, ep - bp, true,
4917                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4918     }
4919 }
4920
4921 \f
4922 /*
4923  * Makefile support
4924  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4925  */
4926 static void
4927 Makefile_targets (FILE *inf)
4928 {
4929   register char *bp;
4930
4931   LOOP_ON_INPUT_LINES (inf, lb, bp)
4932     {
4933       if (*bp == '\t' || *bp == '#')
4934         continue;
4935       while (*bp != '\0' && *bp != '=' && *bp != ':')
4936         bp++;
4937       if (*bp == ':' || (globals && *bp == '='))
4938         {
4939           /* We should detect if there is more than one tag, but we do not.
4940              We just skip initial and final spaces. */
4941           char * namestart = skip_spaces (lb.buffer);
4942           while (--bp > namestart)
4943             if (!notinname (*bp))
4944               break;
4945           make_tag (namestart, bp - namestart + 1, true,
4946                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4947         }
4948     }
4949 }
4950
4951 \f
4952 /*
4953  * Pascal parsing
4954  * Original code by Mosur K. Mohan (1989)
4955  *
4956  *  Locates tags for procedures & functions.  Doesn't do any type- or
4957  *  var-definitions.  It does look for the keyword "extern" or
4958  *  "forward" immediately following the procedure statement; if found,
4959  *  the tag is skipped.
4960  */
4961 static void
4962 Pascal_functions (FILE *inf)
4963 {
4964   linebuffer tline;             /* mostly copied from C_entries */
4965   long save_lcno;
4966   int save_lineno, namelen, taglen;
4967   char c, *name;
4968
4969   bool                          /* each of these flags is true if: */
4970     incomment,                  /* point is inside a comment */
4971     inquote,                    /* point is inside '..' string */
4972     get_tagname,                /* point is after PROCEDURE/FUNCTION
4973                                    keyword, so next item = potential tag */
4974     found_tag,                  /* point is after a potential tag */
4975     inparms,                    /* point is within parameter-list */
4976     verify_tag;                 /* point has passed the parm-list, so the
4977                                    next token will determine whether this
4978                                    is a FORWARD/EXTERN to be ignored, or
4979                                    whether it is a real tag */
4980
4981   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4982   name = NULL;                  /* keep compiler quiet */
4983   dbp = lb.buffer;
4984   *dbp = '\0';
4985   linebuffer_init (&tline);
4986
4987   incomment = inquote = false;
4988   found_tag = false;            /* have a proc name; check if extern */
4989   get_tagname = false;          /* found "procedure" keyword         */
4990   inparms = false;              /* found '(' after "proc"            */
4991   verify_tag = false;           /* check if "extern" is ahead        */
4992
4993
4994   while (perhaps_more_input (inf)) /* long main loop to get next char */
4995     {
4996       c = *dbp++;
4997       if (c == '\0')            /* if end of line */
4998         {
4999           readline (&lb, inf);
5000           dbp = lb.buffer;
5001           if (*dbp == '\0')
5002             continue;
5003           if (!((found_tag && verify_tag)
5004                 || get_tagname))
5005             c = *dbp++;         /* only if don't need *dbp pointing
5006                                    to the beginning of the name of
5007                                    the procedure or function */
5008         }
5009       if (incomment)
5010         {
5011           if (c == '}')         /* within { } comments */
5012             incomment = false;
5013           else if (c == '*' && *dbp == ')') /* within (* *) comments */
5014             {
5015               dbp++;
5016               incomment = false;
5017             }
5018           continue;
5019         }
5020       else if (inquote)
5021         {
5022           if (c == '\'')
5023             inquote = false;
5024           continue;
5025         }
5026       else
5027         switch (c)
5028           {
5029           case '\'':
5030             inquote = true;     /* found first quote */
5031             continue;
5032           case '{':             /* found open { comment */
5033             incomment = true;
5034             continue;
5035           case '(':
5036             if (*dbp == '*')    /* found open (* comment */
5037               {
5038                 incomment = true;
5039                 dbp++;
5040               }
5041             else if (found_tag) /* found '(' after tag, i.e., parm-list */
5042               inparms = true;
5043             continue;
5044           case ')':             /* end of parms list */
5045             if (inparms)
5046               inparms = false;
5047             continue;
5048           case ';':
5049             if (found_tag && !inparms) /* end of proc or fn stmt */
5050               {
5051                 verify_tag = true;
5052                 break;
5053               }
5054             continue;
5055           }
5056       if (found_tag && verify_tag && (*dbp != ' '))
5057         {
5058           /* Check if this is an "extern" declaration. */
5059           if (*dbp == '\0')
5060             continue;
5061           if (c_tolower (*dbp) == 'e')
5062             {
5063               if (nocase_tail ("extern")) /* superfluous, really! */
5064                 {
5065                   found_tag = false;
5066                   verify_tag = false;
5067                 }
5068             }
5069           else if (c_tolower (*dbp) == 'f')
5070             {
5071               if (nocase_tail ("forward")) /* check for forward reference */
5072                 {
5073                   found_tag = false;
5074                   verify_tag = false;
5075                 }
5076             }
5077           if (found_tag && verify_tag) /* not external proc, so make tag */
5078             {
5079               found_tag = false;
5080               verify_tag = false;
5081               make_tag (name, namelen, true,
5082                         tline.buffer, taglen, save_lineno, save_lcno);
5083               continue;
5084             }
5085         }
5086       if (get_tagname)          /* grab name of proc or fn */
5087         {
5088           char *cp;
5089
5090           if (*dbp == '\0')
5091             continue;
5092
5093           /* Find block name. */
5094           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
5095             continue;
5096
5097           /* Save all values for later tagging. */
5098           linebuffer_setlen (&tline, lb.len);
5099           strcpy (tline.buffer, lb.buffer);
5100           save_lineno = lineno;
5101           save_lcno = linecharno;
5102           name = tline.buffer + (dbp - lb.buffer);
5103           namelen = cp - dbp;
5104           taglen = cp - lb.buffer + 1;
5105
5106           dbp = cp;             /* set dbp to e-o-token */
5107           get_tagname = false;
5108           found_tag = true;
5109           continue;
5110
5111           /* And proceed to check for "extern". */
5112         }
5113       else if (!incomment && !inquote && !found_tag)
5114         {
5115           /* Check for proc/fn keywords. */
5116           switch (c_tolower (c))
5117             {
5118             case 'p':
5119               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
5120                 get_tagname = true;
5121               continue;
5122             case 'f':
5123               if (nocase_tail ("unction"))
5124                 get_tagname = true;
5125               continue;
5126             }
5127         }
5128     } /* while not eof */
5129
5130   free (tline.buffer);
5131 }
5132
5133 \f
5134 /*
5135  * Lisp tag functions
5136  *  look for (def or (DEF, quote or QUOTE
5137  */
5138
5139 static void L_getit (void);
5140
5141 static void
5142 L_getit (void)
5143 {
5144   if (*dbp == '\'')             /* Skip prefix quote */
5145     dbp++;
5146   else if (*dbp == '(')
5147   {
5148     dbp++;
5149     /* Try to skip "(quote " */
5150     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
5151       /* Ok, then skip "(" before name in (defstruct (foo)) */
5152       dbp = skip_spaces (dbp);
5153   }
5154   get_tag (dbp, NULL);
5155 }
5156
5157 static void
5158 Lisp_functions (FILE *inf)
5159 {
5160   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5161     {
5162       if (dbp[0] != '(')
5163         continue;
5164
5165       /* "(defvar foo)" is a declaration rather than a definition.  */
5166       if (! declarations)
5167         {
5168           char *p = dbp + 1;
5169           if (LOOKING_AT (p, "defvar"))
5170             {
5171               p = skip_name (p); /* past var name */
5172               p = skip_spaces (p);
5173               if (*p == ')')
5174                 continue;
5175             }
5176         }
5177
5178       if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
5179         dbp += 3;
5180
5181       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5182         {
5183           dbp = skip_non_spaces (dbp);
5184           dbp = skip_spaces (dbp);
5185           L_getit ();
5186         }
5187       else
5188         {
5189           /* Check for (foo::defmumble name-defined ... */
5190           do
5191             dbp++;
5192           while (!notinname (*dbp) && *dbp != ':');
5193           if (*dbp == ':')
5194             {
5195               do
5196                 dbp++;
5197               while (*dbp == ':');
5198
5199               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5200                 {
5201                   dbp = skip_non_spaces (dbp);
5202                   dbp = skip_spaces (dbp);
5203                   L_getit ();
5204                 }
5205             }
5206         }
5207     }
5208 }
5209
5210 \f
5211 /*
5212  * Lua script language parsing
5213  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5214  *
5215  *  "function" and "local function" are tags if they start at column 1.
5216  */
5217 static void
5218 Lua_functions (FILE *inf)
5219 {
5220   register char *bp;
5221
5222   LOOP_ON_INPUT_LINES (inf, lb, bp)
5223     {
5224       bp = skip_spaces (bp);
5225       if (bp[0] != 'f' && bp[0] != 'l')
5226         continue;
5227
5228       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5229
5230       if (LOOKING_AT (bp, "function"))
5231         {
5232           char *tag_name, *tp_dot, *tp_colon;
5233
5234           get_tag (bp, &tag_name);
5235           /* If the tag ends with ".foo" or ":foo", make an additional tag for
5236              "foo".  */
5237           tp_dot = strrchr (tag_name, '.');
5238           tp_colon = strrchr (tag_name, ':');
5239           if (tp_dot || tp_colon)
5240             {
5241               char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
5242               int len_add = p - tag_name + 1;
5243
5244               get_tag (bp + len_add, NULL);
5245             }
5246         }
5247     }
5248 }
5249
5250 \f
5251 /*
5252  * PostScript tags
5253  * Just look for lines where the first character is '/'
5254  * Also look at "defineps" for PSWrap
5255  * Ideas by:
5256  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
5257  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5258  */
5259 static void
5260 PS_functions (FILE *inf)
5261 {
5262   register char *bp, *ep;
5263
5264   LOOP_ON_INPUT_LINES (inf, lb, bp)
5265     {
5266       if (bp[0] == '/')
5267         {
5268           for (ep = bp+1;
5269                *ep != '\0' && *ep != ' ' && *ep != '{';
5270                ep++)
5271             continue;
5272           make_tag (bp, ep - bp, true,
5273                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5274         }
5275       else if (LOOKING_AT (bp, "defineps"))
5276         get_tag (bp, NULL);
5277     }
5278 }
5279
5280 \f
5281 /*
5282  * Forth tags
5283  * Ignore anything after \ followed by space or in ( )
5284  * Look for words defined by :
5285  * Look for constant, code, create, defer, value, and variable
5286  * OBP extensions:  Look for buffer:, field,
5287  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5288  */
5289 static void
5290 Forth_words (FILE *inf)
5291 {
5292   register char *bp;
5293
5294   LOOP_ON_INPUT_LINES (inf, lb, bp)
5295     while ((bp = skip_spaces (bp))[0] != '\0')
5296       if (bp[0] == '\\' && c_isspace (bp[1]))
5297         break;                  /* read next line */
5298       else if (bp[0] == '(' && c_isspace (bp[1]))
5299         do                      /* skip to ) or eol */
5300           bp++;
5301         while (*bp != ')' && *bp != '\0');
5302       else if ((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5303                || LOOKING_AT_NOCASE (bp, "constant")
5304                || LOOKING_AT_NOCASE (bp, "code")
5305                || LOOKING_AT_NOCASE (bp, "create")
5306                || LOOKING_AT_NOCASE (bp, "defer")
5307                || LOOKING_AT_NOCASE (bp, "value")
5308                || LOOKING_AT_NOCASE (bp, "variable")
5309                || LOOKING_AT_NOCASE (bp, "buffer:")
5310                || LOOKING_AT_NOCASE (bp, "field"))
5311         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5312       else
5313         bp = skip_non_spaces (bp);
5314 }
5315
5316 \f
5317 /*
5318  * Scheme tag functions
5319  * look for (def... xyzzy
5320  *          (def... (xyzzy
5321  *          (def ... ((...(xyzzy ....
5322  *          (set! xyzzy
5323  * Original code by Ken Haase (1985?)
5324  */
5325 static void
5326 Scheme_functions (FILE *inf)
5327 {
5328   register char *bp;
5329
5330   LOOP_ON_INPUT_LINES (inf, lb, bp)
5331     {
5332       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5333         {
5334           bp = skip_non_spaces (bp+4);
5335           /* Skip over open parens and white space.  Don't continue past
5336              '\0'. */
5337           while (*bp && notinname (*bp))
5338             bp++;
5339           get_tag (bp, NULL);
5340         }
5341       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5342         get_tag (bp, NULL);
5343     }
5344 }
5345
5346 \f
5347 /* Find tags in TeX and LaTeX input files.  */
5348
5349 /* TEX_toktab is a table of TeX control sequences that define tags.
5350  * Each entry records one such control sequence.
5351  *
5352  * Original code from who knows whom.
5353  * Ideas by:
5354  *   Stefan Monnier (2002)
5355  */
5356
5357 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5358
5359 /* Default set of control sequences to put into TEX_toktab.
5360    The value of environment var TEXTAGS is prepended to this.  */
5361 static const char *TEX_defenv = "\
5362 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5363 :part:appendix:entry:index:def\
5364 :newcommand:renewcommand:newenvironment:renewenvironment";
5365
5366 static void TEX_decode_env (const char *, const char *);
5367
5368 /*
5369  * TeX/LaTeX scanning loop.
5370  */
5371 static void
5372 TeX_commands (FILE *inf)
5373 {
5374   char *cp;
5375   linebuffer *key;
5376
5377   char TEX_esc = '\0';
5378   char TEX_opgrp, TEX_clgrp;
5379
5380   /* Initialize token table once from environment. */
5381   if (TEX_toktab == NULL)
5382     TEX_decode_env ("TEXTAGS", TEX_defenv);
5383
5384   LOOP_ON_INPUT_LINES (inf, lb, cp)
5385     {
5386       /* Look at each TEX keyword in line. */
5387       for (;;)
5388         {
5389           /* Look for a TEX escape. */
5390           while (true)
5391             {
5392               char c = *cp++;
5393               if (c == '\0' || c == '%')
5394                 goto tex_next_line;
5395
5396               /* Select either \ or ! as escape character, whichever comes
5397                  first outside a comment.  */
5398               if (!TEX_esc)
5399                 switch (c)
5400                   {
5401                   case '\\':
5402                     TEX_esc = c;
5403                     TEX_opgrp = '{';
5404                     TEX_clgrp = '}';
5405                     break;
5406
5407                   case '!':
5408                     TEX_esc = c;
5409                     TEX_opgrp = '<';
5410                     TEX_clgrp = '>';
5411                     break;
5412                   }
5413
5414               if (c == TEX_esc)
5415                 break;
5416             }
5417
5418           for (key = TEX_toktab; key->buffer != NULL; key++)
5419             if (strneq (cp, key->buffer, key->len))
5420               {
5421                 char *p;
5422                 int namelen, linelen;
5423                 bool opgrp = false;
5424
5425                 cp = skip_spaces (cp + key->len);
5426                 if (*cp == TEX_opgrp)
5427                   {
5428                     opgrp = true;
5429                     cp++;
5430                   }
5431                 for (p = cp;
5432                      (!c_isspace (*p) && *p != '#' &&
5433                       *p != TEX_opgrp && *p != TEX_clgrp);
5434                      p++)
5435                   continue;
5436                 namelen = p - cp;
5437                 linelen = lb.len;
5438                 if (!opgrp || *p == TEX_clgrp)
5439                   {
5440                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5441                       p++;
5442                     linelen = p - lb.buffer + 1;
5443                   }
5444                 make_tag (cp, namelen, true,
5445                           lb.buffer, linelen, lineno, linecharno);
5446                 goto tex_next_line; /* We only tag a line once */
5447               }
5448         }
5449     tex_next_line:
5450       ;
5451     }
5452 }
5453
5454 /* Read environment and prepend it to the default string.
5455    Build token table. */
5456 static void
5457 TEX_decode_env (const char *evarname, const char *defenv)
5458 {
5459   register const char *env, *p;
5460   int i, len;
5461
5462   /* Append default string to environment. */
5463   env = getenv (evarname);
5464   if (!env)
5465     env = defenv;
5466   else
5467     env = concat (env, defenv, "");
5468
5469   /* Allocate a token table */
5470   for (len = 1, p = env; (p = strchr (p, ':')); )
5471     if (*++p)
5472       len++;
5473   TEX_toktab = xnew (len, linebuffer);
5474
5475   /* Unpack environment string into token table. Be careful about */
5476   /* zero-length strings (leading ':', "::" and trailing ':') */
5477   for (i = 0; *env != '\0';)
5478     {
5479       p = strchr (env, ':');
5480       if (!p)                   /* End of environment string. */
5481         p = env + strlen (env);
5482       if (p - env > 0)
5483         {                       /* Only non-zero strings. */
5484           TEX_toktab[i].buffer = savenstr (env, p - env);
5485           TEX_toktab[i].len = p - env;
5486           i++;
5487         }
5488       if (*p)
5489         env = p + 1;
5490       else
5491         {
5492           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5493           TEX_toktab[i].len = 0;
5494           break;
5495         }
5496     }
5497 }
5498
5499 \f
5500 /* Texinfo support.  Dave Love, Mar. 2000.  */
5501 static void
5502 Texinfo_nodes (FILE *inf)
5503 {
5504   char *cp, *start;
5505   LOOP_ON_INPUT_LINES (inf, lb, cp)
5506     if (LOOKING_AT (cp, "@node"))
5507       {
5508         start = cp;
5509         while (*cp != '\0' && *cp != ',')
5510           cp++;
5511         make_tag (start, cp - start, true,
5512                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5513       }
5514 }
5515
5516 \f
5517 /*
5518  * HTML support.
5519  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5520  * Contents of <a name=xxx> are tags with name xxx.
5521  *
5522  * Francesco Potortì, 2002.
5523  */
5524 static void
5525 HTML_labels (FILE *inf)
5526 {
5527   bool getnext = false;         /* next text outside of HTML tags is a tag */
5528   bool skiptag = false;         /* skip to the end of the current HTML tag */
5529   bool intag = false;           /* inside an html tag, looking for ID= */
5530   bool inanchor = false;        /* when INTAG, is an anchor, look for NAME= */
5531   char *end;
5532
5533
5534   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5535
5536   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5537     for (;;)                    /* loop on the same line */
5538       {
5539         if (skiptag)            /* skip HTML tag */
5540           {
5541             while (*dbp != '\0' && *dbp != '>')
5542               dbp++;
5543             if (*dbp == '>')
5544               {
5545                 dbp += 1;
5546                 skiptag = false;
5547                 continue;       /* look on the same line */
5548               }
5549             break;              /* go to next line */
5550           }
5551
5552         else if (intag) /* look for "name=" or "id=" */
5553           {
5554             while (*dbp != '\0' && *dbp != '>'
5555                    && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5556               dbp++;
5557             if (*dbp == '\0')
5558               break;            /* go to next line */
5559             if (*dbp == '>')
5560               {
5561                 dbp += 1;
5562                 intag = false;
5563                 continue;       /* look on the same line */
5564               }
5565             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5566                 || LOOKING_AT_NOCASE (dbp, "id="))
5567               {
5568                 bool quoted = (dbp[0] == '"');
5569
5570                 if (quoted)
5571                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5572                     continue;
5573                 else
5574                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5575                     continue;
5576                 linebuffer_setlen (&token_name, end - dbp);
5577                 memcpy (token_name.buffer, dbp, end - dbp);
5578                 token_name.buffer[end - dbp] = '\0';
5579
5580                 dbp = end;
5581                 intag = false;  /* we found what we looked for */
5582                 skiptag = true; /* skip to the end of the tag */
5583                 getnext = true; /* then grab the text */
5584                 continue;       /* look on the same line */
5585               }
5586             dbp += 1;
5587           }
5588
5589         else if (getnext)       /* grab next tokens and tag them */
5590           {
5591             dbp = skip_spaces (dbp);
5592             if (*dbp == '\0')
5593               break;            /* go to next line */
5594             if (*dbp == '<')
5595               {
5596                 intag = true;
5597                 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5598                 continue;       /* look on the same line */
5599               }
5600
5601             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5602               continue;
5603             make_tag (token_name.buffer, token_name.len, true,
5604                       dbp, end - dbp, lineno, linecharno);
5605             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5606             getnext = false;
5607             break;              /* go to next line */
5608           }
5609
5610         else                    /* look for an interesting HTML tag */
5611           {
5612             while (*dbp != '\0' && *dbp != '<')
5613               dbp++;
5614             if (*dbp == '\0')
5615               break;            /* go to next line */
5616             intag = true;
5617             if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5618               {
5619                 inanchor = true;
5620                 continue;       /* look on the same line */
5621               }
5622             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5623                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5624                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5625                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5626               {
5627                 intag = false;
5628                 getnext = true;
5629                 continue;       /* look on the same line */
5630               }
5631             dbp += 1;
5632           }
5633       }
5634 }
5635
5636 \f
5637 /*
5638  * Prolog support
5639  *
5640  * Assumes that the predicate or rule starts at column 0.
5641  * Only the first clause of a predicate or rule is added.
5642  * Original code by Sunichirou Sugou (1989)
5643  * Rewritten by Anders Lindgren (1996)
5644  */
5645 static size_t prolog_pr (char *, char *);
5646 static void prolog_skip_comment (linebuffer *, FILE *);
5647 static size_t prolog_atom (char *, size_t);
5648
5649 static void
5650 Prolog_functions (FILE *inf)
5651 {
5652   char *cp, *last;
5653   size_t len;
5654   size_t allocated;
5655
5656   allocated = 0;
5657   len = 0;
5658   last = NULL;
5659
5660   LOOP_ON_INPUT_LINES (inf, lb, cp)
5661     {
5662       if (cp[0] == '\0')        /* Empty line */
5663         continue;
5664       else if (c_isspace (cp[0])) /* Not a predicate */
5665         continue;
5666       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5667         prolog_skip_comment (&lb, inf);
5668       else if ((len = prolog_pr (cp, last)) > 0)
5669         {
5670           /* Predicate or rule.  Store the function name so that we
5671              only generate a tag for the first clause.  */
5672           if (last == NULL)
5673             last = xnew (len + 1, char);
5674           else if (len + 1 > allocated)
5675             xrnew (last, len + 1, char);
5676           allocated = len + 1;
5677           memcpy (last, cp, len);
5678           last[len] = '\0';
5679         }
5680     }
5681   free (last);
5682 }
5683
5684
5685 static void
5686 prolog_skip_comment (linebuffer *plb, FILE *inf)
5687 {
5688   char *cp;
5689
5690   do
5691     {
5692       for (cp = plb->buffer; *cp != '\0'; cp++)
5693         if (cp[0] == '*' && cp[1] == '/')
5694           return;
5695       readline (plb, inf);
5696     }
5697   while (perhaps_more_input (inf));
5698 }
5699
5700 /*
5701  * A predicate or rule definition is added if it matches:
5702  *     <beginning of line><Prolog Atom><whitespace>(
5703  * or  <beginning of line><Prolog Atom><whitespace>:-
5704  *
5705  * It is added to the tags database if it doesn't match the
5706  * name of the previous clause header.
5707  *
5708  * Return the size of the name of the predicate or rule, or 0 if no
5709  * header was found.
5710  */
5711 static size_t
5712 prolog_pr (char *s, char *last)
5713
5714                                 /* Name of last clause. */
5715 {
5716   size_t pos;
5717   size_t len;
5718
5719   pos = prolog_atom (s, 0);
5720   if (! pos)
5721     return 0;
5722
5723   len = pos;
5724   pos = skip_spaces (s + pos) - s;
5725
5726   if ((s[pos] == '.'
5727        || (s[pos] == '(' && (pos += 1))
5728        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5729       && (last == NULL          /* save only the first clause */
5730           || len != strlen (last)
5731           || !strneq (s, last, len)))
5732         {
5733           make_tag (s, len, true, s, pos, lineno, linecharno);
5734           return len;
5735         }
5736   else
5737     return 0;
5738 }
5739
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if no atom was recognized.
 *
 * An atom, as far as this function is concerned, is either:
 * - a lower case letter or underscore followed by any number of
 *   alphanumeric characters and underscores; or
 * - a single-quoted string, in which two quotes in a row stand for a
 *   quote and a backslash protects the character after it.  A quoted
 *   atom that does not end on the line is rejected.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const size_t start = pos;

  if (s[pos] == '\'')
    {
      /* Quoted atom: look for the closing quote.  */
      pos++;
      for (;;)
        switch (s[pos])
          {
          case '\'':
            pos++;
            if (s[pos] != '\'')
              return pos - start; /* that was the closing quote */
            pos++;                /* A double quote */
            break;

          case '\0':
            /* Multiline quoted atoms are ignored. */
            return 0;

          case '\\':
            if (s[pos + 1] == '\0')
              return 0;
            pos += 2;
            break;

          default:
            pos++;
            break;
          }
    }

  if (!c_islower (s[pos]) && s[pos] != '_')
    return 0;

  /* The atom is unquoted. */
  pos++;
  while (c_isalnum (s[pos]) || s[pos] == '_')
    pos++;
  return pos - start;
}
5796
5797 \f
5798 /*
5799  * Support for Erlang
5800  *
5801  * Generates tags for functions, defines, and records.
5802  * Assumes that Erlang functions start at column 0.
5803  * Original code by Anders Lindgren (1996)
5804  */
5805 static int erlang_func (char *, char *);
5806 static void erlang_attribute (char *);
5807 static int erlang_atom (char *);
5808
5809 static void
5810 Erlang_functions (FILE *inf)
5811 {
5812   char *cp, *last;
5813   int len;
5814   int allocated;
5815
5816   allocated = 0;
5817   len = 0;
5818   last = NULL;
5819
5820   LOOP_ON_INPUT_LINES (inf, lb, cp)
5821     {
5822       if (cp[0] == '\0')        /* Empty line */
5823         continue;
5824       else if (c_isspace (cp[0])) /* Not function nor attribute */
5825         continue;
5826       else if (cp[0] == '%')    /* comment */
5827         continue;
5828       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5829         continue;
5830       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5831         {
5832           erlang_attribute (cp);
5833           if (last != NULL)
5834             {
5835               free (last);
5836               last = NULL;
5837             }
5838         }
5839       else if ((len = erlang_func (cp, last)) > 0)
5840         {
5841           /*
5842            * Function.  Store the function name so that we only
5843            * generates a tag for the first clause.
5844            */
5845           if (last == NULL)
5846             last = xnew (len + 1, char);
5847           else if (len + 1 > allocated)
5848             xrnew (last, len + 1, char);
5849           allocated = len + 1;
5850           memcpy (last, cp, len);
5851           last[len] = '\0';
5852         }
5853     }
5854   free (last);
5855 }
5856
5857
5858 /*
5859  * A function definition is added if it matches:
5860  *     <beginning of line><Erlang Atom><whitespace>(
5861  *
5862  * It is added to the tags database if it doesn't match the
5863  * name of the previous clause header.
5864  *
5865  * Return the size of the name of the function, or 0 if no function
5866  * was found.
5867  */
5868 static int
5869 erlang_func (char *s, char *last)
5870
5871                                 /* Name of last clause. */
5872 {
5873   int pos;
5874   int len;
5875
5876   pos = erlang_atom (s);
5877   if (pos < 1)
5878     return 0;
5879
5880   len = pos;
5881   pos = skip_spaces (s + pos) - s;
5882
5883   /* Save only the first clause. */
5884   if (s[pos++] == '('
5885       && (last == NULL
5886           || len != (int)strlen (last)
5887           || !strneq (s, last, len)))
5888         {
5889           make_tag (s, len, true, s, pos, lineno, linecharno);
5890           return len;
5891         }
5892
5893   return 0;
5894 }
5895
5896
5897 /*
5898  * Handle attributes.  Currently, tags are generated for defines
5899  * and records.
5900  *
5901  * They are on the form:
5902  * -define(foo, bar).
5903  * -define(Foo(M, N), M+N).
5904  * -record(graph, {vtab = notable, cyclic = true}).
5905  */
5906 static void
5907 erlang_attribute (char *s)
5908 {
5909   char *cp = s;
5910
5911   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5912       && *cp++ == '(')
5913     {
5914       int len = erlang_atom (skip_spaces (cp));
5915       if (len > 0)
5916         make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5917     }
5918   return;
5919 }
5920
5921
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * one or if a quoted atom does not end on the line.
 */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: run up to the closing quote.  A backslash
         protects the character after it.  */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              pos++;
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      return pos + 1;           /* count the closing quote */
    }

  if (c_isalpha (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      pos = 1;
      while (c_isalnum (s[pos]) || s[pos] == '_')
        pos++;
    }

  return pos;
}
5949
5950 \f
5951 static char *scan_separators (char *);
5952 static void add_regex (char *, language *);
5953 static char *substitute (char *, char *, struct re_registers *);
5954
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 *
 * NAME[0] is taken as the separator.  The decoded text is written from
 * NAME[0] onwards, so on return NAME itself is the decoded string.
 *
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps (an empty NAME included).
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;
  bool quoted = false;

  /* An empty string has no separator; stepping past its terminator
     would read out of bounds.  */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (quoted)
        {
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
          quoted = false;
        }
      else if (*name == '\\')
        quoted = true;
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
6013
6014 /* Look at the argument of --regex or --no-regex and do the right
6015    thing.  Same for each line of a regexp file. */
6016 static void
6017 analyze_regex (char *regex_arg)
6018 {
6019   if (regex_arg == NULL)
6020     {
6021       free_regexps ();          /* --no-regex: remove existing regexps */
6022       return;
6023     }
6024
6025   /* A real --regexp option or a line in a regexp file. */
6026   switch (regex_arg[0])
6027     {
6028       /* Comments in regexp file or null arg to --regex. */
6029     case '\0':
6030     case ' ':
6031     case '\t':
6032       break;
6033
6034       /* Read a regex file.  This is recursive and may result in a
6035          loop, which will stop when the file descriptors are exhausted. */
6036     case '@':
6037       {
6038         FILE *regexfp;
6039         linebuffer regexbuf;
6040         char *regexfile = regex_arg + 1;
6041
6042         /* regexfile is a file containing regexps, one per line. */
6043         regexfp = fopen (regexfile, "r" FOPEN_BINARY);
6044         if (regexfp == NULL)
6045           pfatal (regexfile);
6046         linebuffer_init (&regexbuf);
6047         while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
6048           analyze_regex (regexbuf.buffer);
6049         free (regexbuf.buffer);
6050         if (fclose (regexfp) != 0)
6051           pfatal (regexfile);
6052       }
6053       break;
6054
6055       /* Regexp to be used for a specific language only. */
6056     case '{':
6057       {
6058         language *lang;
6059         char *lang_name = regex_arg + 1;
6060         char *cp;
6061
6062         for (cp = lang_name; *cp != '}'; cp++)
6063           if (*cp == '\0')
6064             {
6065               error ("unterminated language name in regex: %s", regex_arg);
6066               return;
6067             }
6068         *cp++ = '\0';
6069         lang = get_language_from_langname (lang_name);
6070         if (lang == NULL)
6071           return;
6072         add_regex (cp, lang);
6073       }
6074       break;
6075
6076       /* Regexp to be used for any language. */
6077     default:
6078       add_regex (regex_arg, NULL);
6079       break;
6080     }
6081 }
6082
6083 /* Separate the regexp pattern, compile it,
6084    and care for optional name and modifiers. */
6085 static void
6086 add_regex (char *regexp_pattern, language *lang)
6087 {
6088   static struct re_pattern_buffer zeropattern;
6089   char sep, *pat, *name, *modifiers;
6090   char empty = '\0';
6091   const char *err;
6092   struct re_pattern_buffer *patbuf;
6093   regexp *rp;
6094   bool
6095     force_explicit_name = true, /* do not use implicit tag names */
6096     ignore_case = false,        /* case is significant */
6097     multi_line = false,         /* matches are done one line at a time */
6098     single_line = false;        /* dot does not match newline */
6099
6100
6101   if (strlen (regexp_pattern) < 3)
6102     {
6103       error ("null regexp");
6104       return;
6105     }
6106   sep = regexp_pattern[0];
6107   name = scan_separators (regexp_pattern);
6108   if (name == NULL)
6109     {
6110       error ("%s: unterminated regexp", regexp_pattern);
6111       return;
6112     }
6113   if (name[1] == sep)
6114     {
6115       error ("null name for regexp \"%s\"", regexp_pattern);
6116       return;
6117     }
6118   modifiers = scan_separators (name);
6119   if (modifiers == NULL)        /* no terminating separator --> no name */
6120     {
6121       modifiers = name;
6122       name = &empty;
6123     }
6124   else
6125     modifiers += 1;             /* skip separator */
6126
6127   /* Parse regex modifiers. */
6128   for (; modifiers[0] != '\0'; modifiers++)
6129     switch (modifiers[0])
6130       {
6131       case 'N':
6132         if (modifiers == name)
6133           error ("forcing explicit tag name but no name, ignoring");
6134         force_explicit_name = true;
6135         break;
6136       case 'i':
6137         ignore_case = true;
6138         break;
6139       case 's':
6140         single_line = true;
6141         /* FALLTHRU */
6142       case 'm':
6143         multi_line = true;
6144         need_filebuf = true;
6145         break;
6146       default:
6147         error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
6148         break;
6149       }
6150
6151   patbuf = xnew (1, struct re_pattern_buffer);
6152   *patbuf = zeropattern;
6153   if (ignore_case)
6154     {
6155       static char lc_trans[UCHAR_MAX + 1];
6156       int i;
6157       for (i = 0; i < UCHAR_MAX + 1; i++)
6158         lc_trans[i] = c_tolower (i);
6159       patbuf->translate = lc_trans;     /* translation table to fold case  */
6160     }
6161
6162   if (multi_line)
6163     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6164   else
6165     pat = regexp_pattern;
6166
6167   if (single_line)
6168     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6169   else
6170     re_set_syntax (RE_SYNTAX_EMACS);
6171
6172   err = re_compile_pattern (pat, strlen (pat), patbuf);
6173   if (multi_line)
6174     free (pat);
6175   if (err != NULL)
6176     {
6177       error ("%s while compiling pattern", err);
6178       return;
6179     }
6180
6181   rp = p_head;
6182   p_head = xnew (1, regexp);
6183   p_head->pattern = savestr (regexp_pattern);
6184   p_head->p_next = rp;
6185   p_head->lang = lang;
6186   p_head->pat = patbuf;
6187   p_head->name = savestr (name);
6188   p_head->error_signaled = false;
6189   p_head->force_explicit_name = force_explicit_name;
6190   p_head->ignore_case = ignore_case;
6191   p_head->multi_line = multi_line;
6192 }
6193
6194 /*
6195  * Do the substitutions indicated by the regular expression and
6196  * arguments.
6197  */
6198 static char *
6199 substitute (char *in, char *out, struct re_registers *regs)
6200 {
6201   char *result, *t;
6202   int size, dig, diglen;
6203
6204   result = NULL;
6205   size = strlen (out);
6206
6207   /* Pass 1: figure out how much to allocate by finding all \N strings. */
6208   if (out[size - 1] == '\\')
6209     fatal ("pattern error in \"%s\"", out);
6210   for (t = strchr (out, '\\');
6211        t != NULL;
6212        t = strchr (t + 2, '\\'))
6213     if (c_isdigit (t[1]))
6214       {
6215         dig = t[1] - '0';
6216         diglen = regs->end[dig] - regs->start[dig];
6217         size += diglen - 2;
6218       }
6219     else
6220       size -= 1;
6221
6222   /* Allocate space and do the substitutions. */
6223   assert (size >= 0);
6224   result = xnew (size + 1, char);
6225
6226   for (t = result; *out != '\0'; out++)
6227     if (*out == '\\' && c_isdigit (*++out))
6228       {
6229         dig = *out - '0';
6230         diglen = regs->end[dig] - regs->start[dig];
6231         memcpy (t, in + regs->start[dig], diglen);
6232         t += diglen;
6233       }
6234     else
6235       *t++ = *out;
6236   *t = '\0';
6237
6238   assert (t <= result + size);
6239   assert (t - result == (int)strlen (result));
6240
6241   return result;
6242 }
6243
6244 /* Deallocate all regexps. */
6245 static void
6246 free_regexps (void)
6247 {
6248   regexp *rp;
6249   while (p_head != NULL)
6250     {
6251       rp = p_head->p_next;
6252       free (p_head->pattern);
6253       free (p_head->name);
6254       free (p_head);
6255       p_head = rp;
6256     }
6257   return;
6258 }
6259
6260 /*
6261  * Reads the whole file as a single string from `filebuf' and looks for
6262  * multi-line regular expressions, creating tags on matches.
6263  * readline already dealt with normal regexps.
6264  *
6265  * Idea by Ben Wing <ben@666.com> (2002).
6266  */
6267 static void
6268 regex_tag_multiline (void)
6269 {
6270   char *buffer = filebuf.buffer;
6271   regexp *rp;
6272   char *name;
6273
6274   for (rp = p_head; rp != NULL; rp = rp->p_next)
6275     {
6276       int match = 0;
6277
6278       if (!rp->multi_line)
6279         continue;               /* skip normal regexps */
6280
6281       /* Generic initializations before parsing file from memory. */
6282       lineno = 1;               /* reset global line number */
6283       charno = 0;               /* reset global char number */
6284       linecharno = 0;           /* reset global char number of line start */
6285
6286       /* Only use generic regexps or those for the current language. */
6287       if (rp->lang != NULL && rp->lang != curfdp->lang)
6288         continue;
6289
6290       while (match >= 0 && match < filebuf.len)
6291         {
6292           match = re_search (rp->pat, buffer, filebuf.len, charno,
6293                              filebuf.len - match, &rp->regs);
6294           switch (match)
6295             {
6296             case -2:
6297               /* Some error. */
6298               if (!rp->error_signaled)
6299                 {
6300                   error ("regexp stack overflow while matching \"%s\"",
6301                          rp->pattern);
6302                   rp->error_signaled = true;
6303                 }
6304               break;
6305             case -1:
6306               /* No match. */
6307               break;
6308             default:
6309               if (match == rp->regs.end[0])
6310                 {
6311                   if (!rp->error_signaled)
6312                     {
6313                       error ("regexp matches the empty string: \"%s\"",
6314                              rp->pattern);
6315                       rp->error_signaled = true;
6316                     }
6317                   match = -3;   /* exit from while loop */
6318                   break;
6319                 }
6320
6321               /* Match occurred.  Construct a tag. */
6322               while (charno < rp->regs.end[0])
6323                 if (buffer[charno++] == '\n')
6324                   lineno++, linecharno = charno;
6325               name = rp->name;
6326               if (name[0] == '\0')
6327                 name = NULL;
6328               else /* make a named tag */
6329                 name = substitute (buffer, rp->name, &rp->regs);
6330               if (rp->force_explicit_name)
6331                 /* Force explicit tag name, if a name is there. */
6332                 pfnote (name, true, buffer + linecharno,
6333                         charno - linecharno + 1, lineno, linecharno);
6334               else
6335                 make_tag (name, strlen (name), true, buffer + linecharno,
6336                           charno - linecharno + 1, lineno, linecharno);
6337               break;
6338             }
6339         }
6340     }
6341 }
6342
6343 \f
6344 static bool
6345 nocase_tail (const char *cp)
6346 {
6347   int len = 0;
6348
6349   while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6350     cp++, len++;
6351   if (*cp == '\0' && !intoken (dbp[len]))
6352     {
6353       dbp += len;
6354       return true;
6355     }
6356   return false;
6357 }
6358
6359 static void
6360 get_tag (register char *bp, char **namepp)
6361 {
6362   register char *cp = bp;
6363
6364   if (*bp != '\0')
6365     {
6366       /* Go till you get to white space or a syntactic break */
6367       for (cp = bp + 1; !notinname (*cp); cp++)
6368         continue;
6369       make_tag (bp, cp - bp, true,
6370                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6371     }
6372
6373   if (namepp != NULL)
6374     *namepp = savenstr (bp, cp - bp);
6375 }
6376
6377 /*
6378  * Read a line of text from `stream' into `lbp', excluding the
6379  * newline or CR-NL, if any.  Return the number of characters read from
6380  * `stream', which is the length of the line including the newline.
6381  *
6382  * On DOS or Windows we do not count the CR character, if any before the
6383  * NL, in the returned length; this mirrors the behavior of Emacs on those
6384  * platforms (for text files, it translates CR-NL to NL as it reads in the
6385  * file).
6386  *
6387  * If multi-line regular expressions are requested, each line read is
6388  * appended to `filebuf'.
6389  */
6390 static long
6391 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6392 {
6393   char *buffer = lbp->buffer;
6394   char *p = lbp->buffer;
6395   char *pend;
6396   int chars_deleted;
6397
6398   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6399
6400   for (;;)
6401     {
6402       register int c = getc (stream);
6403       if (p == pend)
6404         {
6405           /* We're at the end of linebuffer: expand it. */
6406           lbp->size *= 2;
6407           xrnew (buffer, lbp->size, char);
6408           p += buffer - lbp->buffer;
6409           pend = buffer + lbp->size;
6410           lbp->buffer = buffer;
6411         }
6412       if (c == EOF)
6413         {
6414           if (ferror (stream))
6415             perror (filename);
6416           *p = '\0';
6417           chars_deleted = 0;
6418           break;
6419         }
6420       if (c == '\n')
6421         {
6422           if (p > buffer && p[-1] == '\r')
6423             {
6424               p -= 1;
6425               chars_deleted = 2;
6426             }
6427           else
6428             {
6429               chars_deleted = 1;
6430             }
6431           *p = '\0';
6432           break;
6433         }
6434       *p++ = c;
6435     }
6436   lbp->len = p - buffer;
6437
6438   if (need_filebuf              /* we need filebuf for multi-line regexps */
6439       && chars_deleted > 0)     /* not at EOF */
6440     {
6441       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6442         {
6443           /* Expand filebuf. */
6444           filebuf.size *= 2;
6445           xrnew (filebuf.buffer, filebuf.size, char);
6446         }
6447       memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6448       filebuf.len += lbp->len;
6449       filebuf.buffer[filebuf.len++] = '\n';
6450       filebuf.buffer[filebuf.len] = '\0';
6451     }
6452
6453   return lbp->len + chars_deleted;
6454 }
6455
6456 /*
6457  * Like readline_internal, above, but in addition try to match the
6458  * input line against relevant regular expressions and manage #line
6459  * directives.
6460  */
6461 static void
6462 readline (linebuffer *lbp, FILE *stream)
6463 {
6464   long result;
6465
6466   linecharno = charno;          /* update global char number of line start */
6467   result = readline_internal (lbp, stream, infilename); /* read line */
6468   lineno += 1;                  /* increment global line number */
6469   charno += result;             /* increment global char number */
6470
6471   /* Honor #line directives. */
6472   if (!no_line_directive)
6473     {
6474       static bool discard_until_line_directive;
6475
6476       /* Check whether this is a #line directive. */
6477       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6478         {
6479           unsigned int lno;
6480           int start = 0;
6481
6482           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6483               && start > 0)     /* double quote character found */
6484             {
6485               char *endp = lbp->buffer + start;
6486
6487               while ((endp = strchr (endp, '"')) != NULL
6488                      && endp[-1] == '\\')
6489                 endp++;
6490               if (endp != NULL)
6491                 /* Ok, this is a real #line directive.  Let's deal with it. */
6492                 {
6493                   char *taggedabsname;  /* absolute name of original file */
6494                   char *taggedfname;    /* name of original file as given */
6495                   char *name;           /* temp var */
6496
6497                   discard_until_line_directive = false; /* found it */
6498                   name = lbp->buffer + start;
6499                   *endp = '\0';
6500                   canonicalize_filename (name);
6501                   taggedabsname = absolute_filename (name, tagfiledir);
6502                   if (filename_is_absolute (name)
6503                       || filename_is_absolute (curfdp->infname))
6504                     taggedfname = savestr (taggedabsname);
6505                   else
6506                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6507
6508                   if (streq (curfdp->taggedfname, taggedfname))
6509                     /* The #line directive is only a line number change.  We
6510                        deal with this afterwards. */
6511                     free (taggedfname);
6512                   else
6513                     /* The tags following this #line directive should be
6514                        attributed to taggedfname.  In order to do this, set
6515                        curfdp accordingly. */
6516                     {
6517                       fdesc *fdp; /* file description pointer */
6518
6519                       /* Go look for a file description already set up for the
6520                          file indicated in the #line directive.  If there is
6521                          one, use it from now until the next #line
6522                          directive. */
6523                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6524                         if (streq (fdp->infname, curfdp->infname)
6525                             && streq (fdp->taggedfname, taggedfname))
6526                           /* If we remove the second test above (after the &&)
6527                              then all entries pertaining to the same file are
6528                              coalesced in the tags file.  If we use it, then
6529                              entries pertaining to the same file but generated
6530                              from different files (via #line directives) will
6531                              go into separate sections in the tags file.  These
6532                              alternatives look equivalent.  The first one
6533                              destroys some apparently useless information. */
6534                           {
6535                             curfdp = fdp;
6536                             free (taggedfname);
6537                             break;
6538                           }
6539                       /* Else, if we already tagged the real file, skip all
6540                          input lines until the next #line directive. */
6541                       if (fdp == NULL) /* not found */
6542                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6543                           if (streq (fdp->infabsname, taggedabsname))
6544                             {
6545                               discard_until_line_directive = true;
6546                               free (taggedfname);
6547                               break;
6548                             }
6549                       /* Else create a new file description and use that from
6550                          now on, until the next #line directive. */
6551                       if (fdp == NULL) /* not found */
6552                         {
6553                           fdp = fdhead;
6554                           fdhead = xnew (1, fdesc);
6555                           *fdhead = *curfdp; /* copy curr. file description */
6556                           fdhead->next = fdp;
6557                           fdhead->infname = savestr (curfdp->infname);
6558                           fdhead->infabsname = savestr (curfdp->infabsname);
6559                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6560                           fdhead->taggedfname = taggedfname;
6561                           fdhead->usecharno = false;
6562                           fdhead->prop = NULL;
6563                           fdhead->written = false;
6564                           curfdp = fdhead;
6565                         }
6566                     }
6567                   free (taggedabsname);
6568                   lineno = lno - 1;
6569                   readline (lbp, stream);
6570                   return;
6571                 } /* if a real #line directive */
6572             } /* if #line is followed by a number */
6573         } /* if line begins with "#line " */
6574
6575       /* If we are here, no #line directive was found. */
6576       if (discard_until_line_directive)
6577         {
6578           if (result > 0)
6579             {
6580               /* Do a tail recursion on ourselves, thus discarding the contents
6581                  of the line buffer. */
6582               readline (lbp, stream);
6583               return;
6584             }
6585           /* End of file. */
6586           discard_until_line_directive = false;
6587           return;
6588         }
6589     } /* if #line directives should be considered */
6590
6591   {
6592     int match;
6593     regexp *rp;
6594     char *name;
6595
6596     /* Match against relevant regexps. */
6597     if (lbp->len > 0)
6598       for (rp = p_head; rp != NULL; rp = rp->p_next)
6599         {
6600           /* Only use generic regexps or those for the current language.
6601              Also do not use multiline regexps, which is the job of
6602              regex_tag_multiline. */
6603           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6604               || rp->multi_line)
6605             continue;
6606
6607           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6608           switch (match)
6609             {
6610             case -2:
6611               /* Some error. */
6612               if (!rp->error_signaled)
6613                 {
6614                   error ("regexp stack overflow while matching \"%s\"",
6615                          rp->pattern);
6616                   rp->error_signaled = true;
6617                 }
6618               break;
6619             case -1:
6620               /* No match. */
6621               break;
6622             case 0:
6623               /* Empty string matched. */
6624               if (!rp->error_signaled)
6625                 {
6626                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6627                   rp->error_signaled = true;
6628                 }
6629               break;
6630             default:
6631               /* Match occurred.  Construct a tag. */
6632               name = rp->name;
6633               if (name[0] == '\0')
6634                 name = NULL;
6635               else /* make a named tag */
6636                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6637               if (rp->force_explicit_name)
6638                 /* Force explicit tag name, if a name is there. */
6639                 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6640               else
6641                 make_tag (name, strlen (name), true,
6642                           lbp->buffer, match, lineno, linecharno);
6643               break;
6644             }
6645         }
6646   }
6647 }
6648
6649 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy occupies
 * strlen(cp)+1 bytes obtained through savenstr.
 */
static char *
savestr (const char *cp)
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6659
6660 /*
6661  * Return a pointer to a space of size LEN+1 allocated with xnew where
6662  * the string CP has been copied for at most the first LEN characters.
6663  */
6664 static char *
6665 savenstr (const char *cp, int len)
6666 {
6667   char *dp = xnew (len + 1, char);
6668   dp[len] = '\0';
6669   return memcpy (dp, cp, len);
6670 }
6671
/* Return a pointer to the first character of CP that is not a space
   according to c_isspace.  The end of the string is not a space, so
   the scan stops there at the latest. */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
6680
/* Return a pointer to the first space character (per c_isspace) in
   CP, or to the terminating NUL if there is none. */
static char *
skip_non_spaces (char *cp)
{
  for (; *cp != '\0'; cp++)
    if (c_isspace (*cp))
      break;
  return cp;
}
6689
/* Return a pointer to the first character of CP for which notinname
   is true, i.e. skip any chars in the "name" class. */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so loop stops there too */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
6699
/* Report a diagnostic built from the printf-style FORMAT and its
   arguments through verror, then exit with EXIT_FAILURE.  Does not
   return. */
static void
fatal (char const *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);

  exit (EXIT_FAILURE);
}
6710
/* Hand S1 to perror, which prints it along with the message for the
   current errno, then exit with EXIT_FAILURE.  Does not return. */
static void
pfatal (const char *s1)
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6717
6718 static void
6719 suggest_asking_for_help (void)
6720 {
6721   fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
6722            progname);
6723   exit (EXIT_FAILURE);
6724 }
6725
/* Report a diagnostic built from the printf-style FORMAT and its
   arguments through verror.  Unlike fatal, this returns to the
   caller. */
static void
error (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);
}
6735
6736 static void
6737 verror (char const *format, va_list ap)
6738 {
6739   fprintf (stderr, "%s: ", progname);
6740   vfprintf (stderr, format, ap);
6741   fprintf (stderr, "\n");
6742 }
6743
6744 /* Return a newly-allocated string whose contents
6745    concatenate those of s1, s2, s3.  */
6746 static char *
6747 concat (const char *s1, const char *s2, const char *s3)
6748 {
6749   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6750   char *result = xnew (len1 + len2 + len3 + 1, char);
6751
6752   strcpy (result, s1);
6753   strcpy (result + len1, s2);
6754   strcpy (result + len1 + len2, s3);
6755
6756   return result;
6757 }
6758
6759 \f
6760 /* Does the same work as the system V getcwd, but does not need to
6761    guess the buffer size in advance. */
6762 static char *
6763 etags_getcwd (void)
6764 {
6765   int bufsize = 200;
6766   char *path = xnew (bufsize, char);
6767
6768   while (getcwd (path, bufsize) == NULL)
6769     {
6770       if (errno != ERANGE)
6771         pfatal ("getcwd");
6772       bufsize *= 2;
6773       free (path);
6774       path = xnew (bufsize, char);
6775     }
6776
6777   canonicalize_filename (path);
6778   return path;
6779 }
6780
6781 /* Return a newly allocated string containing a name of a temporary file.  */
6782 static char *
6783 etags_mktmp (void)
6784 {
6785   const char *tmpdir = getenv ("TMPDIR");
6786   const char *slash = "/";
6787
6788 #if MSDOS || defined (DOS_NT)
6789   if (!tmpdir)
6790     tmpdir = getenv ("TEMP");
6791   if (!tmpdir)
6792     tmpdir = getenv ("TMP");
6793   if (!tmpdir)
6794     tmpdir = ".";
6795   if (tmpdir[strlen (tmpdir) - 1] == '/'
6796       || tmpdir[strlen (tmpdir) - 1] == '\\')
6797     slash = "";
6798 #else
6799   if (!tmpdir)
6800     tmpdir = "/tmp";
6801   if (tmpdir[strlen (tmpdir) - 1] == '/')
6802     slash = "";
6803 #endif
6804
6805   char *templt = concat (tmpdir, slash, "etXXXXXX");
6806   int fd = mkostemp (templt, O_CLOEXEC);
6807   if (fd < 0 || close (fd) != 0)
6808     {
6809       int temp_errno = errno;
6810       free (templt);
6811       errno = temp_errno;
6812       templt = NULL;
6813     }
6814
6815 #if defined (DOS_NT)
6816   /* The file name will be used in shell redirection, so it needs to have
6817      DOS-style backslashes, or else the Windows shell will barf.  */
6818   char *p;
6819   for (p = templt; *p; p++)
6820     if (*p == '/')
6821       *p = '\\';
6822 #endif
6823
6824   return templt;
6825 }
6826
6827 /* Return a newly allocated string containing the file name of FILE
6828    relative to the absolute directory DIR (which should end with a slash). */
6829 static char *
6830 relative_filename (char *file, char *dir)
6831 {
6832   char *fp, *dp, *afn, *res;
6833   int i;
6834
6835   /* Find the common root of file and dir (with a trailing slash). */
6836   afn = absolute_filename (file, cwd);
6837   fp = afn;
6838   dp = dir;
6839   while (*fp++ == *dp++)
6840     continue;
6841   fp--, dp--;                   /* back to the first differing char */
6842 #ifdef DOS_NT
6843   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6844     return afn;
6845 #endif
6846   do                            /* look at the equal chars until '/' */
6847     fp--, dp--;
6848   while (*fp != '/');
6849
6850   /* Build a sequence of "../" strings for the resulting relative file name. */
6851   i = 0;
6852   while ((dp = strchr (dp + 1, '/')) != NULL)
6853     i += 1;
6854   res = xnew (3*i + strlen (fp + 1) + 1, char);
6855   char *z = res;
6856   while (i-- > 0)
6857     z = stpcpy (z, "../");
6858
6859   /* Add the file name relative to the common root of file and dir. */
6860   strcpy (z, fp + 1);
6861   free (afn);
6862
6863   return res;
6864 }
6865
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied as is, otherwise DIR is prepended to it.
   Then every "/." component is dropped, and every "/.." component is
   dropped along with the component before it. */
static char *
absolute_filename (char *file, char *dir)
{
  char *seg;                    /* slash that starts the current component */
  char *prev;                   /* slash that starts the preceding one */
  char *abs;                    /* the name being built */

  if (filename_is_absolute (file))
    abs = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    abs = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  seg = strchr (abs, '/');
  while (seg != NULL && *seg != '\0')
    {
      bool dot = seg[1] == '.';

      if (dot && seg[2] == '.' && (seg[3] == '/' || seg[3] == '\0'))
        {
          /* "/..": walk back to the start of the previous component. */
          prev = seg;
          do
            prev--;
          while (prev >= abs && !filename_is_absolute (prev));
          if (prev < abs)
            prev = seg;         /* the absolute name begins with "/.." */
#ifdef DOS_NT
          /* Under MSDOS and NT we get `d:/NAME' as absolute
             file name, so the luser could say `d:/../NAME'.
             We silently treat this as `d:/NAME'.  */
          else if (prev[0] != '/')
            prev = seg;
#endif
          /* The length includes the terminating NUL. */
          memmove (prev, seg + 3, strlen (seg + 2));
          seg = prev;           /* look at the same spot again */
        }
      else if (dot && (seg[2] == '/' || seg[2] == '\0'))
        {
          /* "/.": drop it, then look at the same spot again. */
          memmove (seg, seg + 2, strlen (seg + 1));
        }
      else
        seg = strchr (seg + 1, '/');
    }

  if (abs[0] != '\0')
    return abs;

  /* just a safety net: should never happen */
  free (abs);
  return savestr ("/");
}
6928
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  A FILE without any slash yields a copy of DIR.
   FILE is modified for the duration of the call and restored before
   returning. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');
  char *res;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash, leaving only the directory
     part, and put the overwritten character back afterwards. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  res = absolute_filename (file, dir);
  last_slash[1] = saved;

  return res;
}
6948
/* Return true if FN is an absolute file name: one that starts with a
   slash or, under DOS_NT, with a drive letter followed by ":/".  The
   argument string must have been canonicalized with
   canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
6960
/* Rewrite FN in place so that every run of separators becomes a
   single forward slash.  Under DOS_NT a backslash counts as a
   separator too, and an upper-case drive letter is downcased first. */
static void
canonicalize_filename (register char *fn)
{
  register char *src = fn;      /* next character to read */
  register char *dst = fn;      /* next position to write, never past src */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);

  /* Collapse multiple forward- and back-slashes into a single forward
     slash. */
  while (*src != '\0')
    {
      if (*src == '/' || *src == '\\')
        {
          *dst++ = '/';
          do
            src++;
          while (*src == '/' || *src == '\\');
        }
      else
        *dst++ = *src++;
    }

#else  /* !DOS_NT */

  /* Collapse multiple slashes into a single slash. */
  while (*src != '\0')
    {
      if (*src == '/')
        {
          *dst++ = '/';
          do
            src++;
          while (*src == '/');
        }
      else
        *dst++ = *src++;
    }

#endif  /* !DOS_NT */

  *dst = '\0';
}
7002
7003 \f
7004 /* Initialize a linebuffer for use. */
7005 static void
7006 linebuffer_init (linebuffer *lbp)
7007 {
7008   lbp->size = (DEBUG) ? 3 : 200;
7009   lbp->buffer = xnew (lbp->size, char);
7010   lbp->buffer[0] = '\0';
7011   lbp->len = 0;
7012 }
7013
7014 /* Set the minimum size of a string contained in a linebuffer. */
7015 static void
7016 linebuffer_setlen (linebuffer *lbp, int toksize)
7017 {
7018   while (lbp->size <= toksize)
7019     {
7020       lbp->size *= 2;
7021       xrnew (lbp->buffer, lbp->size, char);
7022     }
7023   lbp->len = toksize;
7024 }
7025
/* Allocate SIZE bytes with malloc and return the block.  A NULL
   result from malloc is reported through fatal. */
static void *
xmalloc (size_t size)
{
  void *mem = malloc (size);

  if (!mem)
    fatal ("virtual memory exhausted");

  return mem;
}
7035
/* Resize the block PTR to SIZE bytes with realloc and return the
   resulting block.  A NULL result from realloc is reported through
   fatal. */
static void *
xrealloc (void *ptr, size_t size)
{
  void *mem = realloc (ptr, size);

  if (!mem)
    fatal ("virtual memory exhausted");

  return mem;
}
7044
7045 /*
7046  * Local Variables:
7047  * indent-tabs-mode: t
7048  * tab-width: 8
7049  * fill-column: 79
7050  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
7051  * c-file-style: "gnu"
7052  * End:
7053  */
7054
7055 /* etags.c ends here */