code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2016 Free Software
  32 Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  * Francesco Potortì maintained and improved it for many years
  72    starting in 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
  81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
     /* Version identification string.  Unlike the other file-scope objects
        in this part of the file it is not declared `static'.  */
  82
  83 #ifdef DEBUG                   /* normalize DEBUG: after this block it is */
  84 #  undef DEBUG                 /* always defined, as true or as false */
  85 #  define DEBUG true
  86 #else
  87 #  define DEBUG  false
  88 #  define NDEBUG                /* disable assert; NDEBUG is tested again
                                       right after <assert.h> is included */
  89 #endif
  90
  91 #include <config.h>
  92
  93 #ifndef _GNU_SOURCE
  94 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  95 #endif
  96
  97 /* WIN32_NATIVE is for XEmacs.
  98    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  99 #ifdef WIN32_NATIVE
 100 # undef MSDOS
 101 # undef  WINDOWSNT
 102 # define WINDOWSNT
 103 #endif /* WIN32_NATIVE */
 104
 105 #ifdef MSDOS
 106 # undef MSDOS
 107 # define MSDOS true
 108 # include <sys/param.h>
 109 #else
 110 # define MSDOS false
 111 #endif /* MSDOS */
 112
 113 #ifdef WINDOWSNT
 114 # include <direct.h>
 115 # define MAXPATHLEN _MAX_PATH
 116 # undef HAVE_NTGUI
 117 # undef  DOS_NT
 118 # define DOS_NT
 119 # define O_CLOEXEC O_NOINHERIT
 120 #endif /* WINDOWSNT */
 121
 122 #include <limits.h>
 123 #include <unistd.h>
 124 #include <stdarg.h>
 125 #include <stdlib.h>
 126 #include <string.h>
 127 #include <sysstdio.h>
 128 #include <errno.h>
 129 #include <fcntl.h>
 130 #include <binary-io.h>
 131 #include <c-ctype.h>
 132 #include <c-strcase.h>
 133
 134 #include <assert.h>
 135 #ifdef NDEBUG
 136 # undef  assert                 /* some systems have a buggy assert.h */
 137 # define assert(x) ((void) 0)
 138 #endif
 139
 140 #include <getopt.h>
 141 #include <regex.h>
 142
  143 /* Define CTAGS to make the program "ctags" compatible with the usual one.
  144  Leave it undefined to make the program "etags", which makes emacs-style
  145  tag tables and tags typedefs, #defines and struct/union/enum by default.
       Either way CTAGS ends up defined, as true or as false, so the rest of
       the file can test it with a plain `#if CTAGS'.  */
  146 #ifdef CTAGS
  147 # undef  CTAGS
  148 # define CTAGS true
  149 #else
  150 # define CTAGS false
  151 #endif
 152
 153 static bool
 154 streq (char const *s, char const *t)
 155 {
 156   return strcmp (s, t) == 0;
 157 }
 158
 159 static bool
 160 strcaseeq (char const *s, char const *t)
 161 {
 162   return c_strcasecmp (s, t) == 0;
 163 }
 164
 165 static bool
 166 strneq (char const *s, char const *t, size_t n)
 167 {
 168   return strncmp (s, t, n) == 0;
 169 }
 170
 171 static bool
 172 strncaseeq (char const *s, char const *t, size_t n)
 173 {
 174   return c_strncasecmp (s, t, n) == 0;
 175 }
 176
 177 /* C is not in a name.  */
 178 static bool
 179 notinname (unsigned char c)
 180 {
 181   /* Look at make_tag before modifying!  */
 182   static bool const table[UCHAR_MAX + 1] = {
 183     ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
 184     ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
 185   };
 186   return table[c];
 187 }
 188
 189 /* C can start a token.  */
 190 static bool
 191 begtoken (unsigned char c)
 192 {
 193   static bool const table[UCHAR_MAX + 1] = {
 194     ['$']=1, ['@']=1,
 195     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 196     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 197     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 198     ['Y']=1, ['Z']=1,
 199     ['_']=1,
 200     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 201     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 202     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 203     ['y']=1, ['z']=1,
 204     ['~']=1
 205   };
 206   return table[c];
 207 }
 208
 209 /* C can be in the middle of a token.  */
 210 static bool
 211 intoken (unsigned char c)
 212 {
 213   static bool const table[UCHAR_MAX + 1] = {
 214     ['$']=1,
 215     ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
 216     ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
 217     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 218     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 219     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 220     ['Y']=1, ['Z']=1,
 221     ['_']=1,
 222     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 223     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 224     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 225     ['y']=1, ['z']=1
 226   };
 227   return table[c];
 228 }
 229
 230 /* C can end a token.  */
 231 static bool
 232 endtoken (unsigned char c)
 233 {
 234   static bool const table[UCHAR_MAX + 1] = {
 235     ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
 236     ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
 237     ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
 238     ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
 239     ['{']=1, ['|']=1, ['}']=1, ['~']=1
 240   };
 241   return table[c];
 242 }
 243
  244 /*
  245  *      xnew, xrnew -- allocate, reallocate storage
  246  *
  247  * SYNOPSIS:    Type *xnew (int n, Type);
  248  *              void xrnew (OldPointer, int n, Type);
       *
       * xnew yields the result of xmalloc for N objects of type Type, cast
       * to Type *.  xrnew passes OP to xrealloc and assigns the result back
       * to OP, so OP must be an lvalue and is evaluated twice.  Although the
       * synopsis says void, the xrnew expansion is an assignment expression.
       * Neither macro checks N * sizeof (Type) for overflow.
  249  */
  250 #define xnew(n, Type)      ((Type *) xmalloc ((n) * sizeof (Type)))
  251 #define xrnew(op, n, Type) ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
 252
  253 typedef void Lang_function (FILE *);
      /* Type of a language parse function (see the `function' member of
         `language'): it receives a FILE * and returns nothing.  */
 254
  255 typedef struct                /* element type of the `compressors' table */
  256 {
  257   const char *suffix;           /* file name suffix for this compressor */
  258   const char *command;          /* takes one arg and decompresses to stdout */
  259 } compressor;
 260
  261 typedef struct                /* description of one source language */
  262 {
  263   const char *name;             /* language name */
  264   const char *help;             /* detailed help for the language */
  265   Lang_function *function;      /* parse function */
  266   const char **suffixes;        /* name suffixes of this language's files;
                                       the *_suffixes arrays end with NULL */
  267   const char **filenames;       /* names of this language's files
                                       (whole names, e.g. "Makefile") */
  268   const char **interpreters;    /* interpreters for this language */
  269   bool metasource;              /* source used to generate other sources */
  270 } language;
 271
  272 typedef struct fdesc          /* description of one input file; the list
                                     head is `fdhead', the current one `curfdp' */
  273 {
  274   struct fdesc *next;           /* for the linked list */
  275   char *infname;                /* uncompressed input file name */
  276   char *infabsname;             /* absolute uncompressed input file name */
  277   char *infabsdir;              /* absolute dir of input file */
  278   char *taggedfname;            /* file name to write in tagfile */
  279   language *lang;               /* language of file */
  280   char *prop;                   /* file properties to write in tagfile */
  281   bool usecharno;               /* etags tags shall contain char number */
  282   bool written;                 /* entry written in the tags file */
  283 } fdesc;
 284
  285 typedef struct node_st        /* one tag, stored as a node of the binary
                                     tree of tags whose head is `nodehead' */
  286 {                               /* sorting structure */
  287   struct node_st *left, *right; /* left and right sons */
  288   fdesc *fdp;                   /* description of the file the tag belongs to */
  289   char *name;                   /* tag name */
  290   char *regex;                  /* search regexp */
  291   bool valid;                   /* write this tag on the tag file */
  292   bool is_func;                 /* function tag: use regexp in CTAGS mode */
  293   bool been_warned;             /* warning already given for duplicated tag */
  294   int lno;                      /* line number tag is on */
  295   long cno;                     /* character number line starts on */
  296 } node;
 297
  298 /*
  299  * A `linebuffer' is a structure which holds a line of text.
  300  * `readline_internal' reads a line from a stream into a linebuffer
  301  * and works regardless of the length of the line.
  302  * SIZE is the size of BUFFER, LEN is the length of the string in
  303  * BUFFER after readline reads it.
       * Note that SIZE is a long while LEN is an int.  The file-scope
       * buffers `lb', `filebuf' and `token_name' have this type.
  304  */
  305 typedef struct
  306 {
  307   long size;
  308   int len;
  309   char *buffer;
  310 } linebuffer;
 311
  312 /* Used to support mixing of --lang and file names.  Each `argument'
         records the kind of one item (ARG_TYPE) together with its text
         (WHAT) and the language associated with it (LANG).  */
  313 typedef struct
  314 {
  315   enum {
  316     at_language,                /* a language specification */
  317     at_regexp,                  /* a regular expression */
  318     at_filename,                /* a file name */
  319     at_stdin,                   /* read from stdin here */
  320     at_end                      /* stop parsing the list */
  321   } arg_type;                   /* argument type */
  322   language *lang;               /* language associated with the argument */
  323   char *what;                   /* the argument itself; `whatlen_max' holds
                                       the maximum length of any such member */
  324 } argument;
 325
  326 /* Structure defining a regular expression.  The structures are chained
         through P_NEXT; `p_head' is the list of all regexps.  */
  327 typedef struct regexp
  328 {
  329   struct regexp *p_next;        /* pointer to next in list */
  330   language *lang;               /* if set, use only for this language */
  331   char *pattern;                /* the regexp pattern */
  332   char *name;                   /* tag name */
  333   struct re_pattern_buffer *pat; /* the compiled pattern */
  334   struct re_registers regs;     /* re registers (stored by value) */
  335   bool error_signaled;          /* already signaled for this regexp */
  336   bool force_explicit_name;     /* do not allow implicit tag name */
  337   bool ignore_case;             /* ignore case when matching */
  338   bool multi_line;              /* do a multi-line match on the whole file */
  339 } regexp;
 340
 341
  342 /* Many compilers barf on this:
  343         Lang_function Ada_funcs;
  344    so let's write it this way */
      /* Every function declared below except C_entries has the
         Lang_function signature (one FILE * argument, no result);
         C_entries takes an additional leading int, C_EXT.  */
  345 static void Ada_funcs (FILE *);
  346 static void Asm_labels (FILE *);
  347 static void C_entries (int c_ext, FILE *);
  348 static void default_C_entries (FILE *);
  349 static void plain_C_entries (FILE *);
  350 static void Cjava_entries (FILE *);
  351 static void Cobol_paragraphs (FILE *);
  352 static void Cplusplus_entries (FILE *);
  353 static void Cstar_entries (FILE *);
  354 static void Erlang_functions (FILE *);
  355 static void Forth_words (FILE *);
  356 static void Fortran_functions (FILE *);
  357 static void Go_functions (FILE *);
  358 static void HTML_labels (FILE *);
  359 static void Lisp_functions (FILE *);
  360 static void Lua_functions (FILE *);
  361 static void Makefile_targets (FILE *);
  362 static void Pascal_functions (FILE *);
  363 static void Perl_functions (FILE *);
  364 static void PHP_functions (FILE *);
  365 static void PS_functions (FILE *);
  366 static void Prolog_functions (FILE *);
  367 static void Python_functions (FILE *);
  368 static void Ruby_functions (FILE *);
  369 static void Scheme_functions (FILE *);
  370 static void TeX_commands (FILE *);
  371 static void Texinfo_nodes (FILE *);
  372 static void Yacc_entries (FILE *);
  373 static void just_read_file (FILE *);
 374
  375 static language *get_language_from_langname (const char *);
  376 static void readline (linebuffer *, FILE *);
  377 static long readline_internal (linebuffer *, FILE *, char const *);
  378 static bool nocase_tail (const char *);
  379 static void get_tag (char *, char **);
  380
  381 static void analyze_regex (char *);
  382 static void free_regexps (void);
  383 static void regex_tag_multiline (void);
      /* error, verror and fatal take a format string as their first
         argument; ATTRIBUTE_FORMAT_PRINTF is defined outside this file
         and presumably lets the compiler check it.  The functions marked
         _Noreturn do not return to their caller.  */
  384 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
  385 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
  386 static _Noreturn void suggest_asking_for_help (void);
  387 static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
  388 static _Noreturn void pfatal (const char *);
  389 static void add_node (node *, node **);
  390
  391 static void process_file_name (char *, language *);
  392 static void process_file (FILE *, char *, language *);
  393 static void find_entries (FILE *);
  394 static void free_tree (node *);
  395 static void free_fdesc (fdesc *);
  396 static void pfnote (char *, bool, char *, int, int, long);
  397 static void invalidate_nodes (fdesc *, node **);
  398 static void put_entries (node *);
  399
  400 static char *concat (const char *, const char *, const char *);
  401 static char *skip_spaces (char *);
  402 static char *skip_non_spaces (char *);
  403 static char *skip_name (char *);
  404 static char *savenstr (const char *, int);
  405 static char *savestr (const char *);
  406 static char *etags_getcwd (void);
  407 static char *relative_filename (char *, char *);
  408 static char *absolute_filename (char *, char *);
  409 static char *absolute_dirname (char *, char *);
  410 static bool filename_is_absolute (char *f);
  411 static void canonicalize_filename (char *);
  412 static char *etags_mktmp (void);
  413 static void linebuffer_init (linebuffer *);
  414 static void linebuffer_setlen (linebuffer *, int);
      /* xmalloc and xrealloc are the allocators the xnew and xrnew macros
         expand to.  */
  415 static void *xmalloc (size_t);
  416 static void *xrealloc (void *, size_t);
 417
 418 \f
  419 static char searchar = '/';     /* use /.../ searches */
  420
  421 static char *tagfile;           /* output file */
  422 static char *progname;          /* name this program was invoked with */
  423 static char *cwd;               /* current working directory */
  424 static char *tagfiledir;        /* directory of tagfile */
  425 static FILE *tagf;              /* ioptr for tags file */
  426 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */
  427
  428 static fdesc *fdhead;           /* head of file description list */
  429 static fdesc *curfdp;           /* current file description */
  430 static char *infilename;        /* current input file name */
  431 static int lineno;              /* line number of current line */
  432 static long charno;             /* current character number */
  433 static long linecharno;         /* charno of start of current line */
  434 static char *dbp;               /* pointer to start of current tag */
  435
  436 static const int invalidcharno = -1;
      /* ^ presumably marks "no valid character number"; note that it is an
         int, whereas charno and linecharno are long.  */
  437
  438 static node *nodehead;          /* the head of the binary tree of tags */
  439 static node *last_node;         /* the last node created */
  440
  441 static linebuffer lb;           /* the current line */
  442 static linebuffer filebuf;      /* a buffer containing the whole file */
  443 static linebuffer token_name;   /* a buffer containing a tag name */
 444
  445 static bool append_to_tagfile;  /* -a: append to tags */
  446 /* The next five default to true in C and derived languages.  */
  447 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
  448 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
  449                                 /* 0 struct/enum/union decls, and C++ */
  450                                 /* member functions. */
  451 static bool constantypedefs;    /* -d: create tags for C #define, enum */
  452                                 /* constants and variables. */
  453                                 /* -D: opposite of -d.  Default under ctags. */
      /* The flags declared int rather than bool are the ones whose address
         is stored in the `flag' member of a longopts entry.  */
  454 static int globals;             /* create tags for global variables */
  455 static int members;             /* create tags for C member variables */
  456 static int declarations;        /* --declarations: tag them and extern in C&Co*/
  457 static int no_line_directive;   /* ignore #line directives (undocumented) */
  458 static int no_duplicates;       /* no duplicate tags for ctags (undocumented) */
  459 static bool update;             /* -u: update tags */
  460 static bool vgrind_style;       /* -v: create vgrind style index output */
  461 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
  462 static bool cxref_style;        /* -x: create cxref style output */
  463 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
  464 static bool ignoreindent;       /* -I: ignore indentation in C */
  465 static int packages_only;       /* --packages-only: in Ada, only tag packages*/
  466 static int class_qualify;       /* -Q: produce class-qualified tags in C++/Java */
 467
  468 /* STDIN is defined in LynxOS system headers; drop any such definition
         so that the name can be reused below as an option code.  */
  469 #ifdef STDIN
  470 # undef STDIN
  471 #endif
  472
  473 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin;
                                       larger than any character constant used
                                       as an option code in longopts */
  474 static bool parsing_stdin;      /* --parse-stdin used */
  475
  476 static regexp *p_head;          /* list of all regexps */
  477 static bool need_filebuf;       /* some regexes are multi-line */
 478
  479 static struct option longopts[] =
  480 {                               /* columns: name, has_arg, flag, val */
      /* For an entry whose flag is NULL, getopt_long returns val; for an
         entry with a non-NULL flag it stores val in *flag instead.  The
         options available depend on CTAGS; the table ends with { NULL }.
         NOTE(review): "help" is listed twice, with 'h' and with 'H' --
         confirm that the second entry is intentional.
         NOTE(review): "class-qualify" stores 'Q', not 1, in class_qualify;
         any non-zero value presumably serves as "true".  */
  481   { "append",             no_argument,       NULL,               'a'   },
  482   { "packages-only",      no_argument,       &packages_only,     1     },
  483   { "c++",                no_argument,       NULL,               'C'   },
  484   { "declarations",       no_argument,       &declarations,      1     },
  485   { "no-line-directive",  no_argument,       &no_line_directive, 1     },
  486   { "no-duplicates",      no_argument,       &no_duplicates,     1     },
  487   { "help",               no_argument,       NULL,               'h'   },
  488   { "help",               no_argument,       NULL,               'H'   },
  489   { "ignore-indentation", no_argument,       NULL,               'I'   },
  490   { "language",           required_argument, NULL,               'l'   },
  491   { "members",            no_argument,       &members,           1     },
  492   { "no-members",         no_argument,       &members,           0     },
  493   { "output",             required_argument, NULL,               'o'   },
  494   { "class-qualify",      no_argument,       &class_qualify,     'Q'   },
  495   { "regex",              required_argument, NULL,               'r'   },
  496   { "no-regex",           no_argument,       NULL,               'R'   },
  497   { "ignore-case-regex",  required_argument, NULL,               'c'   },
  498   { "parse-stdin",        required_argument, NULL,               STDIN },
  499   { "version",            no_argument,       NULL,               'V'   },
  500
  501 #if CTAGS /* Ctags options */
  502   { "backward-search",    no_argument,       NULL,               'B'   },
  503   { "cxref",              no_argument,       NULL,               'x'   },
  504   { "defines",            no_argument,       NULL,               'd'   },
  505   { "globals",            no_argument,       &globals,           1     },
  506   { "typedefs",           no_argument,       NULL,               't'   },
  507   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  508   { "update",             no_argument,       NULL,               'u'   },
  509   { "vgrind",             no_argument,       NULL,               'v'   },
  510   { "no-warn",            no_argument,       NULL,               'w'   },
  511
  512 #else /* Etags options */
  513   { "no-defines",         no_argument,       NULL,               'D'   },
  514   { "no-globals",         no_argument,       &globals,           0     },
  515   { "include",            required_argument, NULL,               'i'   },
  516 #endif
  517   { NULL }
  518 };
 519
  520 static compressor compressors[] =     /* { suffix, command } pairs */
  521 {
  522   { "z", "gzip -d -c"},
  523   { "Z", "gzip -d -c"},
  524   { "gz", "gzip -d -c"},
  525   { "GZ", "gzip -d -c"},
  526   { "bz2", "bzip2 -d -c" },
  527   { "xz", "xz -d -c" },
  528   { NULL }                      /* end of table: NULL suffix */
  529 };
 530
 531 /*
 532  * Language stuff.
 533  */
 534
  535 /* Ada code: NULL-terminated list of file name suffixes, then the
         detailed help text.  */
  536 static const char *Ada_suffixes [] =
  537   { "ads", "adb", "ada", NULL };
  538 static const char Ada_help [] =
  539 "In Ada code, functions, procedures, packages, tasks and types are\n\
  540 tags.  Use the '--packages-only' option to create tags for\n\
  541 packages only.\n\
  542 Ada tag names have suffixes indicating the type of entity:\n\
  543         Entity type:    Qualifier:\n\
  544         ------------    ----------\n\
  545         function        /f\n\
  546         procedure       /p\n\
  547         package spec    /s\n\
  548         package body    /b\n\
  549         type            /t\n\
  550         task            /k\n\
  551 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
  552 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
  553 will just search for any tag 'bidule'.";
  554
  555 /* Assembly code: file name suffixes and detailed help text.  */
  556 static const char *Asm_suffixes [] =
  557   { "a",        /* Unix assembler */
  558     "asm", /* Microcontroller assembly */
  559     "def", /* BSO/Tasking definition includes  */
  560     "inc", /* Microcontroller include files */
  561     "ins", /* Microcontroller include files */
  562     "s", "sa", /* Unix assembler */
  563     "S",   /* cpp-processed Unix assembler */
  564     "src", /* BSO/Tasking C compiler output */
  565     NULL
  566   };
  567 static const char Asm_help [] =
  568 "In assembler code, labels appearing at the beginning of a line,\n\
  569 followed by a colon, are tags.";
 570
 571
  572 /* Note that .c and .h can be considered C++, if the --c++ flag was
  573    given, or if the `class' or `template' keywords are met inside the file.
  574    That is why default_C_entries is called for these. */
  575 static const char *default_C_suffixes [] =
  576   { "c", "h", NULL };
      /* Exactly one of the two default_C_help definitions below is
         compiled, depending on the value of CTAGS.  */
  577 #if CTAGS                               /* C help for Ctags */
  578 static const char default_C_help [] =
  579 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
  580 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
  581 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
  582 Use --globals to tag global variables.\n\
  583 You can tag function declarations and external variables by\n\
  584 using '--declarations', and struct members by using '--members'.";
  585 #else                                   /* C help for Etags */
  586 static const char default_C_help [] =
  587 "In C code, any C function or typedef is a tag, and so are\n\
  588 definitions of 'struct', 'union' and 'enum'.  '#define' macro\n\
  589 definitions and 'enum' constants are tags unless you specify\n\
  590 '--no-defines'.  Global variables are tags unless you specify\n\
  591 '--no-globals' and so are struct members unless you specify\n\
  592 '--no-members'.  Use of '--no-globals', '--no-defines' and\n\
  593 '--no-members' can make the tags table file much smaller.\n\
  594 You can tag function declarations and external variables by\n\
  595 using '--declarations'.";
  596 #endif  /* C help for Ctags and Etags */
  597 /* C++: file name suffixes and detailed help text.  */
  598 static const char *Cplusplus_suffixes [] =
  599   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
  600     "M",                        /* Objective C++ */
  601     "pdb",                      /* PostScript with C syntax */
  602     NULL };
  603 static const char Cplusplus_help [] =
  604 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
  605 --help --lang=c --lang=c++ for full help.)\n\
  606 In addition to C tags, member functions are also recognized.  Member\n\
  607 variables are recognized unless you use the '--no-members' option.\n\
  608 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
  609 and 'CLASS::FUNCTION'.  'operator' definitions have tag names like\n\
  610 'operator+'.";
 611
 612 static const char *Cjava_suffixes [] =
 613   { "java", NULL };
 614 static char Cjava_help [] =
 615 "In Java code, all the tags constructs of C and C++ code are\n\
 616 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 617
 618
 619 static const char *Cobol_suffixes [] =
 620   { "COB", "cob", NULL };
 621 static char Cobol_help [] =
 622 "In Cobol code, tags are paragraph names; that is, any word\n\
 623 starting in column 8 and followed by a period.";
 624
  625 static const char *Cstar_suffixes [] =
  626   { "cs", "hs", NULL };
      /* No Cstar_help string is defined next to these suffixes.  */
  627 /* Erlang: file name suffixes and detailed help text.  */
  628 static const char *Erlang_suffixes [] =
  629   { "erl", "hrl", NULL };
  630 static const char Erlang_help [] =
  631 "In Erlang code, the tags are the functions, records and macros\n\
  632 defined in the file.";
 633
 634 const char *Forth_suffixes [] =
 635   { "fth", "tok", NULL };
 636 static const char Forth_help [] =
 637 "In Forth code, tags are words defined by ':',\n\
 638 constant, code, create, defer, value, variable, buffer:, field.";
 639
  640 static const char *Fortran_suffixes [] =        /* Fortran */
  641   { "F", "f", "f90", "for", NULL };
  642 static const char Fortran_help [] =
  643 "In Fortran code, functions, subroutines and block data are tags.";
  644 /* Go */
  645 static const char *Go_suffixes [] = {"go", NULL};
  646 static const char Go_help [] =
  647   "In Go code, functions, interfaces and packages are tags.";
  648 /* HTML */
  649 static const char *HTML_suffixes [] =
  650   { "htm", "html", "shtml", NULL };
  651 static const char HTML_help [] =
  652 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
  653 'h3' headers.  Also, tags are 'name=' in anchors and all\n\
  654 occurrences of 'id='.";
  655 /* Lisp */
  656 static const char *Lisp_suffixes [] =
  657   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
  658 static const char Lisp_help [] =
  659 "In Lisp code, any function defined with 'defun', any variable\n\
  660 defined with 'defvar' or 'defconst', and in general the first\n\
  661 argument of any expression that starts with '(def' in column zero\n\
  662 is a tag.\n\
  663 The '--declarations' option tags \"(defvar foo)\" constructs too.";
  664 /* Lua */
  665 static const char *Lua_suffixes [] =
  666   { "lua", "LUA", NULL };
  667 static const char Lua_help [] =
  668 "In Lua scripts, all functions are tags.";
  669 /* Makefiles: listed by whole file name rather than by suffix.  */
  670 static const char *Makefile_filenames [] =
  671   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
  672 static const char Makefile_help [] =
  673 "In makefiles, targets are tags; additionally, variables are tags\n\
  674 unless you specify '--no-globals'.";
 675
  676 static const char *Objc_suffixes [] =           /* Objective C */
  677   { "lm",                       /* Objective lex file */
  678     "m",                        /* Objective C file */
  679      NULL };
  680 static const char Objc_help [] =
  681 "In Objective C code, tags include Objective C definitions for classes,\n\
  682 class categories, methods and protocols.  Tags for variables and\n\
  683 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\n\
  684 (Use --help --lang=c --lang=objc --lang=java for full help.)";
  685 /* Pascal */
  686 static const char *Pascal_suffixes [] =
  687   { "p", "pas", NULL };
  688 static const char Pascal_help [] =
  689 "In Pascal code, the tags are the functions and procedures defined\n\
  690 in the file.";
  691 /* " // this is for working around an Emacs highlighting bug... */
  692 /* Perl: besides suffixes, a list of interpreter names is given.  */
  693 static const char *Perl_suffixes [] =
  694   { "pl", "pm", NULL };
  695 static const char *Perl_interpreters [] =
  696   { "perl", "@PERL@", NULL };   /* NOTE(review): "@PERL@" looks like a
                                       configure-style placeholder -- confirm */
  697 static const char Perl_help [] =
  698 "In Perl code, the tags are the packages, subroutines and variables\n\
  699 defined by the 'package', 'sub', 'my' and 'local' keywords.  Use\n\
  700 '--globals' if you want to tag global variables.  Tags for\n\
  701 subroutines are named 'PACKAGE::SUB'.  The name for subroutines\n\
  702 defined in the default package is 'main::SUB'.";
  703 /* PHP */
  704 static const char *PHP_suffixes [] =
  705   { "php", "php3", "php4", NULL };
  706 static const char PHP_help [] =
  707 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
  708 the '--no-members' option, vars are tags too.";
  709 /* Plain C: no help string is defined next to these suffixes.  */
  710 static const char *plain_C_suffixes [] =
  711   { "pc",                       /* Pro*C file */
  712      NULL };
  713 /* PostScript */
  714 static const char *PS_suffixes [] =
  715   { "ps", "psw", NULL };        /* .psw is for PSWrap */
  716 static const char PS_help [] =
  717 "In PostScript code, the tags are the functions.";
 718
  719 static const char *Prolog_suffixes [] =         /* Prolog */
  720   { "prolog", NULL };
  721 static const char Prolog_help [] =
  722 "In Prolog code, tags are predicates and rules at the beginning of\n\
  723 line.";
  724 /* Python */
  725 static const char *Python_suffixes [] =
  726   { "py", NULL };
  727 static const char Python_help [] =
  728 "In Python code, 'def' or 'class' at the beginning of a line\n\
  729 generate a tag.";
  730 /* Ruby */
  731 static const char *Ruby_suffixes [] =
  732   { "rb", "ruby", NULL };
  733 static const char Ruby_help [] =
  734   "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
  735 a line generate a tag.  Constants also generate a tag.";
  736
  737 /* Can't do the `SCM' or `scm' prefix with a version number. */
  738 static const char *Scheme_suffixes [] =
  739   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
  740 static const char Scheme_help [] =
  741 "In Scheme code, tags include anything defined with 'def' or with a\n\
  742 construct whose name starts with 'def'.  They also include\n\
  743 variables set with 'set!' at top level in the file.";
  744 /* TeX and LaTeX */
  745 static const char *TeX_suffixes [] =
  746   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
  747 static const char TeX_help [] =
  748 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
  749 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
  750 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
  751 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
  752 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
  753 \n\
  754 Other commands can be specified by setting the environment variable\n\
  755 'TEXTAGS' to a colon-separated list like, for example,\n\
  756      TEXTAGS=\"mycommand:myothercommand\".";
  757
  758 /* Texinfo */
  759 static const char *Texinfo_suffixes [] =
  760   { "texi", "texinfo", "txi", NULL };
  761 static const char Texinfo_help [] =
  762 "for texinfo files, lines starting with @node are tagged.";
  763 /* Yacc and Bison */
  764 static const char *Yacc_suffixes [] =
  765   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
  766 static const char Yacc_help [] =
  767 "In Bison or Yacc input files, each rule defines as a tag the\n\
  768 nonterminal it constructs.  The portions of the file that contain\n\
  769 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
  770 for full help).";
 771
 772 static const char auto_help [] =
 773 "'auto' is not a real language, it indicates to use\n\
 774 a default language for files base on file name suffix and file contents.";
 775
 776 static const char none_help [] =
 777 "'none' is not a real language, it indicates to only do\n\
 778 regexp processing on files.";
 779
 780 static const char no_lang_help [] =
 781 "No detailed help available for this language.";
 782
 783
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 *
 * The initializers are positional.  Every full row gives, in order:
 * the language name, its help text, a per-language function
 * (presumably the routine that tags files of that language -- confirm
 * against the `language' struct), and the NULL-terminated list of
 * file-name suffixes.  A few rows continue past that:
 *   - "makefile" has no suffixes (NULL) and supplies a list of whole
 *     file names as the fifth field;
 *   - "perl" supplies a list of interpreter names as the sixth field;
 *   - "yacc" sets a trailing boolean to true (its meaning is defined
 *     by the `language' struct, which is not visible here).
 * Fields that are left out are zero-initialized.
 *
 * The table is terminated by an entry whose name is NULL; the loops
 * that scan it stop on that condition.
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "go",        Go_help,        Go_functions,      Go_suffixes        },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
  /* No suffixes: makefiles are recognized by whole file name.  */
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  /* No file names, but a list of interpreter names.  */
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "ruby",      Ruby_help,      Ruby_functions,    Ruby_suffixes      },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  /* The only row that sets the trailing boolean field.  */
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
 825
 826 \f
 827 static void
 828 print_language_names (void)
 829 {
 830   language *lang;
 831   const char **name, **ext;
 832
 833   puts ("\nThese are the currently supported languages, along with the\n\
 834 default file names and dot suffixes:");
 835   for (lang = lang_names; lang->name != NULL; lang++)
 836     {
 837       printf ("  %-*s", 10, lang->name);
 838       if (lang->filenames != NULL)
 839         for (name = lang->filenames; *name != NULL; name++)
 840           printf (" %s", *name);
 841       if (lang->suffixes != NULL)
 842         for (ext = lang->suffixes; *ext != NULL; ext++)
 843           printf (" .%s", *ext);
 844       puts ("");
 845     }
 846   puts ("where 'auto' means use default language for files based on file\n\
 847 name suffix, and 'none' means only do regexp processing on files.\n\
 848 If no language is specified and no matching suffix is found,\n\
 849 the first line of the file is read for a sharp-bang (#!) sequence\n\
 850 followed by the name of an interpreter.  If no such sequence is found,\n\
 851 Fortran is tried first; if no tags are found, C is tried next.\n\
 852 When parsing any C file, a \"class\" or \"template\" keyword\n\
 853 switches to C++.");
 854   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 855 \n\
 856 For detailed help on a given language use, for example,\n\
 857 etags --help --lang=ada.");
 858 }
 859
 860 #ifndef EMACS_NAME
 861 # define EMACS_NAME "standalone"
 862 #endif
 863 #ifndef VERSION
 864 # define VERSION "17.38.1.4"
 865 #endif
 866 static _Noreturn void
 867 print_version (void)
 868 {
 869   char emacs_copyright[] = COPYRIGHT;
 870
 871   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 872   puts (emacs_copyright);
 873   puts ("This program is distributed under the terms in ETAGS.README");
 874
 875   exit (EXIT_SUCCESS);
 876 }
 877
 878 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 879 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
 880 #endif
 881
 882 static _Noreturn void
 883 print_help (argument *argbuffer)
 884 {
 885   bool help_for_lang = false;
 886
 887   for (; argbuffer->arg_type != at_end; argbuffer++)
 888     if (argbuffer->arg_type == at_language)
 889       {
 890         if (help_for_lang)
 891           puts ("");
 892         puts (argbuffer->lang->help);
 893         help_for_lang = true;
 894       }
 895
 896   if (help_for_lang)
 897     exit (EXIT_SUCCESS);
 898
 899   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 900 \n\
 901 These are the options accepted by %s.\n", progname, progname);
 902   puts ("You may use unambiguous abbreviations for the long option names.");
 903   puts ("  A - as file name means read names from stdin (one per line).\n\
 904 Absolute names are stored in the output file as they are.\n\
 905 Relative ones are stored relative to the output file's directory.\n");
 906
 907   puts ("-a, --append\n\
 908         Append tag entries to existing tags file.");
 909
 910   puts ("--packages-only\n\
 911         For Ada files, only generate tags for packages.");
 912
 913   if (CTAGS)
 914     puts ("-B, --backward-search\n\
 915         Write the search commands for the tag entries using '?', the\n\
 916         backward-search command instead of '/', the forward-search command.");
 917
 918   /* This option is mostly obsolete, because etags can now automatically
 919      detect C++.  Retained for backward compatibility and for debugging and
 920      experimentation.  In principle, we could want to tag as C++ even
 921      before any "class" or "template" keyword.
 922   puts ("-C, --c++\n\
 923         Treat files whose name suffix defaults to C language as C++ files.");
 924   */
 925
 926   puts ("--declarations\n\
 927         In C and derived languages, create tags for function declarations,");
 928   if (CTAGS)
 929     puts ("\tand create tags for extern variables if --globals is used.");
 930   else
 931     puts
 932       ("\tand create tags for extern variables unless --no-globals is used.");
 933
 934   if (CTAGS)
 935     puts ("-d, --defines\n\
 936         Create tag entries for C #define constants and enum constants, too.");
 937   else
 938     puts ("-D, --no-defines\n\
 939         Don't create tag entries for C #define constants and enum constants.\n\
 940         This makes the tags file smaller.");
 941
 942   if (!CTAGS)
 943     puts ("-i FILE, --include=FILE\n\
 944         Include a note in tag file indicating that, when searching for\n\
 945         a tag, one should also consult the tags file FILE after\n\
 946         checking the current file.");
 947
 948   puts ("-l LANG, --language=LANG\n\
 949         Force the following files to be considered as written in the\n\
 950         named language up to the next --language=LANG option.");
 951
 952   if (CTAGS)
 953     puts ("--globals\n\
 954         Create tag entries for global variables in some languages.");
 955   else
 956     puts ("--no-globals\n\
 957         Do not create tag entries for global variables in some\n\
 958         languages.  This makes the tags file smaller.");
 959
 960   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 961     puts ("--no-line-directive\n\
 962         Ignore #line preprocessor directives in C and derived languages.");
 963
 964   if (CTAGS)
 965     puts ("--members\n\
 966         Create tag entries for members of structures in some languages.");
 967   else
 968     puts ("--no-members\n\
 969         Do not create tag entries for members of structures\n\
 970         in some languages.");
 971
 972   puts ("-Q, --class-qualify\n\
 973         Qualify tag names with their class name in C++, ObjC, and Java.\n\
 974         This produces tag names of the form \"class::member\" for C++,\n\
 975         \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
 976         For Objective C, this also produces class methods qualified with\n\
 977         their arguments, as in \"foo:bar:baz:more\".");
 978   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 979         Make a tag for each line matching a regular expression pattern\n\
 980         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 981         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 982         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 983         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 984   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 985         For example Tcl named tags can be created with:\n\
 986           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 987         MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
 988         'm' means to allow multi-line matches, 's' implies 'm' and\n\
 989         causes dot to match any character, including newline.");
 990
 991   puts ("-R, --no-regex\n\
 992         Don't create tags from regexps for the following files.");
 993
 994   puts ("-I, --ignore-indentation\n\
 995         In C and C++ do not assume that a closing brace in the first\n\
 996         column is the final brace of a function or structure definition.");
 997
 998   puts ("-o FILE, --output=FILE\n\
 999         Write the tags to FILE.");
1000
1001   puts ("--parse-stdin=NAME\n\
1002         Read from standard input and record tags as belonging to file NAME.");
1003
1004   if (CTAGS)
1005     {
1006       puts ("-t, --typedefs\n\
1007         Generate tag entries for C and Ada typedefs.");
1008       puts ("-T, --typedefs-and-c++\n\
1009         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1010         and C++ member functions.");
1011     }
1012
1013   if (CTAGS)
1014     puts ("-u, --update\n\
1015         Update the tag entries for the given files, leaving tag\n\
1016         entries for other files in place.  Currently, this is\n\
1017         implemented by deleting the existing entries for the given\n\
1018         files and then rewriting the new entries at the end of the\n\
1019         tags file.  It is often faster to simply rebuild the entire\n\
1020         tag file than to use this.");
1021
1022   if (CTAGS)
1023     {
1024       puts ("-v, --vgrind\n\
1025         Print on the standard output an index of items intended for\n\
1026         human consumption, similar to the output of vgrind.  The index\n\
1027         is sorted, and gives the page number of each item.");
1028
1029       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1030         puts ("-w, --no-duplicates\n\
1031         Do not create duplicate tag entries, for compatibility with\n\
1032         traditional ctags.");
1033
1034       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1035         puts ("-w, --no-warn\n\
1036         Suppress warning messages about duplicate tag entries.");
1037
1038       puts ("-x, --cxref\n\
1039         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1040         The output uses line numbers instead of page numbers, but\n\
1041         beyond that the differences are cosmetic; try both to see\n\
1042         which you like.");
1043     }
1044
1045   puts ("-V, --version\n\
1046         Print the version of the program.\n\
1047 -h, --help\n\
1048         Print this help message.\n\
1049         Followed by one or more '--language' options prints detailed\n\
1050         help about tag generation for the specified languages.");
1051
1052   print_language_names ();
1053
1054   puts ("");
1055   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1056
1057   exit (EXIT_SUCCESS);
1058 }
1059
1060 \f
1061 int
1062 main (int argc, char **argv)
1063 {
1064   int i;
1065   unsigned int nincluded_files;
1066   char **included_files;
1067   argument *argbuffer;
1068   int current_arg, file_count;
1069   linebuffer filename_lb;
1070   bool help_asked = false;
1071   ptrdiff_t len;
1072   char *optstring;
1073   int opt;
1074
1075   progname = argv[0];
1076   nincluded_files = 0;
1077   included_files = xnew (argc, char *);
1078   current_arg = 0;
1079   file_count = 0;
1080
1081   /* Allocate enough no matter what happens.  Overkill, but each one
1082      is small. */
1083   argbuffer = xnew (argc, argument);
1084
1085   /*
1086    * Always find typedefs and structure tags.
1087    * Also default to find macro constants, enum constants, struct
1088    * members and global variables.  Do it for both etags and ctags.
1089    */
1090   typedefs = typedefs_or_cplusplus = constantypedefs = true;
1091   globals = members = true;
1092
1093   /* When the optstring begins with a '-' getopt_long does not rearrange the
1094      non-options arguments to be at the end, but leaves them alone. */
1095   optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1096                       (CTAGS) ? "BxdtTuvw" : "Di:",
1097                       "");
1098
1099   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1100     switch (opt)
1101       {
1102       case 0:
1103         /* If getopt returns 0, then it has already processed a
1104            long-named option.  We should do nothing.  */
1105         break;
1106
1107       case 1:
1108         /* This means that a file name has been seen.  Record it. */
1109         argbuffer[current_arg].arg_type = at_filename;
1110         argbuffer[current_arg].what     = optarg;
1111         len = strlen (optarg);
1112         if (whatlen_max < len)
1113           whatlen_max = len;
1114         ++current_arg;
1115         ++file_count;
1116         break;
1117
1118       case STDIN:
1119         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1120         argbuffer[current_arg].arg_type = at_stdin;
1121         argbuffer[current_arg].what     = optarg;
1122         len = strlen (optarg);
1123         if (whatlen_max < len)
1124           whatlen_max = len;
1125         ++current_arg;
1126         ++file_count;
1127         if (parsing_stdin)
1128           fatal ("cannot parse standard input more than once");
1129         parsing_stdin = true;
1130         break;
1131
1132         /* Common options. */
1133       case 'a': append_to_tagfile = true;       break;
1134       case 'C': cplusplus = true;               break;
1135       case 'f':         /* for compatibility with old makefiles */
1136       case 'o':
1137         if (tagfile)
1138           {
1139             error ("-o option may only be given once.");
1140             suggest_asking_for_help ();
1141             /* NOTREACHED */
1142           }
1143         tagfile = optarg;
1144         break;
1145       case 'I':
1146       case 'S':         /* for backward compatibility */
1147         ignoreindent = true;
1148         break;
1149       case 'l':
1150         {
1151           language *lang = get_language_from_langname (optarg);
1152           if (lang != NULL)
1153             {
1154               argbuffer[current_arg].lang = lang;
1155               argbuffer[current_arg].arg_type = at_language;
1156               ++current_arg;
1157             }
1158         }
1159         break;
1160       case 'c':
1161         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1162         optarg = concat (optarg, "i", ""); /* memory leak here */
1163         /* FALLTHRU */
1164       case 'r':
1165         argbuffer[current_arg].arg_type = at_regexp;
1166         argbuffer[current_arg].what = optarg;
1167         len = strlen (optarg);
1168         if (whatlen_max < len)
1169           whatlen_max = len;
1170         ++current_arg;
1171         break;
1172       case 'R':
1173         argbuffer[current_arg].arg_type = at_regexp;
1174         argbuffer[current_arg].what = NULL;
1175         ++current_arg;
1176         break;
1177       case 'V':
1178         print_version ();
1179         break;
1180       case 'h':
1181       case 'H':
1182         help_asked = true;
1183         break;
1184       case 'Q':
1185         class_qualify = 1;
1186         break;
1187
1188         /* Etags options */
1189       case 'D': constantypedefs = false;                        break;
1190       case 'i': included_files[nincluded_files++] = optarg;     break;
1191
1192         /* Ctags options. */
1193       case 'B': searchar = '?';                                 break;
1194       case 'd': constantypedefs = true;                         break;
1195       case 't': typedefs = true;                                break;
1196       case 'T': typedefs = typedefs_or_cplusplus = true;        break;
1197       case 'u': update = true;                                  break;
1198       case 'v': vgrind_style = true;                      /*FALLTHRU*/
1199       case 'x': cxref_style = true;                             break;
1200       case 'w': no_warnings = true;                             break;
1201       default:
1202         suggest_asking_for_help ();
1203         /* NOTREACHED */
1204       }
1205
1206   /* No more options.  Store the rest of arguments. */
1207   for (; optind < argc; optind++)
1208     {
1209       argbuffer[current_arg].arg_type = at_filename;
1210       argbuffer[current_arg].what = argv[optind];
1211       len = strlen (argv[optind]);
1212       if (whatlen_max < len)
1213         whatlen_max = len;
1214       ++current_arg;
1215       ++file_count;
1216     }
1217
1218   argbuffer[current_arg].arg_type = at_end;
1219
1220   if (help_asked)
1221     print_help (argbuffer);
1222     /* NOTREACHED */
1223
1224   if (nincluded_files == 0 && file_count == 0)
1225     {
1226       error ("no input files specified.");
1227       suggest_asking_for_help ();
1228       /* NOTREACHED */
1229     }
1230
1231   if (tagfile == NULL)
1232     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1233   cwd = etags_getcwd ();        /* the current working directory */
1234   if (cwd[strlen (cwd) - 1] != '/')
1235     {
1236       char *oldcwd = cwd;
1237       cwd = concat (oldcwd, "/", "");
1238       free (oldcwd);
1239     }
1240
1241   /* Compute base directory for relative file names. */
1242   if (streq (tagfile, "-")
1243       || strneq (tagfile, "/dev/", 5))
1244     tagfiledir = cwd;            /* relative file names are relative to cwd */
1245   else
1246     {
1247       canonicalize_filename (tagfile);
1248       tagfiledir = absolute_dirname (tagfile, cwd);
1249     }
1250
1251   linebuffer_init (&lb);
1252   linebuffer_init (&filename_lb);
1253   linebuffer_init (&filebuf);
1254   linebuffer_init (&token_name);
1255
1256   if (!CTAGS)
1257     {
1258       if (streq (tagfile, "-"))
1259         {
1260           tagf = stdout;
1261           SET_BINARY (fileno (stdout));
1262         }
1263       else
1264         tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1265       if (tagf == NULL)
1266         pfatal (tagfile);
1267     }
1268
1269   /*
1270    * Loop through files finding functions.
1271    */
1272   for (i = 0; i < current_arg; i++)
1273     {
1274       static language *lang;    /* non-NULL if language is forced */
1275       char *this_file;
1276
1277       switch (argbuffer[i].arg_type)
1278         {
1279         case at_language:
1280           lang = argbuffer[i].lang;
1281           break;
1282         case at_regexp:
1283           analyze_regex (argbuffer[i].what);
1284           break;
1285         case at_filename:
1286               this_file = argbuffer[i].what;
1287               /* Input file named "-" means read file names from stdin
1288                  (one per line) and use them. */
1289               if (streq (this_file, "-"))
1290                 {
1291                   if (parsing_stdin)
1292                     fatal ("cannot parse standard input "
1293                            "AND read file names from it");
1294                   while (readline_internal (&filename_lb, stdin, "-") > 0)
1295                     process_file_name (filename_lb.buffer, lang);
1296                 }
1297               else
1298                 process_file_name (this_file, lang);
1299           break;
1300         case at_stdin:
1301           this_file = argbuffer[i].what;
1302           process_file (stdin, this_file, lang);
1303           break;
1304         default:
1305           error ("internal error: arg_type");
1306         }
1307     }
1308
1309   free_regexps ();
1310   free (lb.buffer);
1311   free (filebuf.buffer);
1312   free (token_name.buffer);
1313
1314   if (!CTAGS || cxref_style)
1315     {
1316       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1317       put_entries (nodehead);
1318       free_tree (nodehead);
1319       nodehead = NULL;
1320       if (!CTAGS)
1321         {
1322           fdesc *fdp;
1323
1324           /* Output file entries that have no tags. */
1325           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1326             if (!fdp->written)
1327               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1328
1329           while (nincluded_files-- > 0)
1330             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1331
1332           if (fclose (tagf) == EOF)
1333             pfatal (tagfile);
1334         }
1335
1336       exit (EXIT_SUCCESS);
1337     }
1338
1339   /* From here on, we are in (CTAGS && !cxref_style) */
1340   if (update)
1341     {
1342       char *cmd =
1343         xmalloc (strlen (tagfile) + whatlen_max +
1344                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1345       for (i = 0; i < current_arg; ++i)
1346         {
1347           switch (argbuffer[i].arg_type)
1348             {
1349             case at_filename:
1350             case at_stdin:
1351               break;
1352             default:
1353               continue;         /* the for loop */
1354             }
1355           char *z = stpcpy (cmd, "mv ");
1356           z = stpcpy (z, tagfile);
1357           z = stpcpy (z, " OTAGS;fgrep -v '\t");
1358           z = stpcpy (z, argbuffer[i].what);
1359           z = stpcpy (z, "\t' OTAGS >");
1360           z = stpcpy (z, tagfile);
1361           strcpy (z, ";rm OTAGS");
1362           if (system (cmd) != EXIT_SUCCESS)
1363             fatal ("failed to execute shell command");
1364         }
1365       free (cmd);
1366       append_to_tagfile = true;
1367     }
1368
1369   tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1370   if (tagf == NULL)
1371     pfatal (tagfile);
1372   put_entries (nodehead);       /* write all the tags (CTAGS) */
1373   free_tree (nodehead);
1374   nodehead = NULL;
1375   if (fclose (tagf) == EOF)
1376     pfatal (tagfile);
1377
1378   if (CTAGS)
1379     if (append_to_tagfile || update)
1380       {
1381         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1382         /* Maybe these should be used:
1383            setenv ("LC_COLLATE", "C", 1);
1384            setenv ("LC_ALL", "C", 1); */
1385         char *z = stpcpy (cmd, "sort -u -o ");
1386         z = stpcpy (z, tagfile);
1387         *z++ = ' ';
1388         strcpy (z, tagfile);
1389         exit (system (cmd));
1390       }
1391   return EXIT_SUCCESS;
1392 }
1393
1394
1395 /*
1396  * Return a compressor given the file name.  If EXTPTR is non-zero,
1397  * return a pointer into FILE where the compressor-specific
1398  * extension begins.  If no compressor is found, NULL is returned
1399  * and EXTPTR is not significant.
1400  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1401  */
1402 static compressor *
1403 get_compressor_from_suffix (char *file, char **extptr)
1404 {
1405   compressor *compr;
1406   char *slash, *suffix;
1407
1408   /* File has been processed by canonicalize_filename,
1409      so we don't need to consider backslashes on DOS_NT.  */
1410   slash = strrchr (file, '/');
1411   suffix = strrchr (file, '.');
1412   if (suffix == NULL || suffix < slash)
1413     return NULL;
1414   if (extptr != NULL)
1415     *extptr = suffix;
1416   suffix += 1;
1417   /* Let those poor souls who live with DOS 8+3 file name limits get
1418      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1419      Only the first do loop is run if not MSDOS */
1420   do
1421     {
1422       for (compr = compressors; compr->suffix != NULL; compr++)
1423         if (streq (compr->suffix, suffix))
1424           return compr;
1425       if (!MSDOS)
1426         break;                  /* do it only once: not really a loop */
1427       if (extptr != NULL)
1428         *extptr = ++suffix;
1429     } while (*suffix != '\0');
1430   return NULL;
1431 }
1432
1433
1434
1435 /*
1436  * Return a language given the name.
1437  */
1438 static language *
1439 get_language_from_langname (const char *name)
1440 {
1441   language *lang;
1442
1443   if (name == NULL)
1444     error ("empty language name");
1445   else
1446     {
1447       for (lang = lang_names; lang->name != NULL; lang++)
1448         if (streq (name, lang->name))
1449           return lang;
1450       error ("unknown language \"%s\"", name);
1451     }
1452
1453   return NULL;
1454 }
1455
1456
1457 /*
1458  * Return a language given the interpreter name.
1459  */
1460 static language *
1461 get_language_from_interpreter (char *interpreter)
1462 {
1463   language *lang;
1464   const char **iname;
1465
1466   if (interpreter == NULL)
1467     return NULL;
1468   for (lang = lang_names; lang->name != NULL; lang++)
1469     if (lang->interpreters != NULL)
1470       for (iname = lang->interpreters; *iname != NULL; iname++)
1471         if (streq (*iname, interpreter))
1472             return lang;
1473
1474   return NULL;
1475 }
1476
1477
1478
1479 /*
1480  * Return a language given the file name.
1481  */
1482 static language *
1483 get_language_from_filename (char *file, int case_sensitive)
1484 {
1485   language *lang;
1486   const char **name, **ext, *suffix;
1487
1488   /* Try whole file name first. */
1489   for (lang = lang_names; lang->name != NULL; lang++)
1490     if (lang->filenames != NULL)
1491       for (name = lang->filenames; *name != NULL; name++)
1492         if ((case_sensitive)
1493             ? streq (*name, file)
1494             : strcaseeq (*name, file))
1495           return lang;
1496
1497   /* If not found, try suffix after last dot. */
1498   suffix = strrchr (file, '.');
1499   if (suffix == NULL)
1500     return NULL;
1501   suffix += 1;
1502   for (lang = lang_names; lang->name != NULL; lang++)
1503     if (lang->suffixes != NULL)
1504       for (ext = lang->suffixes; *ext != NULL; ext++)
1505         if ((case_sensitive)
1506             ? streq (*ext, suffix)
1507             : strcaseeq (*ext, suffix))
1508           return lang;
1509   return NULL;
1510 }
1511
1512 \f
1513 /*
1514  * This routine is called on each file argument.
1515  */
1516 static void
1517 process_file_name (char *file, language *lang)
1518 {
1519   FILE *inf;
1520   fdesc *fdp;
1521   compressor *compr;
1522   char *compressed_name, *uncompressed_name;
1523   char *ext, *real_name, *tmp_name;
1524   int retval;
1525
1526   canonicalize_filename (file);
1527   if (streq (file, tagfile) && !streq (tagfile, "-"))
1528     {
1529       error ("skipping inclusion of %s in self.", file);
1530       return;
1531     }
1532   compr = get_compressor_from_suffix (file, &ext);
1533   if (compr)
1534     {
1535       compressed_name = file;
1536       uncompressed_name = savenstr (file, ext - file);
1537     }
1538   else
1539     {
1540       compressed_name = NULL;
1541       uncompressed_name = file;
1542     }
1543
1544   /* If the canonicalized uncompressed name
1545      has already been dealt with, skip it silently. */
1546   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1547     {
1548       assert (fdp->infname != NULL);
1549       if (streq (uncompressed_name, fdp->infname))
1550         goto cleanup;
1551     }
1552
1553   inf = fopen (file, "r" FOPEN_BINARY);
1554   if (inf)
1555     real_name = file;
1556   else
1557     {
1558       int file_errno = errno;
1559       if (compressed_name)
1560         {
1561           /* Try with the given suffix.  */
1562           inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1563           if (inf)
1564             real_name = uncompressed_name;
1565         }
1566       else
1567         {
1568           /* Try all possible suffixes.  */
1569           for (compr = compressors; compr->suffix != NULL; compr++)
1570             {
1571               compressed_name = concat (file, ".", compr->suffix);
1572               inf = fopen (compressed_name, "r" FOPEN_BINARY);
1573               if (inf)
1574                 {
1575                   real_name = compressed_name;
1576                   break;
1577                 }
1578               if (MSDOS)
1579                 {
1580                   char *suf = compressed_name + strlen (file);
1581                   size_t suflen = strlen (compr->suffix) + 1;
1582                   for ( ; suf[1]; suf++, suflen--)
1583                     {
1584                       memmove (suf, suf + 1, suflen);
1585                       inf = fopen (compressed_name, "r" FOPEN_BINARY);
1586                       if (inf)
1587                         {
1588                           real_name = compressed_name;
1589                           break;
1590                         }
1591                     }
1592                   if (inf)
1593                     break;
1594                 }
1595               free (compressed_name);
1596               compressed_name = NULL;
1597             }
1598         }
1599       if (! inf)
1600         {
1601           errno = file_errno;
1602           perror (file);
1603           goto cleanup;
1604         }
1605     }
1606
1607   if (real_name == compressed_name)
1608     {
1609       fclose (inf);
1610       tmp_name = etags_mktmp ();
1611       if (!tmp_name)
1612         inf = NULL;
1613       else
1614         {
1615 #if MSDOS || defined (DOS_NT)
1616           char *cmd1 = concat (compr->command, " \"", real_name);
1617           char *cmd = concat (cmd1, "\" > ", tmp_name);
1618 #else
1619           char *cmd1 = concat (compr->command, " '", real_name);
1620           char *cmd = concat (cmd1, "' > ", tmp_name);
1621 #endif
1622           free (cmd1);
1623           int tmp_errno;
1624           if (system (cmd) == -1)
1625             {
1626               inf = NULL;
1627               tmp_errno = EINVAL;
1628             }
1629           else
1630             {
1631               inf = fopen (tmp_name, "r" FOPEN_BINARY);
1632               tmp_errno = errno;
1633             }
1634           free (cmd);
1635           errno = tmp_errno;
1636         }
1637
1638       if (!inf)
1639         {
1640           perror (real_name);
1641           goto cleanup;
1642         }
1643     }
1644
1645   process_file (inf, uncompressed_name, lang);
1646
1647   retval = fclose (inf);
1648   if (real_name == compressed_name)
1649     {
1650       remove (tmp_name);
1651       free (tmp_name);
1652     }
1653   if (retval < 0)
1654     pfatal (file);
1655
1656  cleanup:
1657   if (compressed_name != file)
1658     free (compressed_name);
1659   if (uncompressed_name != file)
1660     free (uncompressed_name);
1661   last_node = NULL;
1662   curfdp = NULL;
1663   return;
1664 }
1665
1666 static void
1667 process_file (FILE *fh, char *fn, language *lang)
1668 {
1669   static const fdesc emptyfdesc;
1670   fdesc *fdp;
1671
1672   infilename = fn;
1673   /* Create a new input file description entry. */
1674   fdp = xnew (1, fdesc);
1675   *fdp = emptyfdesc;
1676   fdp->next = fdhead;
1677   fdp->infname = savestr (fn);
1678   fdp->lang = lang;
1679   fdp->infabsname = absolute_filename (fn, cwd);
1680   fdp->infabsdir = absolute_dirname (fn, cwd);
1681   if (filename_is_absolute (fn))
1682     {
1683       /* An absolute file name.  Canonicalize it. */
1684       fdp->taggedfname = absolute_filename (fn, NULL);
1685     }
1686   else
1687     {
1688       /* A file name relative to cwd.  Make it relative
1689          to the directory of the tags file. */
1690       fdp->taggedfname = relative_filename (fn, tagfiledir);
1691     }
1692   fdp->usecharno = true;        /* use char position when making tags */
1693   fdp->prop = NULL;
1694   fdp->written = false;         /* not written on tags file yet */
1695
1696   fdhead = fdp;
1697   curfdp = fdhead;              /* the current file description */
1698
1699   find_entries (fh);
1700
1701   /* If not Ctags, and if this is not metasource and if it contained no #line
1702      directives, we can write the tags and free all nodes pointing to
1703      curfdp. */
1704   if (!CTAGS
1705       && curfdp->usecharno      /* no #line directives in this file */
1706       && !curfdp->lang->metasource)
1707     {
1708       node *np, *prev;
1709
1710       /* Look for the head of the sublist relative to this file.  See add_node
1711          for the structure of the node tree. */
1712       prev = NULL;
1713       for (np = nodehead; np != NULL; prev = np, np = np->left)
1714         if (np->fdp == curfdp)
1715           break;
1716
1717       /* If we generated tags for this file, write and delete them. */
1718       if (np != NULL)
1719         {
1720           /* This is the head of the last sublist, if any.  The following
1721              instructions depend on this being true. */
1722           assert (np->left == NULL);
1723
1724           assert (fdhead == curfdp);
1725           assert (last_node->fdp == curfdp);
1726           put_entries (np);     /* write tags for file curfdp->taggedfname */
1727           free_tree (np);       /* remove the written nodes */
1728           if (prev == NULL)
1729             nodehead = NULL;    /* no nodes left */
1730           else
1731             prev->left = NULL;  /* delete the pointer to the sublist */
1732         }
1733     }
1734 }
1735
1736 static void
1737 reset_input (FILE *inf)
1738 {
1739   if (fseek (inf, 0, SEEK_SET) != 0)
1740     perror (infilename);
1741 }
1742
1743 /*
1744  * This routine opens the specified file and calls the function
1745  * which finds the function and type definitions.
1746  */
1747 static void
1748 find_entries (FILE *inf)
1749 {
1750   char *cp;
1751   language *lang = curfdp->lang;
1752   Lang_function *parser = NULL;
1753
1754   /* If user specified a language, use it. */
1755   if (lang != NULL && lang->function != NULL)
1756     {
1757       parser = lang->function;
1758     }
1759
1760   /* Else try to guess the language given the file name. */
1761   if (parser == NULL)
1762     {
1763       lang = get_language_from_filename (curfdp->infname, true);
1764       if (lang != NULL && lang->function != NULL)
1765         {
1766           curfdp->lang = lang;
1767           parser = lang->function;
1768         }
1769     }
1770
1771   /* Else look for sharp-bang as the first two characters. */
1772   if (parser == NULL
1773       && readline_internal (&lb, inf, infilename) > 0
1774       && lb.len >= 2
1775       && lb.buffer[0] == '#'
1776       && lb.buffer[1] == '!')
1777     {
1778       char *lp;
1779
1780       /* Set lp to point at the first char after the last slash in the
1781          line or, if no slashes, at the first nonblank.  Then set cp to
1782          the first successive blank and terminate the string. */
1783       lp = strrchr (lb.buffer+2, '/');
1784       if (lp != NULL)
1785         lp += 1;
1786       else
1787         lp = skip_spaces (lb.buffer + 2);
1788       cp = skip_non_spaces (lp);
1789       *cp = '\0';
1790
1791       if (strlen (lp) > 0)
1792         {
1793           lang = get_language_from_interpreter (lp);
1794           if (lang != NULL && lang->function != NULL)
1795             {
1796               curfdp->lang = lang;
1797               parser = lang->function;
1798             }
1799         }
1800     }
1801
1802   reset_input (inf);
1803
1804   /* Else try to guess the language given the case insensitive file name. */
1805   if (parser == NULL)
1806     {
1807       lang = get_language_from_filename (curfdp->infname, false);
1808       if (lang != NULL && lang->function != NULL)
1809         {
1810           curfdp->lang = lang;
1811           parser = lang->function;
1812         }
1813     }
1814
1815   /* Else try Fortran or C. */
1816   if (parser == NULL)
1817     {
1818       node *old_last_node = last_node;
1819
1820       curfdp->lang = get_language_from_langname ("fortran");
1821       find_entries (inf);
1822
1823       if (old_last_node == last_node)
1824         /* No Fortran entries found.  Try C. */
1825         {
1826           reset_input (inf);
1827           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1828           find_entries (inf);
1829         }
1830       return;
1831     }
1832
1833   if (!no_line_directive
1834       && curfdp->lang != NULL && curfdp->lang->metasource)
1835     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1836        file, or anyway we parsed a file that is automatically generated from
1837        this one.  If this is the case, the bingo.c file contained #line
1838        directives that generated tags pointing to this file.  Let's delete
1839        them all before parsing this file, which is the real source. */
1840     {
1841       fdesc **fdpp = &fdhead;
1842       while (*fdpp != NULL)
1843         if (*fdpp != curfdp
1844             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1845           /* We found one of those!  We must delete both the file description
1846              and all tags referring to it. */
1847           {
1848             fdesc *badfdp = *fdpp;
1849
1850             /* Delete the tags referring to badfdp->taggedfname
1851                that were obtained from badfdp->infname. */
1852             invalidate_nodes (badfdp, &nodehead);
1853
1854             *fdpp = badfdp->next; /* remove the bad description from the list */
1855             free_fdesc (badfdp);
1856           }
1857         else
1858           fdpp = &(*fdpp)->next; /* advance the list pointer */
1859     }
1860
1861   assert (parser != NULL);
1862
1863   /* Generic initializations before reading from file. */
1864   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1865
1866   /* Generic initializations before parsing file with readline. */
1867   lineno = 0;                  /* reset global line number */
1868   charno = 0;                  /* reset global char number */
1869   linecharno = 0;              /* reset global char number of line start */
1870
1871   parser (inf);
1872
1873   regex_tag_multiline ();
1874 }
1875
1876 \f
1877 /*
1878  * Check whether an implicitly named tag should be created,
1879  * then call `pfnote'.
1880  * NAME is a string that is internally copied by this function.
1881  *
1882  * TAGS format specification
1883  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1884  * The following is explained in some more detail in etc/ETAGS.EBNF.
1885  *
1886  * make_tag creates tags with "implicit tag names" (unnamed tags)
1887  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1888  *  1. NAME does not contain any of the characters in NONAM;
1889  *  2. LINESTART contains name as either a rightmost, or rightmost but
1890  *     one character, substring;
1891  *  3. the character, if any, immediately before NAME in LINESTART must
1892  *     be a character in NONAM;
1893  *  4. the character, if any, immediately after NAME in LINESTART must
1894  *     also be a character in NONAM.
1895  *
1896  * The implementation uses the notinname() macro, which recognizes the
1897  * characters stored in the string `nonam'.
1898  * etags.el needs to use the same characters that are in NONAM.
1899  */
1900 static void
1901 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1902           int namelen,          /* tag length */
1903           bool is_func,         /* tag is a function */
1904           char *linestart,      /* start of the line where tag is */
1905           int linelen,          /* length of the line where tag is */
1906           int lno,              /* line number */
1907           long int cno)         /* character number */
1908 {
1909   bool named = (name != NULL && namelen > 0);
1910   char *nname = NULL;
1911
1912   if (!CTAGS && named)          /* maybe set named to false */
1913     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1914        such that etags.el can guess a name from it. */
1915     {
1916       int i;
1917       register const char *cp = name;
1918
1919       for (i = 0; i < namelen; i++)
1920         if (notinname (*cp++))
1921           break;
1922       if (i == namelen)                         /* rule #1 */
1923         {
1924           cp = linestart + linelen - namelen;
1925           if (notinname (linestart[linelen-1]))
1926             cp -= 1;                            /* rule #4 */
1927           if (cp >= linestart                   /* rule #2 */
1928               && (cp == linestart
1929                   || notinname (cp[-1]))        /* rule #3 */
1930               && strneq (name, cp, namelen))    /* rule #2 */
1931             named = false;      /* use implicit tag name */
1932         }
1933     }
1934
1935   if (named)
1936     nname = savenstr (name, namelen);
1937
1938   pfnote (nname, is_func, linestart, linelen, lno, cno);
1939 }
1940
1941 /* Record a tag. */
1942 static void
1943 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1944         long int cno)
1945                                 /* tag name, or NULL if unnamed */
1946                                 /* tag is a function */
1947                                 /* start of the line where tag is */
1948                                 /* length of the line where tag is */
1949                                 /* line number */
1950                                 /* character number */
1951 {
1952   register node *np;
1953
1954   assert (name == NULL || name[0] != '\0');
1955   if (CTAGS && name == NULL)
1956     return;
1957
1958   np = xnew (1, node);
1959
1960   /* If ctags mode, change name "main" to M<thisfilename>. */
1961   if (CTAGS && !cxref_style && streq (name, "main"))
1962     {
1963       char *fp = strrchr (curfdp->taggedfname, '/');
1964       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1965       fp = strrchr (np->name, '.');
1966       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1967         fp[0] = '\0';
1968     }
1969   else
1970     np->name = name;
1971   np->valid = true;
1972   np->been_warned = false;
1973   np->fdp = curfdp;
1974   np->is_func = is_func;
1975   np->lno = lno;
1976   if (np->fdp->usecharno)
1977     /* Our char numbers are 0-base, because of C language tradition?
1978        ctags compatibility?  old versions compatibility?   I don't know.
1979        Anyway, since emacs's are 1-base we expect etags.el to take care
1980        of the difference.  If we wanted to have 1-based numbers, we would
1981        uncomment the +1 below. */
1982     np->cno = cno /* + 1 */ ;
1983   else
1984     np->cno = invalidcharno;
1985   np->left = np->right = NULL;
1986   if (CTAGS && !cxref_style)
1987     {
1988       if (strlen (linestart) < 50)
1989         np->regex = concat (linestart, "$", "");
1990       else
1991         np->regex = savenstr (linestart, 50);
1992     }
1993   else
1994     np->regex = savenstr (linestart, linelen);
1995
1996   add_node (np, &nodehead);
1997 }
1998
1999 /*
2000  * free_tree ()
2001  *      recurse on left children, iterate on right children.
2002  */
2003 static void
2004 free_tree (register node *np)
2005 {
2006   while (np)
2007     {
2008       register node *node_right = np->right;
2009       free_tree (np->left);
2010       free (np->name);
2011       free (np->regex);
2012       free (np);
2013       np = node_right;
2014     }
2015 }
2016
2017 /*
2018  * free_fdesc ()
2019  *      delete a file description
2020  */
2021 static void
2022 free_fdesc (register fdesc *fdp)
2023 {
2024   free (fdp->infname);
2025   free (fdp->infabsname);
2026   free (fdp->infabsdir);
2027   free (fdp->taggedfname);
2028   free (fdp->prop);
2029   free (fdp);
2030 }
2031
2032 /*
2033  * add_node ()
2034  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2035  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2036  *      balancing.
2037  *
2038  *      add_node is the only function allowed to add nodes, so it can
2039  *      maintain state.
2040  */
2041 static void
2042 add_node (node *np, node **cur_node_p)
2043 {
2044   register int dif;
2045   register node *cur_node = *cur_node_p;
2046
2047   if (cur_node == NULL)
2048     {
2049       *cur_node_p = np;
2050       last_node = np;
2051       return;
2052     }
2053
2054   if (!CTAGS)
2055     /* Etags Mode */
2056     {
2057       /* For each file name, tags are in a linked sublist on the right
2058          pointer.  The first tags of different files are a linked list
2059          on the left pointer.  last_node points to the end of the last
2060          used sublist. */
2061       if (last_node != NULL && last_node->fdp == np->fdp)
2062         {
2063           /* Let's use the same sublist as the last added node. */
2064           assert (last_node->right == NULL);
2065           last_node->right = np;
2066           last_node = np;
2067         }
2068       else if (cur_node->fdp == np->fdp)
2069         {
2070           /* Scanning the list we found the head of a sublist which is
2071              good for us.  Let's scan this sublist. */
2072           add_node (np, &cur_node->right);
2073         }
2074       else
2075         /* The head of this sublist is not good for us.  Let's try the
2076            next one. */
2077         add_node (np, &cur_node->left);
2078     } /* if ETAGS mode */
2079
2080   else
2081     {
2082       /* Ctags Mode */
2083       dif = strcmp (np->name, cur_node->name);
2084
2085       /*
2086        * If this tag name matches an existing one, then
2087        * do not add the node, but maybe print a warning.
2088        */
2089       if (no_duplicates && !dif)
2090         {
2091           if (np->fdp == cur_node->fdp)
2092             {
2093               if (!no_warnings)
2094                 {
2095                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2096                            np->fdp->infname, lineno, np->name);
2097                   fprintf (stderr, "Second entry ignored\n");
2098                 }
2099             }
2100           else if (!cur_node->been_warned && !no_warnings)
2101             {
2102               fprintf
2103                 (stderr,
2104                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2105                  np->fdp->infname, cur_node->fdp->infname, np->name);
2106               cur_node->been_warned = true;
2107             }
2108           return;
2109         }
2110
2111       /* Actually add the node */
2112       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2113     } /* if CTAGS mode */
2114 }
2115
2116 /*
2117  * invalidate_nodes ()
2118  *      Scan the node tree and invalidate all nodes pointing to the
2119  *      given file description (CTAGS case) or free them (ETAGS case).
2120  */
2121 static void
2122 invalidate_nodes (fdesc *badfdp, node **npp)
2123 {
2124   node *np = *npp;
2125
2126   if (np == NULL)
2127     return;
2128
2129   if (CTAGS)
2130     {
2131       if (np->left != NULL)
2132         invalidate_nodes (badfdp, &np->left);
2133       if (np->fdp == badfdp)
2134         np->valid = false;
2135       if (np->right != NULL)
2136         invalidate_nodes (badfdp, &np->right);
2137     }
2138   else
2139     {
2140       assert (np->fdp != NULL);
2141       if (np->fdp == badfdp)
2142         {
2143           *npp = np->left;      /* detach the sublist from the list */
2144           np->left = NULL;      /* isolate it */
2145           free_tree (np);       /* free it */
2146           invalidate_nodes (badfdp, npp);
2147         }
2148       else
2149         invalidate_nodes (badfdp, &np->left);
2150     }
2151 }
2152
2153 \f
2154 static int total_size_of_entries (node *);
2155 static int number_len (long) ATTRIBUTE_CONST;
2156
/* Return how many decimal digits are needed to print the non-negative
   number NUM; the result is never less than 1. */
static int
number_len (long int num)
{
  int digits;

  /* One digit to start with, one more per remaining division by ten. */
  for (digits = 1; (num /= 10) > 0; digits++)
    continue;

  return digits;
}
2166
2167 /*
2168  * Return total number of characters that put_entries will output for
2169  * the nodes in the linked list at the right of the specified node.
2170  * This count is irrelevant with etags.el since emacs 19.34 at least,
2171  * but is still supplied for backward compatibility.
2172  */
2173 static int
2174 total_size_of_entries (register node *np)
2175 {
2176   register int total = 0;
2177
2178   for (; np != NULL; np = np->right)
2179     if (np->valid)
2180       {
2181         total += strlen (np->regex) + 1;                /* pat\177 */
2182         if (np->name != NULL)
2183           total += strlen (np->name) + 1;               /* name\001 */
2184         total += number_len ((long) np->lno) + 1;       /* lno, */
2185         if (np->cno != invalidcharno)                   /* cno */
2186           total += number_len (np->cno);
2187         total += 1;                                     /* newline */
2188       }
2189
2190   return total;
2191 }
2192
2193 static void
2194 put_entries (register node *np)
2195 {
2196   register char *sp;
2197   static fdesc *fdp = NULL;
2198
2199   if (np == NULL)
2200     return;
2201
2202   /* Output subentries that precede this one */
2203   if (CTAGS)
2204     put_entries (np->left);
2205
2206   /* Output this entry */
2207   if (np->valid)
2208     {
2209       if (!CTAGS)
2210         {
2211           /* Etags mode */
2212           if (fdp != np->fdp)
2213             {
2214               fdp = np->fdp;
2215               fprintf (tagf, "\f\n%s,%d\n",
2216                        fdp->taggedfname, total_size_of_entries (np));
2217               fdp->written = true;
2218             }
2219           fputs (np->regex, tagf);
2220           fputc ('\177', tagf);
2221           if (np->name != NULL)
2222             {
2223               fputs (np->name, tagf);
2224               fputc ('\001', tagf);
2225             }
2226           fprintf (tagf, "%d,", np->lno);
2227           if (np->cno != invalidcharno)
2228             fprintf (tagf, "%ld", np->cno);
2229           fputs ("\n", tagf);
2230         }
2231       else
2232         {
2233           /* Ctags mode */
2234           if (np->name == NULL)
2235             error ("internal error: NULL name in ctags mode.");
2236
2237           if (cxref_style)
2238             {
2239               if (vgrind_style)
2240                 fprintf (stdout, "%s %s %d\n",
2241                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2242               else
2243                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2244                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2245             }
2246           else
2247             {
2248               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2249
2250               if (np->is_func)
2251                 {               /* function or #define macro with args */
2252                   putc (searchar, tagf);
2253                   putc ('^', tagf);
2254
2255                   for (sp = np->regex; *sp; sp++)
2256                     {
2257                       if (*sp == '\\' || *sp == searchar)
2258                         putc ('\\', tagf);
2259                       putc (*sp, tagf);
2260                     }
2261                   putc (searchar, tagf);
2262                 }
2263               else
2264                 {               /* anything else; text pattern inadequate */
2265                   fprintf (tagf, "%d", np->lno);
2266                 }
2267               putc ('\n', tagf);
2268             }
2269         }
2270     } /* if this node contains a valid tag */
2271
2272   /* Output subentries that follow this one */
2273   put_entries (np->right);
2274   if (!CTAGS)
2275     put_entries (np->left);
2276 }
2277
2278 \f
/* C extensions: bit flags telling the C-family variants apart.
   Note that the values of C_STAR and C_JAVA both include the C_PLPL
   bit. */
#define C_EXT   0x00fff         /* mask covering the C extension bits */
#define C_PLAIN 0x00000         /* plain C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* Java */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2287
/*
 * The C symbol tables.
 *
 * Symbol classes attached to the words of the keyword table below.
 * The enumerators keep their original order, hence their values.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2302
2303 /* Feed stuff between (but not including) %[ and %] lines to:
2304      gperf -m 5
2305 %[
2306 %compare-strncmp
2307 %enum
2308 %struct-type
2309 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2310 %%
2311 if,             0,                      st_C_ignore
2312 for,            0,                      st_C_ignore
2313 while,          0,                      st_C_ignore
2314 switch,         0,                      st_C_ignore
2315 return,         0,                      st_C_ignore
2316 __attribute__,  0,                      st_C_attribute
2317 GTY,            0,                      st_C_attribute
2318 @interface,     0,                      st_C_objprot
2319 @protocol,      0,                      st_C_objprot
2320 @implementation,0,                      st_C_objimpl
2321 @end,           0,                      st_C_objend
2322 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2323 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2324 friend,         C_PLPL,                 st_C_ignore
2325 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2326 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2327 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2328 class,          0,                      st_C_class
2329 namespace,      C_PLPL,                 st_C_struct
2330 domain,         C_STAR,                 st_C_struct
2331 union,          0,                      st_C_struct
2332 struct,         0,                      st_C_struct
2333 extern,         0,                      st_C_extern
2334 enum,           0,                      st_C_enum
2335 typedef,        0,                      st_C_typedef
2336 define,         0,                      st_C_define
2337 undef,          0,                      st_C_define
2338 operator,       C_PLPL,                 st_C_operator
2339 template,       0,                      st_C_template
2340 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2341 DEFUN,          0,                      st_C_gnumacro
2342 SYSCALL,        0,                      st_C_gnumacro
2343 ENTRY,          0,                      st_C_gnumacro
2344 PSEUDO,         0,                      st_C_gnumacro
2345 # These are defined inside C functions, so currently they are not met.
2346 # EXFUN used in glibc, DEFVAR_* in emacs.
2347 #EXFUN,         0,                      st_C_gnumacro
2348 #DEFVAR_,       0,                      st_C_gnumacro
2349 %]
2350 and replace lines between %< and %> with its output, then:
2351  - remove the #if characterset check
2352  - make in_word_set static and not inline. */
2353 /*%<*/
2354 /* C code produced by gperf version 3.0.1 */
2355 /* Command-line: gperf -m 5  */
2356 /* Computed positions: -k'2-3' */
2357
2358 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2359 /* maximum key range = 33, duplicates = 0 */
2360
/* Hash function of the gperf-generated keyword table: the word length
   LEN plus the table values of the second character of STR and, unless
   LEN is exactly 2, of its third character as well. */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  /* Only a word of exactly two characters leaves the third one out. */
  if (len != 2)
    hval += asso_values[(unsigned char) str[2]];
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2406
2407 static struct C_stab_entry *
2408 in_word_set (register const char *str, register unsigned int len)
2409 {
2410   enum
2411     {
2412       TOTAL_KEYWORDS = 33,
2413       MIN_WORD_LENGTH = 2,
2414       MAX_WORD_LENGTH = 15,
2415       MIN_HASH_VALUE = 2,
2416       MAX_HASH_VALUE = 34
2417     };
2418
2419   static struct C_stab_entry wordlist[] =
2420     {
2421       {""}, {""},
2422       {"if",            0,                      st_C_ignore},
2423       {"GTY",           0,                      st_C_attribute},
2424       {"@end",          0,                      st_C_objend},
2425       {"union",         0,                      st_C_struct},
2426       {"define",                0,                      st_C_define},
2427       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2428       {"template",      0,                      st_C_template},
2429       {"operator",      C_PLPL,                 st_C_operator},
2430       {"@interface",    0,                      st_C_objprot},
2431       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2432       {"friend",                C_PLPL,                 st_C_ignore},
2433       {"typedef",       0,                      st_C_typedef},
2434       {"return",                0,                      st_C_ignore},
2435       {"@implementation",0,                     st_C_objimpl},
2436       {"@protocol",     0,                      st_C_objprot},
2437       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2438       {"extern",                0,                      st_C_extern},
2439       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2440       {"struct",                0,                      st_C_struct},
2441       {"domain",                C_STAR,                 st_C_struct},
2442       {"switch",                0,                      st_C_ignore},
2443       {"enum",          0,                      st_C_enum},
2444       {"for",           0,                      st_C_ignore},
2445       {"namespace",     C_PLPL,                 st_C_struct},
2446       {"class",         0,                      st_C_class},
2447       {"while",         0,                      st_C_ignore},
2448       {"undef",         0,                      st_C_define},
2449       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2450       {"__attribute__", 0,                      st_C_attribute},
2451       {"SYSCALL",       0,                      st_C_gnumacro},
2452       {"ENTRY",         0,                      st_C_gnumacro},
2453       {"PSEUDO",                0,                      st_C_gnumacro},
2454       {"DEFUN",         0,                      st_C_gnumacro}
2455     };
2456
2457   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2458     {
2459       int key = hash (str, len);
2460
2461       if (key <= MAX_HASH_VALUE && key >= 0)
2462         {
2463           const char *s = wordlist[key].name;
2464
2465           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2466             return &wordlist[key];
2467         }
2468     }
2469   return 0;
2470 }
2471 /*%>*/
2472
2473 static enum sym_type
2474 C_symtype (char *str, int len, int c_ext)
2475 {
2476   register struct C_stab_entry *se = in_word_set (str, len);
2477
2478   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2479     return st_none;
2480   return se->type;
2481 }
2482
2483 \f
2484 /*
2485  * Ignoring __attribute__ ((list))
      *
      * consider_token sets this flag when it meets the __attribute__
      * keyword.  While the flag is set, C_entries does not hand tokens
      * to consider_token.
2486  */
2487 static bool inattribute;        /* looking at an __attribute__ construct */
2488
2489 /*
2490  * C functions and variables are recognized using a simple
2491  * finite automaton.  fvdef is its state variable.
      * C_entries resets it to fvnone before scanning a file.
2492  */
2493 static enum
2494 {
2495   fvnone,                       /* nothing seen */
2496   fdefunkey,                    /* GNU macro keyword (DEFUN, ENTRY, ...) seen */
2497   fdefunname,                   /* name following that GNU macro keyword seen */
2498   foperator,                    /* func: operator keyword seen (cplpl) */
2499   fvnameseen,                   /* function or variable name seen */
2500   fstartlist,                   /* func: just after open parenthesis */
2501   finlist,                      /* func: in parameter list */
2502   flistseen,                    /* func: after parameter list */
2503   fignore,                      /* func: before open brace */
2504   vignore                       /* var-like: ignore until ';' */
2505 } fvdef;
2506
2507 static bool fvextern;           /* func or var: extern keyword seen */
2508
2509 /*
2510  * typedefs are recognized using a simple finite automaton.
2511  * typdef is its state variable.
      * consider_token leaves tnone for tkeyseen only when the global
      * `typedefs' is true.
2512  */
2513 static enum
2514 {
2515   tnone,                        /* nothing seen */
2516   tkeyseen,                     /* typedef keyword seen */
2517   ttypeseen,                    /* defined type seen */
2518   tinbody,                      /* inside typedef body */
2519   tend,                         /* just before typedef tag */
2520   tignore                       /* junk after typedef tag */
2521 } typdef;
2522
2523 /*
2524  * struct-like structures (enum, struct and union) are recognized
2525  * using another simple finite automaton.  `structdef' is its state
2526  * variable.  In consider_token the class keyword shares the same path
      * as the struct and enum keywords.
2527  */
2528 static enum
2529 {
2530   snone,                        /* nothing seen yet,
2531                                    or in struct body if bracelev > 0 */
2532   skeyseen,                     /* struct-like keyword seen */
2533   stagseen,                     /* struct-like tag seen */
2534   scolonseen                    /* colon, or a keyword such as Java `extends',
                                        seen after struct-like tag */
2535 } structdef;
2536
2537 /*
2538  * When objdef is different from onone, objtag is the name of the class.
      * The initializer is only a placeholder: consider_token replaces it
      * with a savenstr copy of the class name and never frees that copy.
2539  */
2540 static const char *objtag = "<uninited>";
2541
2542 /*
2543  * Yet another little state machine to deal with preprocessor lines.
      * The CNL macro resets it to dnone at each new line, whereas
      * CNL_SAVE_DEFINEDEF reads a new line without touching it.
2544  */
2545 static enum
2546 {
2547   dnone,                        /* nothing seen */
2548   dsharpseen,                   /* '#' seen as first char on line */
2549   ddefineseen,                  /* '#' and 'define' (or 'undef') seen */
2550   dignorerest                   /* ignore rest of line */
2551 } definedef;
2552
2553 /*
2554  * State machine for Objective C protocols and implementations.
2555  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2556  */
2557 static enum
2558 {
2559   onone,                        /* nothing seen */
2560   oprotocol,                    /* @interface or @protocol seen */
2561   oimplementation,              /* @implementation seen */
2562   otagseen,                     /* class name seen */
2563   oparenseen,                   /* parenthesis before category seen */
2564   ocatseen,                     /* category name seen */
2565   oinbody,                      /* in @implementation body */
2566   omethodsign,                  /* in @implementation body, after +/- */
2567   omethodtag,                   /* after method name */
2568   omethodcolon,                 /* after method colon */
2569   omethodparm,                  /* after method parameter */
2570   oignore                       /* wait for @end */
2571 } objdef;
2572
2573
2574 /*
2575  * Use this structure to keep info about the token read, and how it
2576  * should be tagged.  Used by the make_C_tag function to build a tag.
2577  */
2578 static struct tok
2579 {
2580   char *line;                   /* string containing the token */
2581   int offset;                   /* where the token starts in LINE */
2582   int length;                   /* token length */
2583   /*
2584     The previous members can be used to pass strings around for generic
2585     purposes.  The following ones specifically refer to creating tags.  In this
2586     case the token contained here is the pattern that will be used to create a
2587     tag.
2588   */
2589   bool valid;                   /* if false, do not create a tag; the token
2590                                    should be invalidated whenever a state
2591                                    machine is reset prematurely */
2592   bool named;                   /* create a named tag */
2593   int lineno;                   /* source line number of tag */
2594   long linepos;                 /* source char number of tag */
2595 } token;                        /* latest token read */
2596
2597 /*
2598  * Variables and functions for dealing with nested structures.
2599  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
      *
      * cname and bracelev are parallel arrays: entry I holds the name of
      * the I-th enclosing struct-like scope and the brace level recorded
      * for it.  C_entries allocates both arrays the first time it runs.
2600  */
2601 static void pushclass_above (int, char *, int);
2602 static void popclass_above (int);
2603 static void write_classname (linebuffer *, const char *qualifier);
2604
2605 static struct {
2606   char **cname;                 /* nested class names; NULL if unnamed */
2607   int *bracelev;                /* nested class brace level */
2608   int nl;                       /* class nesting level (elements used) */
2609   int size;                     /* allocated length of both arrays */
2610 } cstack;                       /* stack for nested declaration tags */
2611 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2612 #define nestlev         (cstack.nl)
2613 /* After struct keyword or in struct body, not inside a nested function.
        Relies on a variable named bracelev being in scope at the point of use. */
2614 #define instruct        (structdef == snone && nestlev > 0                      \
2615                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2616
2617 static void
2618 pushclass_above (int bracelev, char *str, int len)
2619 {
2620   int nl;
2621
2622   popclass_above (bracelev);
2623   nl = cstack.nl;
2624   if (nl >= cstack.size)
2625     {
2626       int size = cstack.size *= 2;
2627       xrnew (cstack.cname, size, char *);
2628       xrnew (cstack.bracelev, size, int);
2629     }
2630   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2631   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2632   cstack.bracelev[nl] = bracelev;
2633   cstack.nl = nl + 1;
2634 }
2635
2636 static void
2637 popclass_above (int bracelev)
2638 {
2639   int nl;
2640
2641   for (nl = cstack.nl - 1;
2642        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2643        nl--)
2644     {
2645       free (cstack.cname[nl]);
2646       cstack.nl = nl;
2647     }
2648 }
2649
2650 static void
2651 write_classname (linebuffer *cn, const char *qualifier)
2652 {
2653   int i, len;
2654   int qlen = strlen (qualifier);
2655
2656   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2657     {
2658       len = 0;
2659       cn->len = 0;
2660       cn->buffer[0] = '\0';
2661     }
2662   else
2663     {
2664       len = strlen (cstack.cname[0]);
2665       linebuffer_setlen (cn, len);
2666       strcpy (cn->buffer, cstack.cname[0]);
2667     }
2668   for (i = 1; i < cstack.nl; i++)
2669     {
2670       char *s = cstack.cname[i];
2671       if (s == NULL)
2672         continue;
2673       linebuffer_setlen (cn, len + qlen + strlen (s));
2674       len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2675     }
2676 }
2677
2678 \f
2679 static bool consider_token (char *, int, int, int *, int, int, bool *);
2680 static void make_C_tag (bool);
2681
2682 /*
2683  * consider_token ()
2684  *      checks to see if the current token is at the start of a
2685  *      function or variable, or corresponds to a typedef, or
2686  *      is a struct/union/enum tag, or #define, or an enum constant.
2687  *
2688  *      *IS_FUNC_OR_VAR gets true if the token is a function or variable
2689  *      name, an operator, a GNU macro name or an Objective C class or
      *      category name; for a #define it records whether the macro has
      *      args.  C_EXTP points to which language we are looking at, and
      *      is updated when C++ is auto-detected.
2690  *
2691  * Globals
2692  *      fvdef                   IN OUT
2693  *      structdef               IN OUT
2694  *      definedef               IN OUT
2695  *      typdef                  IN OUT
2696  *      objdef                  IN OUT
      *      fvextern                OUT
      *      inattribute             OUT
      *      objtag                  OUT
      *      token_name              OUT (Objective C method names)
2697  */
2698
2699 static bool
2700 consider_token (char *str, int len, int c, int *c_extp,
2701                 int bracelev, int parlev, bool *is_func_or_var)
2702                                 /* STR            IN: token pointer */
2703                                 /* LEN            IN: token length */
2704                                 /* C              IN: first char after the token */
2705                                 /* C_EXTP         IN, OUT: C extensions mask */
2706                                 /* BRACELEV       IN: brace level */
2707                                 /* PARLEV         IN: parenthesis level */
2708                                 /* IS_FUNC_OR_VAR OUT: function or variable found */
2709 {
2710   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2711      structtype is the type of the preceding struct-like keyword, and
2712      structbracelev is the brace level where it has been seen. */
2713   static enum sym_type structtype;
2714   static int structbracelev;
2715   static enum sym_type toktype;
2716
2717
2718   toktype = C_symtype (str, len, *c_extp);
2719
2720   /*
2721    * Skip __attribute__
2722    */
2723   if (toktype == st_C_attribute)
2724     {
2725       inattribute = true;
2726       return false;
2727      }
2728
2729    /*
2730     * Advance the definedef state machine.
2731     */
2732    switch (definedef)
2733      {
2734      case dnone:
2735        /* We're not on a preprocessor line. */
2736        if (toktype == st_C_gnumacro)
2737          {
2738            fvdef = fdefunkey;
2739            return false;
2740          }
2741        break;
2742      case dsharpseen:
            /* First token after '#': only a st_C_define keyword starts a
               macro definition, anything else makes the rest of the line
               ignored.  */
2743        if (toktype == st_C_define)
2744          {
2745            definedef = ddefineseen;
2746          }
2747        else
2748          {
2749            definedef = dignorerest;
2750          }
2751        return false;
2752      case ddefineseen:
2753        /*
2754         * Make a tag for any macro, unless it is a constant
2755         * and constantypedefs is false.
2756         */
2757        definedef = dignorerest;
2758        *is_func_or_var = (c == '(');
2759        if (!*is_func_or_var && !constantypedefs)
2760          return false;
2761        else
2762          return true;
2763      case dignorerest:
2764        return false;
2765      default:
2766        error ("internal error: definedef value.");
2767      }
2768
2769    /*
2770     * Now typedefs
2771     */
2772    switch (typdef)
2773      {
2774      case tnone:
2775        if (toktype == st_C_typedef)
2776          {
                /* The function/variable automaton is reset even when
                   typedefs are not being tagged.  */
2777            if (typedefs)
2778              typdef = tkeyseen;
2779            fvextern = false;
2780            fvdef = fvnone;
2781            return false;
2782          }
2783        break;
2784      case tkeyseen:
2785        switch (toktype)
2786          {
2787          case st_none:
2788          case st_C_class:
2789          case st_C_struct:
2790          case st_C_enum:
2791            typdef = ttypeseen;
2792            break;
2793          default:
2794            break;
2795          }
2796        break;
2797      case ttypeseen:
2798        if (structdef == snone && fvdef == fvnone)
2799          {
2800            fvdef = fvnameseen;
2801            return true;
2802          }
2803        break;
2804      case tend:
2805        switch (toktype)
2806          {
2807          case st_C_class:
2808          case st_C_struct:
2809          case st_C_enum:
2810            return false;
2811          default:
2812            return true;
2813          }
2814      default:
2815        break;
2816      }
2817
         /* Struct-like keywords: class, struct, enum, template and the
            Java keywords classified as st_C_javastruct.  */
2818    switch (toktype)
2819      {
2820      case st_C_javastruct:
2821        if (structdef == stagseen)
2822          structdef = scolonseen;
2823        return false;
2824      case st_C_template:
2825      case st_C_class:
2826        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2827            && bracelev == 0
2828            && definedef == dnone && structdef == snone
2829            && typdef == tnone && fvdef == fvnone)
2830          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2831        if (toktype == st_C_template)
2832          break;
2833        /* FALLTHRU */
2834      case st_C_struct:
2835      case st_C_enum:
2836        if (parlev == 0
2837            && fvdef != vignore
2838            && (typdef == tkeyseen
2839                || (typedefs_or_cplusplus && structdef == snone)))
2840          {
                /* Remember which keyword was seen and at which brace
                   level, for the enum-constant test further down.  */
2841            structdef = skeyseen;
2842            structtype = toktype;
2843            structbracelev = bracelev;
2844            if (fvdef == fvnameseen)
2845              fvdef = fvnone;
2846          }
2847        return false;
2848      default:
2849        break;
2850      }
2851
         /* The token right after a struct-like keyword is its tag.  */
2852    if (structdef == skeyseen)
2853      {
2854        structdef = stagseen;
2855        return true;
2856      }
2857
2858    if (typdef != tnone)
2859      definedef = dnone;
2860
2861    /* Detect Objective C constructs. */
2862    switch (objdef)
2863      {
2864      case onone:
2865        switch (toktype)
2866          {
2867          case st_C_objprot:
2868            objdef = oprotocol;
2869            return false;
2870          case st_C_objimpl:
2871            objdef = oimplementation;
2872            return false;
2873          default:
2874            break;
2875          }
2876        break;
2877      case oimplementation:
2878        /* Save the class tag for functions or variables defined inside. */
2879        objtag = savenstr (str, len);
2880        objdef = oinbody;
2881        return false;
2882      case oprotocol:
2883        /* Save the class tag for categories. */
2884        objtag = savenstr (str, len);
2885        objdef = otagseen;
2886        *is_func_or_var = true;
2887        return true;
2888      case oparenseen:
2889        objdef = ocatseen;
2890        *is_func_or_var = true;
2891        return true;
2892      case oinbody:
2893        break;
2894      case omethodsign:
2895        if (parlev == 0)
2896          {
                /* First part of the method name: start token_name with it.  */
2897            fvdef = fvnone;
2898            objdef = omethodtag;
2899            linebuffer_setlen (&token_name, len);
2900            memcpy (token_name.buffer, str, len);
2901            token_name.buffer[len] = '\0';
2902            return true;
2903          }
2904        return false;
2905      case omethodcolon:
2906        if (parlev == 0)
2907          objdef = omethodparm;
2908        return false;
2909      case omethodparm:
2910        if (parlev == 0)
2911          {
2912            objdef = omethodtag;
                /* Later parts of the method name are appended to
                   token_name only when class_qualify is set.  */
2913            if (class_qualify)
2914              {
2915                int oldlen = token_name.len;
2916                fvdef = fvnone;
2917                linebuffer_setlen (&token_name, oldlen + len);
2918                memcpy (token_name.buffer + oldlen, str, len);
2919                token_name.buffer[oldlen + len] = '\0';
2920              }
2921            return true;
2922          }
2923        return false;
2924      case oignore:
2925        if (toktype == st_C_objend)
2926          {
2927            /* Memory leakage here: the string pointed by objtag is
2928               never released, because many tests would be needed to
2929               avoid breaking on incorrect input code.  The amount of
2930               memory leaked here is the sum of the lengths of the
2931               class tags.
2932            free (objtag); */
2933            objdef = onone;
2934          }
2935        return false;
2936      default:
2937        break;
2938      }
2939
2940    /* A function, variable or enum constant? */
2941    switch (toktype)
2942      {
2943      case st_C_extern:
2944        fvextern = true;
2945        switch  (fvdef)
2946          {
2947          case finlist:
2948          case flistseen:
2949          case fignore:
2950          case vignore:
2951            break;
2952          default:
2953            fvdef = fvnone;
2954          }
2955        return false;
2956      case st_C_ignore:
2957        fvextern = false;
2958        fvdef = vignore;
2959        return false;
2960      case st_C_operator:
2961        fvdef = foperator;
2962        *is_func_or_var = true;
2963        return true;
2964      case st_none:
2965        if (constantypedefs
2966            && structdef == snone
2967            && structtype == st_C_enum && bracelev > structbracelev
2968            /* Don't tag tokens in expressions that assign values to enum
2969               constants.  */
2970            && fvdef != vignore)
2971          return true;           /* enum constant */
2972        switch (fvdef)
2973          {
2974          case fdefunkey:
2975            if (bracelev > 0)
2976              break;
2977            fvdef = fdefunname;  /* GNU macro */
2978            *is_func_or_var = true;
2979            return true;
2980          case fvnone:
2981            switch (typdef)
2982              {
2983              case ttypeseen:
2984                return false;
2985              case tnone:
                    /* An asm or __asm__ statement: ignore what follows.  */
2986                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2987                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2988                  {
2989                    fvdef = vignore;
2990                    return false;
2991                  }
2992                break;
2993              default:
2994                break;
2995              }
2996           /* FALLTHRU */
2997           case fvnameseen:
                 /* A name ending in "::operator" is a C++ operator.  */
2998           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2999             {
3000               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3001                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3002               fvdef = foperator;
3003               *is_func_or_var = true;
3004               return true;
3005             }
                 /* Inside braces, only tokens directly in a struct body
                    are candidates.  */
3006           if (bracelev > 0 && !instruct)
3007             break;
3008           fvdef = fvnameseen;   /* function or variable */
3009           *is_func_or_var = true;
3010           return true;
3011          default:
3012            break;
3013         }
3014       break;
3015      default:
3016        break;
3017     }
3018
3019   return false;
3020 }
3021
3022 \f
3023 /*
3024  * C_entries often keeps pointers to tokens or lines which are older than
3025  * the line currently read.  By keeping two line buffers, and switching
3026  * them at end of line, it is possible to use those pointers.
      *
      * The macros below expand to code that uses names expected to be in
      * scope at the point of use (curndx, newndx, c_ext, lp, quotednl,
      * inf, savetoken, charno), so they are meant for C_entries.
3027  */
3028 static struct
3029 {
3030   long linepos;                 /* value of charno when the line was read */
3031   linebuffer lb;                /* the line itself */
3032 } lbs[2];
3033
     /* Which of the two buffers is in use, and how to swap them.  */
3034 #define current_lb_is_new (newndx == curndx)
3035 #define switch_line_buffers() (curndx = 1 - curndx)
3036
     /* Shorthands for the current and the new buffer and their positions.  */
3037 #define curlb (lbs[curndx].lb)
3038 #define newlb (lbs[newndx].lb)
3039 #define curlinepos (lbs[curndx].linepos)
3040 #define newlinepos (lbs[newndx].linepos)
3041
     /* Tests on the language mask c_ext.  */
3042 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3043 #define cplpl (c_ext & C_PLPL)
3044 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3045
     /* Read the next line into the current buffer and make it the new
        one, leaving definedef untouched.  */
3046 #define CNL_SAVE_DEFINEDEF()                                            \
3047 do {                                                                    \
3048   curlinepos = charno;                                                  \
3049   readline (&curlb, inf);                                               \
3050   lp = curlb.buffer;                                                    \
3051   quotednl = false;                                                     \
3052   newndx = curndx;                                                      \
3053 } while (0)
3054
     /* Like CNL_SAVE_DEFINEDEF, but also restore a valid savetoken into
        token and reset definedef to dnone.  */
3055 #define CNL()                                                           \
3056 do {                                                                    \
3057   CNL_SAVE_DEFINEDEF ();                                                \
3058   if (savetoken.valid)                                                  \
3059     {                                                                   \
3060       token = savetoken;                                                \
3061       savetoken.valid = false;                                          \
3062     }                                                                   \
3063   definedef = dnone;                                                    \
3064 } while (0)
3065
3066
3067 static void
3068 make_C_tag (bool isfun)
3069 {
3070   /* This function is never called when token.valid is false, but
3071      we must protect against invalid input or internal errors. */
3072   if (token.valid)
3073     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3074               token.offset+token.length+1, token.lineno, token.linepos);
3075   else if (DEBUG)
3076     {                             /* this branch is optimized away if !DEBUG */
3077       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3078                 token_name.len + 17, isfun, token.line,
3079                 token.offset+token.length+1, token.lineno, token.linepos);
3080       error ("INVALID TOKEN");
3081     }
3082
3083   token.valid = false;
3084 }
3085
/*
 * Return true while INF has neither its end-of-file nor its error
 * indicator set, i.e. while reading from it may still yield data.
 */
static bool
perhaps_more_input (FILE *inf)
{
  if (feof (inf))
    return false;
  return ferror (inf) == 0;
}
3091
3092
3093 /*
3094  * C_entries ()
3095  *      This routine finds functions, variables, typedefs,
3096  *      #define's, enum constants and struct/union/enum definitions in
3097  *      C syntax and adds them to the list.
3098  */
3099 static void
3100 C_entries (int c_ext, FILE *inf)
3101                                 /* extension of C */
3102                                 /* input file */
3103 {
3104   register char c;              /* latest char read; '\0' for end of line */
3105   register char *lp;            /* pointer one beyond the character `c' */
3106   int curndx, newndx;           /* indices for current and new lb */
3107   register int tokoff;          /* offset in line of start of current token */
3108   register int toklen;          /* length of current token */
3109   const char *qualifier;        /* string used to qualify names */
3110   int qlen;                     /* length of qualifier */
3111   int bracelev;                 /* current brace level */
3112   int bracketlev;               /* current bracket level */
3113   int parlev;                   /* current parenthesis level */
3114   int attrparlev;               /* __attribute__ parenthesis level */
3115   int templatelev;              /* current template level */
3116   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3117   bool incomm, inquote, inchar, quotednl, midtoken;
3118   bool yacc_rules;              /* in the rules part of a yacc file */
3119   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3120
3121
3122   linebuffer_init (&lbs[0].lb);
3123   linebuffer_init (&lbs[1].lb);
3124   if (cstack.size == 0)
3125     {
3126       cstack.size = (DEBUG) ? 1 : 4;
3127       cstack.nl = 0;
3128       cstack.cname = xnew (cstack.size, char *);
3129       cstack.bracelev = xnew (cstack.size, int);
3130     }
3131
3132   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3133   curndx = newndx = 0;
3134   lp = curlb.buffer;
3135   *lp = 0;
3136
3137   fvdef = fvnone; fvextern = false; typdef = tnone;
3138   structdef = snone; definedef = dnone; objdef = onone;
3139   yacc_rules = false;
3140   midtoken = inquote = inchar = incomm = quotednl = false;
3141   token.valid = savetoken.valid = false;
3142   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3143   if (cjava)
3144     { qualifier = "."; qlen = 1; }
3145   else
3146     { qualifier = "::"; qlen = 2; }
3147
3148
3149   while (perhaps_more_input (inf))
3150     {
3151       c = *lp++;
3152       if (c == '\\')
3153         {
3154           /* If we are at the end of the line, the next character is a
3155              '\0'; do not skip it, because it is what tells us
3156              to read the next line.  */
3157           if (*lp == '\0')
3158             {
3159               quotednl = true;
3160               continue;
3161             }
3162           lp++;
3163           c = ' ';
3164         }
3165       else if (incomm)
3166         {
3167           switch (c)
3168             {
3169             case '*':
3170               if (*lp == '/')
3171                 {
3172                   c = *lp++;
3173                   incomm = false;
3174                 }
3175               break;
3176             case '\0':
3177               /* Newlines inside comments do not end macro definitions in
3178                  traditional cpp. */
3179               CNL_SAVE_DEFINEDEF ();
3180               break;
3181             }
3182           continue;
3183         }
3184       else if (inquote)
3185         {
3186           switch (c)
3187             {
3188             case '"':
3189               inquote = false;
3190               break;
3191             case '\0':
3192               /* Newlines inside strings do not end macro definitions
3193                  in traditional cpp, even though compilers don't
3194                  usually accept them. */
3195               CNL_SAVE_DEFINEDEF ();
3196               break;
3197             }
3198           continue;
3199         }
3200       else if (inchar)
3201         {
3202           switch (c)
3203             {
3204             case '\0':
3205               /* Hmmm, something went wrong. */
3206               CNL ();
3207               /* FALLTHRU */
3208             case '\'':
3209               inchar = false;
3210               break;
3211             }
3212           continue;
3213         }
3214       else switch (c)
3215         {
3216         case '"':
3217           inquote = true;
3218           if (bracketlev > 0)
3219             continue;
3220           if (inattribute)
3221             break;
3222           switch (fvdef)
3223             {
3224             case fdefunkey:
3225             case fstartlist:
3226             case finlist:
3227             case fignore:
3228             case vignore:
3229               break;
3230             default:
3231               fvextern = false;
3232               fvdef = fvnone;
3233             }
3234           continue;
3235         case '\'':
3236           inchar = true;
3237           if (bracketlev > 0)
3238             continue;
3239           if (inattribute)
3240             break;
3241           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3242             {
3243               fvextern = false;
3244               fvdef = fvnone;
3245             }
3246           continue;
3247         case '/':
3248           if (*lp == '*')
3249             {
3250               incomm = true;
3251               lp++;
3252               c = ' ';
3253               if (bracketlev > 0)
3254                 continue;
3255             }
3256           else if (/* cplpl && */ *lp == '/')
3257             {
3258               c = '\0';
3259             }
3260           break;
3261         case '%':
3262           if ((c_ext & YACC) && *lp == '%')
3263             {
3264               /* Entering or exiting rules section in yacc file. */
3265               lp++;
3266               definedef = dnone; fvdef = fvnone; fvextern = false;
3267               typdef = tnone; structdef = snone;
3268               midtoken = inquote = inchar = incomm = quotednl = false;
3269               bracelev = 0;
3270               yacc_rules = !yacc_rules;
3271               continue;
3272             }
3273           else
3274             break;
3275         case '#':
3276           if (definedef == dnone)
3277             {
3278               char *cp;
3279               bool cpptoken = true;
3280
3281               /* Look back on this line.  If all blanks, or nonblanks
3282                  followed by an end of comment, this is a preprocessor
3283                  token. */
3284               for (cp = newlb.buffer; cp < lp-1; cp++)
3285                 if (!c_isspace (*cp))
3286                   {
3287                     if (*cp == '*' && cp[1] == '/')
3288                       {
3289                         cp++;
3290                         cpptoken = true;
3291                       }
3292                     else
3293                       cpptoken = false;
3294                   }
3295               if (cpptoken)
3296                 {
3297                   definedef = dsharpseen;
3298                   /* This is needed for tagging enum values: when there are
3299                      preprocessor conditionals inside the enum, we need to
3300                      reset the value of fvdef so that the next enum value is
3301                      tagged even though the one before it did not end in a
3302                      comma.  */
3303                   if (fvdef == vignore && instruct && parlev == 0)
3304                     {
3305                       if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3306                         fvdef = fvnone;
3307                     }
3308                 }
3309             } /* if (definedef == dnone) */
3310           continue;
3311         case '[':
3312           bracketlev++;
3313           continue;
3314         default:
3315           if (bracketlev > 0)
3316             {
3317               if (c == ']')
3318                 --bracketlev;
3319               else if (c == '\0')
3320                 CNL_SAVE_DEFINEDEF ();
3321               continue;
3322             }
3323           break;
3324         } /* switch (c) */
3325
3326
3327       /* Consider token only if some involved conditions are satisfied. */
3328       if (typdef != tignore
3329           && definedef != dignorerest
3330           && fvdef != finlist
3331           && templatelev == 0
3332           && (definedef != dnone
3333               || structdef != scolonseen)
3334           && !inattribute)
3335         {
3336           if (midtoken)
3337             {
3338               if (endtoken (c))
3339                 {
3340                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3341                     /* This handles :: in the middle,
3342                        but not at the beginning of an identifier.
3343                        Also, space-separated :: is not recognized. */
3344                     {
3345                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3346                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3347                       lp += 2;
3348                       toklen += 2;
3349                       c = lp[-1];
3350                       goto still_in_token;
3351                     }
3352                   else
3353                     {
3354                       bool funorvar = false;
3355
3356                       if (yacc_rules
3357                           || consider_token (newlb.buffer + tokoff, toklen, c,
3358                                              &c_ext, bracelev, parlev,
3359                                              &funorvar))
3360                         {
3361                           if (fvdef == foperator)
3362                             {
3363                               char *oldlp = lp;
3364                               lp = skip_spaces (lp-1);
3365                               if (*lp != '\0')
3366                                 lp += 1;
3367                               while (*lp != '\0'
3368                                      && !c_isspace (*lp) && *lp != '(')
3369                                 lp += 1;
3370                               c = *lp++;
3371                               toklen += lp - oldlp;
3372                             }
3373                           token.named = false;
3374                           if (!plainc
3375                               && nestlev > 0 && definedef == dnone)
3376                             /* in struct body */
3377                             {
3378                               if (class_qualify)
3379                                 {
3380                                   int len;
3381                                   write_classname (&token_name, qualifier);
3382                                   len = token_name.len;
3383                                   linebuffer_setlen (&token_name,
3384                                                      len + qlen + toklen);
3385                                   sprintf (token_name.buffer + len, "%s%.*s",
3386                                            qualifier, toklen,
3387                                            newlb.buffer + tokoff);
3388                                 }
3389                               else
3390                                 {
3391                                   linebuffer_setlen (&token_name, toklen);
3392                                   sprintf (token_name.buffer, "%.*s",
3393                                            toklen, newlb.buffer + tokoff);
3394                                 }
3395                               token.named = true;
3396                             }
3397                           else if (objdef == ocatseen)
3398                             /* Objective C category */
3399                             {
3400                               if (class_qualify)
3401                                 {
3402                                   int len = strlen (objtag) + 2 + toklen;
3403                                   linebuffer_setlen (&token_name, len);
3404                                   sprintf (token_name.buffer, "%s(%.*s)",
3405                                            objtag, toklen,
3406                                            newlb.buffer + tokoff);
3407                                 }
3408                               else
3409                                 {
3410                                   linebuffer_setlen (&token_name, toklen);
3411                                   sprintf (token_name.buffer, "%.*s",
3412                                            toklen, newlb.buffer + tokoff);
3413                                 }
3414                               token.named = true;
3415                             }
3416                           else if (objdef == omethodtag
3417                                    || objdef == omethodparm)
3418                             /* Objective C method */
3419                             {
3420                               token.named = true;
3421                             }
3422                           else if (fvdef == fdefunname)
3423                             /* GNU DEFUN and similar macros */
3424                             {
3425                               bool defun = (newlb.buffer[tokoff] == 'F');
3426                               int off = tokoff;
3427                               int len = toklen;
3428
3429                               /* Rewrite the tag so that emacs lisp DEFUNs
3430                                  can be found by their elisp name */
3431                               if (defun)
3432                                 {
3433                                   off += 1;
3434                                   len -= 1;
3435                                 }
3436                               linebuffer_setlen (&token_name, len);
3437                               memcpy (token_name.buffer,
3438                                       newlb.buffer + off, len);
3439                               token_name.buffer[len] = '\0';
3440                               if (defun)
3441                                 while (--len >= 0)
3442                                   if (token_name.buffer[len] == '_')
3443                                     token_name.buffer[len] = '-';
3444                               token.named = defun;
3445                             }
3446                           else
3447                             {
3448                               linebuffer_setlen (&token_name, toklen);
3449                               memcpy (token_name.buffer,
3450                                       newlb.buffer + tokoff, toklen);
3451                               token_name.buffer[toklen] = '\0';
3452                               /* Name macros and members. */
3453                               token.named = (structdef == stagseen
3454                                              || typdef == ttypeseen
3455                                              || typdef == tend
3456                                              || (funorvar
3457                                                  && definedef == dignorerest)
3458                                              || (funorvar
3459                                                  && definedef == dnone
3460                                                  && structdef == snone
3461                                                  && bracelev > 0));
3462                             }
3463                           token.lineno = lineno;
3464                           token.offset = tokoff;
3465                           token.length = toklen;
3466                           token.line = newlb.buffer;
3467                           token.linepos = newlinepos;
3468                           token.valid = true;
3469
3470                           if (definedef == dnone
3471                               && (fvdef == fvnameseen
3472                                   || fvdef == foperator
3473                                   || structdef == stagseen
3474                                   || typdef == tend
3475                                   || typdef == ttypeseen
3476                                   || objdef != onone))
3477                             {
3478                               if (current_lb_is_new)
3479                                 switch_line_buffers ();
3480                             }
3481                           else if (definedef != dnone
3482                                    || fvdef == fdefunname
3483                                    || instruct)
3484                             make_C_tag (funorvar);
3485                         }
3486                       else /* not yacc and consider_token failed */
3487                         {
3488                           if (inattribute && fvdef == fignore)
3489                             {
3490                               /* We have just met __attribute__ after a
3491                                  function parameter list: do not tag the
3492                                  function again. */
3493                               fvdef = fvnone;
3494                             }
3495                         }
3496                       midtoken = false;
3497                     }
3498                 } /* if (endtoken (c)) */
3499               else if (intoken (c))
3500                 still_in_token:
3501                 {
3502                   toklen++;
3503                   continue;
3504                 }
3505             } /* if (midtoken) */
3506           else if (begtoken (c))
3507             {
3508               switch (definedef)
3509                 {
3510                 case dnone:
3511                   switch (fvdef)
3512                     {
3513                     case fstartlist:
3514                       /* This prevents tagging fb in
3515                          void (__attribute__((noreturn)) *fb) (void);
3516                          Fixing this is not easy and not very important. */
3517                       fvdef = finlist;
3518                       continue;
3519                     case flistseen:
3520                       if (plainc || declarations)
3521                         {
3522                           make_C_tag (true); /* a function */
3523                           fvdef = fignore;
3524                         }
3525                       break;
3526                     default:
3527                       break;
3528                     }
3529                   if (structdef == stagseen && !cjava)
3530                     {
3531                       popclass_above (bracelev);
3532                       structdef = snone;
3533                     }
3534                   break;
3535                 case dsharpseen:
3536                   savetoken = token;
3537                   break;
3538                 default:
3539                   break;
3540                 }
3541               if (!yacc_rules || lp == newlb.buffer + 1)
3542                 {
3543                   tokoff = lp - 1 - newlb.buffer;
3544                   toklen = 1;
3545                   midtoken = true;
3546                 }
3547               continue;
3548             } /* if (begtoken) */
3549         } /* if must look at token */
3550
3551
3552       /* Detect end of line, colon, comma, semicolon and various braces
3553          after having handled a token.*/
3554       switch (c)
3555         {
3556         case ':':
3557           if (inattribute)
3558             break;
3559           if (yacc_rules && token.offset == 0 && token.valid)
3560             {
3561               make_C_tag (false); /* a yacc function */
3562               break;
3563             }
3564           if (definedef != dnone)
3565             break;
3566           switch (objdef)
3567             {
3568             case otagseen:
3569               objdef = oignore;
3570               make_C_tag (true); /* an Objective C class */
3571               break;
3572             case omethodtag:
3573             case omethodparm:
3574               objdef = omethodcolon;
3575               if (class_qualify)
3576                 {
3577                   int toklen = token_name.len;
3578                   linebuffer_setlen (&token_name, toklen + 1);
3579                   strcpy (token_name.buffer + toklen, ":");
3580                 }
3581               break;
3582             default:
3583               break;
3584             }
3585           if (structdef == stagseen)
3586             {
3587               structdef = scolonseen;
3588               break;
3589             }
3590           /* Should be useless, but may be work as a safety net. */
3591           if (cplpl && fvdef == flistseen)
3592             {
3593               make_C_tag (true); /* a function */
3594               fvdef = fignore;
3595               break;
3596             }
3597           break;
3598         case ';':
3599           if (definedef != dnone || inattribute)
3600             break;
3601           switch (typdef)
3602             {
3603             case tend:
3604             case ttypeseen:
3605               make_C_tag (false); /* a typedef */
3606               typdef = tnone;
3607               fvdef = fvnone;
3608               break;
3609             case tnone:
3610             case tinbody:
3611             case tignore:
3612               switch (fvdef)
3613                 {
3614                 case fignore:
3615                   if (typdef == tignore || cplpl)
3616                     fvdef = fvnone;
3617                   break;
3618                 case fvnameseen:
3619                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3620                       || (members && instruct))
3621                     make_C_tag (false); /* a variable */
3622                   fvextern = false;
3623                   fvdef = fvnone;
3624                   token.valid = false;
3625                   break;
3626                 case flistseen:
3627                   if ((declarations
3628                        && (cplpl || !instruct)
3629                        && (typdef == tnone || (typdef != tignore && instruct)))
3630                       || (members
3631                           && plainc && instruct))
3632                     make_C_tag (true);  /* a function */
3633                   /* FALLTHRU */
3634                 default:
3635                   fvextern = false;
3636                   fvdef = fvnone;
3637                   if (declarations
3638                        && cplpl && structdef == stagseen)
3639                     make_C_tag (false); /* forward declaration */
3640                   else
3641                     token.valid = false;
3642                 } /* switch (fvdef) */
3643               /* FALLTHRU */
3644             default:
3645               if (!instruct)
3646                 typdef = tnone;
3647             }
3648           if (structdef == stagseen)
3649             structdef = snone;
3650           break;
3651         case ',':
3652           if (definedef != dnone || inattribute)
3653             break;
3654           switch (objdef)
3655             {
3656             case omethodtag:
3657             case omethodparm:
3658               make_C_tag (true); /* an Objective C method */
3659               objdef = oinbody;
3660               break;
3661             default:
3662               break;
3663             }
3664           switch (fvdef)
3665             {
3666             case fdefunkey:
3667             case foperator:
3668             case fstartlist:
3669             case finlist:
3670             case fignore:
3671               break;
3672             case vignore:
3673               if (instruct && parlev == 0)
3674                 fvdef = fvnone;
3675               break;
3676             case fdefunname:
3677               fvdef = fignore;
3678               break;
3679             case fvnameseen:
3680               if (parlev == 0
3681                   && ((globals
3682                        && bracelev == 0
3683                        && templatelev == 0
3684                        && (!fvextern || declarations))
3685                       || (members && instruct)))
3686                   make_C_tag (false); /* a variable */
3687               break;
3688             case flistseen:
3689               if ((declarations && typdef == tnone && !instruct)
3690                   || (members && typdef != tignore && instruct))
3691                 {
3692                   make_C_tag (true); /* a function */
3693                   fvdef = fvnameseen;
3694                 }
3695               else if (!declarations)
3696                 fvdef = fvnone;
3697               token.valid = false;
3698               break;
3699             default:
3700               fvdef = fvnone;
3701             }
3702           if (structdef == stagseen)
3703             structdef = snone;
3704           break;
3705         case ']':
3706           if (definedef != dnone || inattribute)
3707             break;
3708           if (structdef == stagseen)
3709             structdef = snone;
3710           switch (typdef)
3711             {
3712             case ttypeseen:
3713             case tend:
3714               typdef = tignore;
3715               make_C_tag (false);       /* a typedef */
3716               break;
3717             case tnone:
3718             case tinbody:
3719               switch (fvdef)
3720                 {
3721                 case foperator:
3722                 case finlist:
3723                 case fignore:
3724                 case vignore:
3725                   break;
3726                 case fvnameseen:
3727                   if ((members && bracelev == 1)
3728                       || (globals && bracelev == 0
3729                           && (!fvextern || declarations)))
3730                     make_C_tag (false); /* a variable */
3731                   /* FALLTHRU */
3732                 default:
3733                   fvdef = fvnone;
3734                 }
3735               break;
3736             default:
3737               break;
3738             }
3739           break;
3740         case '(':
3741           if (inattribute)
3742             {
3743               attrparlev++;
3744               break;
3745             }
3746           if (definedef != dnone)
3747             break;
3748           if (objdef == otagseen && parlev == 0)
3749             objdef = oparenseen;
3750           switch (fvdef)
3751             {
3752             case fvnameseen:
3753               if (typdef == ttypeseen
3754                   && *lp != '*'
3755                   && !instruct)
3756                 {
3757                   /* This handles constructs like:
3758                      typedef void OperatorFun (int fun); */
3759                   make_C_tag (false);
3760                   typdef = tignore;
3761                   fvdef = fignore;
3762                   break;
3763                 }
3764               /* FALLTHRU */
3765             case foperator:
3766               fvdef = fstartlist;
3767               break;
3768             case flistseen:
3769               fvdef = finlist;
3770               break;
3771             default:
3772               break;
3773             }
3774           parlev++;
3775           break;
3776         case ')':
3777           if (inattribute)
3778             {
3779               if (--attrparlev == 0)
3780                 inattribute = false;
3781               break;
3782             }
3783           if (definedef != dnone)
3784             break;
3785           if (objdef == ocatseen && parlev == 1)
3786             {
3787               make_C_tag (true); /* an Objective C category */
3788               objdef = oignore;
3789             }
3790           if (--parlev == 0)
3791             {
3792               switch (fvdef)
3793                 {
3794                 case fstartlist:
3795                 case finlist:
3796                   fvdef = flistseen;
3797                   break;
3798                 default:
3799                   break;
3800                 }
3801               if (!instruct
3802                   && (typdef == tend
3803                       || typdef == ttypeseen))
3804                 {
3805                   typdef = tignore;
3806                   make_C_tag (false); /* a typedef */
3807                 }
3808             }
3809           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3810             parlev = 0;
3811           break;
3812         case '{':
3813           if (definedef != dnone)
3814             break;
3815           if (typdef == ttypeseen)
3816             {
3817               /* Whenever typdef is set to tinbody (currently only
3818                  here), typdefbracelev should be set to bracelev. */
3819               typdef = tinbody;
3820               typdefbracelev = bracelev;
3821             }
3822           switch (fvdef)
3823             {
3824             case flistseen:
3825               if (cplpl && !class_qualify)
3826                 {
3827                   /* Remove class and namespace qualifiers from the token,
3828                      leaving only the method/member name.  */
3829                   char *cc, *uqname = token_name.buffer;
3830                   char *tok_end = token_name.buffer + token_name.len;
3831
3832                   for (cc = token_name.buffer; cc < tok_end; cc++)
3833                     {
3834                       if (*cc == ':' && cc[1] == ':')
3835                         {
3836                           uqname = cc + 2;
3837                           cc++;
3838                         }
3839                     }
3840                   if (uqname > token_name.buffer)
3841                     {
3842                       int uqlen = strlen (uqname);
3843                       linebuffer_setlen (&token_name, uqlen);
3844                       memmove (token_name.buffer, uqname, uqlen + 1);
3845                     }
3846                 }
3847               make_C_tag (true);    /* a function */
3848               /* FALLTHRU */
3849             case fignore:
3850               fvdef = fvnone;
3851               break;
3852             case fvnone:
3853               switch (objdef)
3854                 {
3855                 case otagseen:
3856                   make_C_tag (true); /* an Objective C class */
3857                   objdef = oignore;
3858                   break;
3859                 case omethodtag:
3860                 case omethodparm:
3861                   make_C_tag (true); /* an Objective C method */
3862                   objdef = oinbody;
3863                   break;
3864                 default:
3865                   /* Neutralize `extern "C" {' grot. */
3866                   if (bracelev == 0 && structdef == snone && nestlev == 0
3867                       && typdef == tnone)
3868                     bracelev = -1;
3869                 }
3870               break;
3871             default:
3872               break;
3873             }
3874           switch (structdef)
3875             {
3876             case skeyseen:         /* unnamed struct */
3877               pushclass_above (bracelev, NULL, 0);
3878               structdef = snone;
3879               break;
3880             case stagseen:         /* named struct or enum */
3881             case scolonseen:       /* a class */
3882               pushclass_above (bracelev,token.line+token.offset, token.length);
3883               structdef = snone;
3884               make_C_tag (false);  /* a struct or enum */
3885               break;
3886             default:
3887               break;
3888             }
3889           bracelev += 1;
3890           break;
3891         case '*':
3892           if (definedef != dnone)
3893             break;
3894           if (fvdef == fstartlist)
3895             {
3896               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3897               token.valid = false;
3898             }
3899           break;
3900         case '}':
3901           if (definedef != dnone)
3902             break;
3903           bracelev -= 1;
3904           if (!ignoreindent && lp == newlb.buffer + 1)
3905             {
3906               if (bracelev != 0)
3907                 token.valid = false; /* unexpected value, token unreliable */
3908               bracelev = 0;     /* reset brace level if first column */
3909               parlev = 0;       /* also reset paren level, just in case... */
3910             }
3911           else if (bracelev < 0)
3912             {
3913               token.valid = false; /* something gone amiss, token unreliable */
3914               bracelev = 0;
3915             }
3916           if (bracelev == 0 && fvdef == vignore)
3917             fvdef = fvnone;             /* end of function */
3918           popclass_above (bracelev);
3919           structdef = snone;
3920           /* Only if typdef == tinbody is typdefbracelev significant. */
3921           if (typdef == tinbody && bracelev <= typdefbracelev)
3922             {
3923               assert (bracelev == typdefbracelev);
3924               typdef = tend;
3925             }
3926           break;
3927         case '=':
3928           if (definedef != dnone)
3929             break;
3930           switch (fvdef)
3931             {
3932             case foperator:
3933             case finlist:
3934             case fignore:
3935             case vignore:
3936               break;
3937             case fvnameseen:
3938               if ((members && bracelev == 1)
3939                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3940                 make_C_tag (false); /* a variable */
3941               /* FALLTHRU */
3942             default:
3943               fvdef = vignore;
3944             }
3945           break;
3946         case '<':
3947           if (cplpl
3948               && (structdef == stagseen || fvdef == fvnameseen))
3949             {
3950               templatelev++;
3951               break;
3952             }
3953           goto resetfvdef;
3954         case '>':
3955           if (templatelev > 0)
3956             {
3957               templatelev--;
3958               break;
3959             }
3960           goto resetfvdef;
3961         case '+':
3962         case '-':
3963           if (objdef == oinbody && bracelev == 0)
3964             {
3965               objdef = omethodsign;
3966               break;
3967             }
3968           /* FALLTHRU */
3969         resetfvdef:
3970         case '#': case '~': case '&': case '%': case '/':
3971         case '|': case '^': case '!': case '.': case '?':
3972           if (definedef != dnone)
3973             break;
3974           /* These surely cannot follow a function tag in C. */
3975           switch (fvdef)
3976             {
3977             case foperator:
3978             case finlist:
3979             case fignore:
3980             case vignore:
3981               break;
3982             default:
3983               fvdef = fvnone;
3984             }
3985           break;
3986         case '\0':
3987           if (objdef == otagseen)
3988             {
3989               make_C_tag (true); /* an Objective C class */
3990               objdef = oignore;
3991             }
3992           /* If a macro spans multiple lines don't reset its state. */
3993           if (quotednl)
3994             CNL_SAVE_DEFINEDEF ();
3995           else
3996             CNL ();
3997           break;
3998         } /* switch (c) */
3999
4000     } /* while not eof */
4001
4002   free (lbs[0].lb.buffer);
4003   free (lbs[1].lb.buffer);
4004 }
4005
4006 /*
4007  * Process either a C++ file or a C file depending on the setting
4008  * of a global flag.
4009  */
4010 static void
4011 default_C_entries (FILE *inf)
4012 {
4013   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4014 }
4015
/* Tag INF as plain C: C_entries is called with no dialect flag set. */
static void
plain_C_entries (FILE *inf)
{
  const int no_dialect_flags = 0;

  C_entries (no_dialect_flags, inf);
}
4022
4023 /* Always do C++. */
4024 static void
4025 Cplusplus_entries (FILE *inf)
4026 {
4027   C_entries (C_PLPL, inf);
4028 }
4029
4030 /* Always do Java. */
4031 static void
4032 Cjava_entries (FILE *inf)
4033 {
4034   C_entries (C_JAVA, inf);
4035 }
4036
4037 /* Always do C*. */
4038 static void
4039 Cstar_entries (FILE *inf)
4040 {
4041   C_entries (C_STAR, inf);
4042 }
4043
4044 /* Always do Yacc. */
4045 static void
4046 Yacc_entries (FILE *inf)
4047 {
4048   C_entries (YACC, inf);
4049 }
4050
4051 \f
/* Useful macros. */

/* Loop header: while perhaps_more_input (FILE_POINTER) holds, read the
   next line into LINE_BUFFER with readline and point CHAR_POINTER at
   the start of its text.  Must be followed by the loop body (a
   statement or a braced block).  The last line of the definition
   deliberately carries no continuation backslash, so the macro cannot
   absorb whatever source line happens to follow it.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (perhaps_more_input (file_pointer)                              \
         && (readline (&(line_buffer), file_pointer),                   \
             (char_pointer) = (line_buffer).buffer,                     \
             true))

/* True if CP points at the keyword KW followed by a character for
   which notinname holds.  On success CP is moved past the keyword and
   past any spaces after it; on failure CP is left unchanged.  CP is
   evaluated more than once and must be a modifiable lvalue.  */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
  ((assert ("" kw), true)   /* syntax error if not a literal string */  \
   && strneq ((cp), kw, sizeof (kw)-1)          /* cp points at kw */   \
   && notinname ((cp)[sizeof (kw)-1])           /* end of kw */         \
   && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */

/* Similar to LOOKING_AT, but compares with strncaseeq, does not use
   notinname to check what follows the keyword, and does not skip
   trailing spaces: on success CP is left pointing at the character
   right after KW.  */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  ((assert ("" kw), true) /* syntax error if not a literal string */    \
   && strncaseeq ((cp), kw, sizeof (kw)-1)      /* cp points at kw */   \
   && ((cp) += sizeof (kw)-1))                  /* step past kw only */
4070
4071 /*
4072  * Read a file, but do no processing.  This is used to do regexp
4073  * matching on files that have no language defined.
4074  */
4075 static void
4076 just_read_file (FILE *inf)
4077 {
4078   while (perhaps_more_input (inf))
4079     readline (&lb, inf);
4080 }
4081
4082 \f
4083 /* Fortran parsing */
4084
4085 static void F_takeprec (void);
4086 static void F_getit (FILE *);
4087
4088 static void
4089 F_takeprec (void)
4090 {
4091   dbp = skip_spaces (dbp);
4092   if (*dbp != '*')
4093     return;
4094   dbp++;
4095   dbp = skip_spaces (dbp);
4096   if (strneq (dbp, "(*)", 3))
4097     {
4098       dbp += 3;
4099       return;
4100     }
4101   if (!c_isdigit (*dbp))
4102     {
4103       --dbp;                    /* force failure */
4104       return;
4105     }
4106   do
4107     dbp++;
4108   while (c_isdigit (*dbp));
4109 }
4110
4111 static void
4112 F_getit (FILE *inf)
4113 {
4114   register char *cp;
4115
4116   dbp = skip_spaces (dbp);
4117   if (*dbp == '\0')
4118     {
4119       readline (&lb, inf);
4120       dbp = lb.buffer;
4121       if (dbp[5] != '&')
4122         return;
4123       dbp += 6;
4124       dbp = skip_spaces (dbp);
4125     }
4126   if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4127     return;
4128   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4129     continue;
4130   make_tag (dbp, cp-dbp, true,
4131             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4132 }
4133
4134
4135 static void
4136 Fortran_functions (FILE *inf)
4137 {
4138   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4139     {
4140       if (*dbp == '%')
4141         dbp++;                  /* Ratfor escape to fortran */
4142       dbp = skip_spaces (dbp);
4143       if (*dbp == '\0')
4144         continue;
4145
4146       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4147         dbp = skip_spaces (dbp);
4148
4149       if (LOOKING_AT_NOCASE (dbp, "pure"))
4150         dbp = skip_spaces (dbp);
4151
4152       if (LOOKING_AT_NOCASE (dbp, "elemental"))
4153         dbp = skip_spaces (dbp);
4154
4155       switch (c_tolower (*dbp))
4156         {
4157         case 'i':
4158           if (nocase_tail ("integer"))
4159             F_takeprec ();
4160           break;
4161         case 'r':
4162           if (nocase_tail ("real"))
4163             F_takeprec ();
4164           break;
4165         case 'l':
4166           if (nocase_tail ("logical"))
4167             F_takeprec ();
4168           break;
4169         case 'c':
4170           if (nocase_tail ("complex") || nocase_tail ("character"))
4171             F_takeprec ();
4172           break;
4173         case 'd':
4174           if (nocase_tail ("double"))
4175             {
4176               dbp = skip_spaces (dbp);
4177               if (*dbp == '\0')
4178                 continue;
4179               if (nocase_tail ("precision"))
4180                 break;
4181               continue;
4182             }
4183           break;
4184         }
4185       dbp = skip_spaces (dbp);
4186       if (*dbp == '\0')
4187         continue;
4188       switch (c_tolower (*dbp))
4189         {
4190         case 'f':
4191           if (nocase_tail ("function"))
4192             F_getit (inf);
4193           continue;
4194         case 's':
4195           if (nocase_tail ("subroutine"))
4196             F_getit (inf);
4197           continue;
4198         case 'e':
4199           if (nocase_tail ("entry"))
4200             F_getit (inf);
4201           continue;
4202         case 'b':
4203           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4204             {
4205               dbp = skip_spaces (dbp);
4206               if (*dbp == '\0') /* assume un-named */
4207                 make_tag ("blockdata", 9, true,
4208                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4209               else
4210                 F_getit (inf);  /* look for name */
4211             }
4212           continue;
4213         }
4214     }
4215 }
4216
4217 \f
4218 /*
4219  * Go language support
4220  * Original code by Xi Lu <lx@shellcodes.org> (2016)
4221  */
4222 static void
4223 Go_functions(FILE *inf)
4224 {
4225   char *cp, *name;
4226
4227   LOOP_ON_INPUT_LINES(inf, lb, cp)
4228     {
4229       cp = skip_spaces (cp);
4230
4231       if (LOOKING_AT (cp, "package"))
4232         {
4233           name = cp;
4234           while (!notinname (*cp) && *cp != '\0')
4235             cp++;
4236           make_tag (name, cp - name, false, lb.buffer,
4237                     cp - lb.buffer + 1, lineno, linecharno);
4238         }
4239       else if (LOOKING_AT (cp, "func"))
4240         {
4241           /* Go implementation of interface, such as:
4242              func (n *Integer) Add(m Integer) ...
4243              skip `(n *Integer)` part.
4244           */
4245           if (*cp == '(')
4246             {
4247               while (*cp != ')')
4248                 cp++;
4249               cp = skip_spaces (cp+1);
4250             }
4251
4252           if (*cp)
4253             {
4254               name = cp;
4255
4256               while (!notinname (*cp))
4257                 cp++;
4258
4259               make_tag (name, cp - name, true, lb.buffer,
4260                         cp - lb.buffer + 1, lineno, linecharno);
4261             }
4262         }
4263       else if (members && LOOKING_AT (cp, "type"))
4264         {
4265           name = cp;
4266
4267           /* Ignore the likes of the following:
4268              type (
4269                     A
4270              )
4271            */
4272           if (*cp == '(')
4273             return;
4274
4275           while (!notinname (*cp) && *cp != '\0')
4276             cp++;
4277
4278           make_tag (name, cp - name, false, lb.buffer,
4279                     cp - lb.buffer + 1, lineno, linecharno);
4280         }
4281     }
4282 }
4283
4284 \f
4285 /*
4286  * Ada parsing
4287  * Original code by
4288  * Philippe Waroquiers (1998)
4289  */
4290
4291 /* Once we are positioned after an "interesting" keyword, let's get
4292    the real tag value necessary. */
4293 static void
4294 Ada_getit (FILE *inf, const char *name_qualifier)
4295 {
4296   register char *cp;
4297   char *name;
4298   char c;
4299
4300   while (perhaps_more_input (inf))
4301     {
4302       dbp = skip_spaces (dbp);
4303       if (*dbp == '\0'
4304           || (dbp[0] == '-' && dbp[1] == '-'))
4305         {
4306           readline (&lb, inf);
4307           dbp = lb.buffer;
4308         }
4309       switch (c_tolower (*dbp))
4310         {
4311         case 'b':
4312           if (nocase_tail ("body"))
4313             {
4314               /* Skipping body of   procedure body   or   package body or ....
4315                  resetting qualifier to body instead of spec. */
4316               name_qualifier = "/b";
4317               continue;
4318             }
4319           break;
4320         case 't':
4321           /* Skipping type of   task type   or   protected type ... */
4322           if (nocase_tail ("type"))
4323             continue;
4324           break;
4325         }
4326       if (*dbp == '"')
4327         {
4328           dbp += 1;
4329           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4330             continue;
4331         }
4332       else
4333         {
4334           dbp = skip_spaces (dbp);
4335           for (cp = dbp;
4336                c_isalnum (*cp) || *cp == '_' || *cp == '.';
4337                cp++)
4338             continue;
4339           if (cp == dbp)
4340             return;
4341         }
4342       c = *cp;
4343       *cp = '\0';
4344       name = concat (dbp, name_qualifier, "");
4345       *cp = c;
4346       make_tag (name, strlen (name), true,
4347                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4348       free (name);
4349       if (c == '"')
4350         dbp = cp + 1;
4351       return;
4352     }
4353 }
4354
4355 static void
4356 Ada_funcs (FILE *inf)
4357 {
4358   bool inquote = false;
4359   bool skip_till_semicolumn = false;
4360
4361   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4362     {
4363       while (*dbp != '\0')
4364         {
4365           /* Skip a string i.e. "abcd". */
4366           if (inquote || (*dbp == '"'))
4367             {
4368               dbp = strchr (dbp + !inquote, '"');
4369               if (dbp != NULL)
4370                 {
4371                   inquote = false;
4372                   dbp += 1;
4373                   continue;     /* advance char */
4374                 }
4375               else
4376                 {
4377                   inquote = true;
4378                   break;        /* advance line */
4379                 }
4380             }
4381
4382           /* Skip comments. */
4383           if (dbp[0] == '-' && dbp[1] == '-')
4384             break;              /* advance line */
4385
4386           /* Skip character enclosed in single quote i.e. 'a'
4387              and skip single quote starting an attribute i.e. 'Image. */
4388           if (*dbp == '\'')
4389             {
4390               dbp++ ;
4391               if (*dbp != '\0')
4392                 dbp++;
4393               continue;
4394             }
4395
4396           if (skip_till_semicolumn)
4397             {
4398               if (*dbp == ';')
4399                 skip_till_semicolumn = false;
4400               dbp++;
4401               continue;         /* advance char */
4402             }
4403
4404           /* Search for beginning of a token.  */
4405           if (!begtoken (*dbp))
4406             {
4407               dbp++;
4408               continue;         /* advance char */
4409             }
4410
4411           /* We are at the beginning of a token. */
4412           switch (c_tolower (*dbp))
4413             {
4414             case 'f':
4415               if (!packages_only && nocase_tail ("function"))
4416                 Ada_getit (inf, "/f");
4417               else
4418                 break;          /* from switch */
4419               continue;         /* advance char */
4420             case 'p':
4421               if (!packages_only && nocase_tail ("procedure"))
4422                 Ada_getit (inf, "/p");
4423               else if (nocase_tail ("package"))
4424                 Ada_getit (inf, "/s");
4425               else if (nocase_tail ("protected")) /* protected type */
4426                 Ada_getit (inf, "/t");
4427               else
4428                 break;          /* from switch */
4429               continue;         /* advance char */
4430
4431             case 'u':
4432               if (typedefs && !packages_only && nocase_tail ("use"))
4433                 {
4434                   /* when tagging types, avoid tagging  use type Pack.Typename;
4435                      for this, we will skip everything till a ; */
4436                   skip_till_semicolumn = true;
4437                   continue;     /* advance char */
4438                 }
4439
4440             case 't':
4441               if (!packages_only && nocase_tail ("task"))
4442                 Ada_getit (inf, "/k");
4443               else if (typedefs && !packages_only && nocase_tail ("type"))
4444                 {
4445                   Ada_getit (inf, "/t");
4446                   while (*dbp != '\0')
4447                     dbp += 1;
4448                 }
4449               else
4450                 break;          /* from switch */
4451               continue;         /* advance char */
4452             }
4453
4454           /* Look for the end of the token. */
4455           while (!endtoken (*dbp))
4456             dbp++;
4457
4458         } /* advance char */
4459     } /* advance line */
4460 }
4461
4462 \f
4463 /*
4464  * Unix and microcontroller assembly tag handling
4465  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4466  * Idea by Bob Weiner, Motorola Inc. (1994)
4467  */
4468 static void
4469 Asm_labels (FILE *inf)
4470 {
4471   register char *cp;
4472
4473   LOOP_ON_INPUT_LINES (inf, lb, cp)
4474     {
4475       /* If first char is alphabetic or one of [_.$], test for colon
4476          following identifier. */
4477       if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4478         {
4479           /* Read past label. */
4480           cp++;
4481           while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4482             cp++;
4483           if (*cp == ':' || c_isspace (*cp))
4484             /* Found end of label, so copy it and add it to the table. */
4485             make_tag (lb.buffer, cp - lb.buffer, true,
4486                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4487         }
4488     }
4489 }
4490
4491 \f
4492 /*
4493  * Perl support
4494  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4495  *                 /^use constant[ \t\n]+[^ \t\n{=,;]+/
4496  * Perl variable names: /^(my|local).../
4497  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4498  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4499  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4500  */
4501 static void
4502 Perl_functions (FILE *inf)
4503 {
4504   char *package = savestr ("main"); /* current package name */
4505   register char *cp;
4506
4507   LOOP_ON_INPUT_LINES (inf, lb, cp)
4508     {
4509       cp = skip_spaces (cp);
4510
4511       if (LOOKING_AT (cp, "package"))
4512         {
4513           free (package);
4514           get_tag (cp, &package);
4515         }
4516       else if (LOOKING_AT (cp, "sub"))
4517         {
4518           char *pos, *sp;
4519
4520         subr:
4521           sp = cp;
4522           while (!notinname (*cp))
4523             cp++;
4524           if (cp == sp)
4525             continue;           /* nothing found */
4526           pos = strchr (sp, ':');
4527           if (pos && pos < cp && pos[1] == ':')
4528             /* The name is already qualified. */
4529             make_tag (sp, cp - sp, true,
4530                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4531           else
4532             /* Qualify it. */
4533             {
4534               char savechar, *name;
4535
4536               savechar = *cp;
4537               *cp = '\0';
4538               name = concat (package, "::", sp);
4539               *cp = savechar;
4540               make_tag (name, strlen (name), true,
4541                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4542               free (name);
4543             }
4544         }
4545       else if (LOOKING_AT (cp, "use constant")
4546                || LOOKING_AT (cp, "use constant::defer"))
4547         {
4548           /* For hash style multi-constant like
4549                 use constant { FOO => 123,
4550                                BAR => 456 };
4551              only the first FOO is picked up.  Parsing across the value
4552              expressions would be difficult in general, due to possible nested
4553              hashes, here-documents, etc.  */
4554           if (*cp == '{')
4555             cp = skip_spaces (cp+1);
4556           goto subr;
4557         }
4558       else if (globals) /* only if we are tagging global vars */
4559         {
4560           /* Skip a qualifier, if any. */
4561           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4562           /* After "my" or "local", but before any following paren or space. */
4563           char *varstart = cp;
4564
4565           if (qual              /* should this be removed?  If yes, how? */
4566               && (*cp == '$' || *cp == '@' || *cp == '%'))
4567             {
4568               varstart += 1;
4569               do
4570                 cp++;
4571               while (c_isalnum (*cp) || *cp == '_');
4572             }
4573           else if (qual)
4574             {
4575               /* Should be examining a variable list at this point;
4576                  could insist on seeing an open parenthesis. */
4577               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4578                 cp++;
4579             }
4580           else
4581             continue;
4582
4583           make_tag (varstart, cp - varstart, false,
4584                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4585         }
4586     }
4587   free (package);
4588 }
4589
4590
4591 /*
4592  * Python support
4593  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4594  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4595  * More ideas by seb bacon <seb@jamkit.com> (2002)
4596  */
4597 static void
4598 Python_functions (FILE *inf)
4599 {
4600   register char *cp;
4601
4602   LOOP_ON_INPUT_LINES (inf, lb, cp)
4603     {
4604       cp = skip_spaces (cp);
4605       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4606         {
4607           char *name = cp;
4608           while (!notinname (*cp) && *cp != ':')
4609             cp++;
4610           make_tag (name, cp - name, true,
4611                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4612         }
4613     }
4614 }
4615
4616 /*
4617  * Ruby support
4618  * Original code by Xi Lu <lx@shellcodes.org> (2015)
4619  */
4620 static void
4621 Ruby_functions (FILE *inf)
4622 {
4623   char *cp = NULL;
4624
4625   LOOP_ON_INPUT_LINES (inf, lb, cp)
4626     {
4627       bool is_class = false;
4628       bool is_method = false;
4629       char *name;
4630
4631       cp = skip_spaces (cp);
4632       if (c_isalpha (*cp) && c_isupper (*cp)) /* constants */
4633         {
4634           char *bp, *colon = NULL;
4635
4636           name = cp;
4637
4638           for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
4639             {
4640               if (*cp == ':')
4641                 colon = cp;
4642             }
4643           if (cp > name + 1)
4644             {
4645               bp = skip_spaces (cp);
4646               if (*bp == '=' && c_isspace (bp[1]))
4647                 {
4648                   if (colon && !c_isspace (colon[1]))
4649                     name = colon + 1;
4650                   make_tag (name, cp - name, false,
4651                             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4652                 }
4653             }
4654         }
4655       else if ((is_method = LOOKING_AT (cp, "def")) /* module/class/method */
4656                || (is_class = LOOKING_AT (cp, "class"))
4657                || LOOKING_AT (cp, "module"))
4658         {
4659           const char self_name[] = "self.";
4660           const size_t self_size1 = sizeof ("self.") - 1;
4661
4662           name = cp;
4663
4664          /* Ruby method names can end in a '='.  Also, operator overloading can
4665             define operators whose names include '='.  */
4666           while (!notinname (*cp) || *cp == '=')
4667             cp++;
4668
4669           /* Remove "self." from the method name.  */
4670           if (cp - name > self_size1
4671               && strneq (name, self_name, self_size1))
4672             name += self_size1;
4673
4674           /* Remove the class/module qualifiers from method names.  */
4675           if (is_method)
4676             {
4677               char *q;
4678
4679               for (q = name; q < cp && *q != '.'; q++)
4680                 ;
4681               if (q < cp - 1)   /* punt if we see just "FOO." */
4682                 name = q + 1;
4683             }
4684
4685           /* Don't tag singleton classes.  */
4686           if (is_class && strneq (name, "<<", 2) && cp == name + 2)
4687             continue;
4688
4689           make_tag (name, cp - name, true,
4690                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4691         }
4692     }
4693 }
4694
4695 \f
4696 /*
4697  * PHP support
4698  * Look for:
4699  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4700  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4701  *  - /^[ \t]*define\(\"[^\"]+/
4702  * Only with --members:
4703  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4704  * Idea by Diez B. Roggisch (2001)
4705  */
4706 static void
4707 PHP_functions (FILE *inf)
4708 {
4709   char *cp, *name;
4710   bool search_identifier = false;
4711
4712   LOOP_ON_INPUT_LINES (inf, lb, cp)
4713     {
4714       cp = skip_spaces (cp);
4715       name = cp;
4716       if (search_identifier
4717           && *cp != '\0')
4718         {
4719           while (!notinname (*cp))
4720             cp++;
4721           make_tag (name, cp - name, true,
4722                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4723           search_identifier = false;
4724         }
4725       else if (LOOKING_AT (cp, "function"))
4726         {
4727           if (*cp == '&')
4728             cp = skip_spaces (cp+1);
4729           if (*cp != '\0')
4730             {
4731               name = cp;
4732               while (!notinname (*cp))
4733                 cp++;
4734               make_tag (name, cp - name, true,
4735                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4736             }
4737           else
4738             search_identifier = true;
4739         }
4740       else if (LOOKING_AT (cp, "class"))
4741         {
4742           if (*cp != '\0')
4743             {
4744               name = cp;
4745               while (*cp != '\0' && !c_isspace (*cp))
4746                 cp++;
4747               make_tag (name, cp - name, false,
4748                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4749             }
4750           else
4751             search_identifier = true;
4752         }
4753       else if (strneq (cp, "define", 6)
4754                && (cp = skip_spaces (cp+6))
4755                && *cp++ == '('
4756                && (*cp == '"' || *cp == '\''))
4757         {
4758           char quote = *cp++;
4759           name = cp;
4760           while (*cp != quote && *cp != '\0')
4761             cp++;
4762           make_tag (name, cp - name, false,
4763                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4764         }
4765       else if (members
4766                && LOOKING_AT (cp, "var")
4767                && *cp == '$')
4768         {
4769           name = cp;
4770           while (!notinname (*cp))
4771             cp++;
4772           make_tag (name, cp - name, false,
4773                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4774         }
4775     }
4776 }
4777
4778 \f
4779 /*
4780  * Cobol tag functions
4781  * We could look for anything that could be a paragraph name.
4782  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4783  * Idea by Corny de Souza (1993)
4784  */
4785 static void
4786 Cobol_paragraphs (FILE *inf)
4787 {
4788   register char *bp, *ep;
4789
4790   LOOP_ON_INPUT_LINES (inf, lb, bp)
4791     {
4792       if (lb.len < 9)
4793         continue;
4794       bp += 8;
4795
4796       /* If eoln, compiler option or comment ignore whole line. */
4797       if (bp[-1] != ' ' || !c_isalnum (bp[0]))
4798         continue;
4799
4800       for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
4801         continue;
4802       if (*ep++ == '.')
4803         make_tag (bp, ep - bp, true,
4804                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4805     }
4806 }
4807
4808 \f
4809 /*
4810  * Makefile support
4811  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4812  */
4813 static void
4814 Makefile_targets (FILE *inf)
4815 {
4816   register char *bp;
4817
4818   LOOP_ON_INPUT_LINES (inf, lb, bp)
4819     {
4820       if (*bp == '\t' || *bp == '#')
4821         continue;
4822       while (*bp != '\0' && *bp != '=' && *bp != ':')
4823         bp++;
4824       if (*bp == ':' || (globals && *bp == '='))
4825         {
4826           /* We should detect if there is more than one tag, but we do not.
4827              We just skip initial and final spaces. */
4828           char * namestart = skip_spaces (lb.buffer);
4829           while (--bp > namestart)
4830             if (!notinname (*bp))
4831               break;
4832           make_tag (namestart, bp - namestart + 1, true,
4833                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4834         }
4835     }
4836 }
4837
4838 \f
4839 /*
4840  * Pascal parsing
4841  * Original code by Mosur K. Mohan (1989)
4842  *
4843  *  Locates tags for procedures & functions.  Doesn't do any type- or
4844  *  var-definitions.  It does look for the keyword "extern" or
4845  *  "forward" immediately following the procedure statement; if found,
4846  *  the tag is skipped.
4847  */
4848 static void
4849 Pascal_functions (FILE *inf)
4850 {
4851   linebuffer tline;             /* mostly copied from C_entries */
4852   long save_lcno;
4853   int save_lineno, namelen, taglen;
4854   char c, *name;
4855
4856   bool                          /* each of these flags is true if: */
4857     incomment,                  /* point is inside a comment */
4858     inquote,                    /* point is inside '..' string */
4859     get_tagname,                /* point is after PROCEDURE/FUNCTION
4860                                    keyword, so next item = potential tag */
4861     found_tag,                  /* point is after a potential tag */
4862     inparms,                    /* point is within parameter-list */
4863     verify_tag;                 /* point has passed the parm-list, so the
4864                                    next token will determine whether this
4865                                    is a FORWARD/EXTERN to be ignored, or
4866                                    whether it is a real tag */
4867
4868   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4869   name = NULL;                  /* keep compiler quiet */
4870   dbp = lb.buffer;
4871   *dbp = '\0';
4872   linebuffer_init (&tline);
4873
4874   incomment = inquote = false;
4875   found_tag = false;            /* have a proc name; check if extern */
4876   get_tagname = false;          /* found "procedure" keyword         */
4877   inparms = false;              /* found '(' after "proc"            */
4878   verify_tag = false;           /* check if "extern" is ahead        */
4879
4880
4881   while (perhaps_more_input (inf)) /* long main loop to get next char */
4882     {
4883       c = *dbp++;
4884       if (c == '\0')            /* if end of line */
4885         {
4886           readline (&lb, inf);
4887           dbp = lb.buffer;
4888           if (*dbp == '\0')
4889             continue;
4890           if (!((found_tag && verify_tag)
4891                 || get_tagname))
4892             c = *dbp++;         /* only if don't need *dbp pointing
4893                                    to the beginning of the name of
4894                                    the procedure or function */
4895         }
4896       if (incomment)
4897         {
4898           if (c == '}')         /* within { } comments */
4899             incomment = false;
4900           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4901             {
4902               dbp++;
4903               incomment = false;
4904             }
4905           continue;
4906         }
4907       else if (inquote)
4908         {
4909           if (c == '\'')
4910             inquote = false;
4911           continue;
4912         }
4913       else
4914         switch (c)
4915           {
4916           case '\'':
4917             inquote = true;     /* found first quote */
4918             continue;
4919           case '{':             /* found open { comment */
4920             incomment = true;
4921             continue;
4922           case '(':
4923             if (*dbp == '*')    /* found open (* comment */
4924               {
4925                 incomment = true;
4926                 dbp++;
4927               }
4928             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4929               inparms = true;
4930             continue;
4931           case ')':             /* end of parms list */
4932             if (inparms)
4933               inparms = false;
4934             continue;
4935           case ';':
4936             if (found_tag && !inparms) /* end of proc or fn stmt */
4937               {
4938                 verify_tag = true;
4939                 break;
4940               }
4941             continue;
4942           }
4943       if (found_tag && verify_tag && (*dbp != ' '))
4944         {
4945           /* Check if this is an "extern" declaration. */
4946           if (*dbp == '\0')
4947             continue;
4948           if (c_tolower (*dbp) == 'e')
4949             {
4950               if (nocase_tail ("extern")) /* superfluous, really! */
4951                 {
4952                   found_tag = false;
4953                   verify_tag = false;
4954                 }
4955             }
4956           else if (c_tolower (*dbp) == 'f')
4957             {
4958               if (nocase_tail ("forward")) /* check for forward reference */
4959                 {
4960                   found_tag = false;
4961                   verify_tag = false;
4962                 }
4963             }
4964           if (found_tag && verify_tag) /* not external proc, so make tag */
4965             {
4966               found_tag = false;
4967               verify_tag = false;
4968               make_tag (name, namelen, true,
4969                         tline.buffer, taglen, save_lineno, save_lcno);
4970               continue;
4971             }
4972         }
4973       if (get_tagname)          /* grab name of proc or fn */
4974         {
4975           char *cp;
4976
4977           if (*dbp == '\0')
4978             continue;
4979
4980           /* Find block name. */
4981           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4982             continue;
4983
4984           /* Save all values for later tagging. */
4985           linebuffer_setlen (&tline, lb.len);
4986           strcpy (tline.buffer, lb.buffer);
4987           save_lineno = lineno;
4988           save_lcno = linecharno;
4989           name = tline.buffer + (dbp - lb.buffer);
4990           namelen = cp - dbp;
4991           taglen = cp - lb.buffer + 1;
4992
4993           dbp = cp;             /* set dbp to e-o-token */
4994           get_tagname = false;
4995           found_tag = true;
4996           continue;
4997
4998           /* And proceed to check for "extern". */
4999         }
5000       else if (!incomment && !inquote && !found_tag)
5001         {
5002           /* Check for proc/fn keywords. */
5003           switch (c_tolower (c))
5004             {
5005             case 'p':
5006               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
5007                 get_tagname = true;
5008               continue;
5009             case 'f':
5010               if (nocase_tail ("unction"))
5011                 get_tagname = true;
5012               continue;
5013             }
5014         }
5015     } /* while not eof */
5016
5017   free (tline.buffer);
5018 }
5019
5020 \f
5021 /*
5022  * Lisp tag functions
5023  *  look for (def or (DEF, quote or QUOTE
5024  */
5025
5026 static void L_getit (void);
5027
5028 static void
5029 L_getit (void)
5030 {
5031   if (*dbp == '\'')             /* Skip prefix quote */
5032     dbp++;
5033   else if (*dbp == '(')
5034   {
5035     dbp++;
5036     /* Try to skip "(quote " */
5037     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
5038       /* Ok, then skip "(" before name in (defstruct (foo)) */
5039       dbp = skip_spaces (dbp);
5040   }
5041   get_tag (dbp, NULL);
5042 }
5043
5044 static void
5045 Lisp_functions (FILE *inf)
5046 {
5047   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5048     {
5049       if (dbp[0] != '(')
5050         continue;
5051
5052       /* "(defvar foo)" is a declaration rather than a definition.  */
5053       if (! declarations)
5054         {
5055           char *p = dbp + 1;
5056           if (LOOKING_AT (p, "defvar"))
5057             {
5058               p = skip_name (p); /* past var name */
5059               p = skip_spaces (p);
5060               if (*p == ')')
5061                 continue;
5062             }
5063         }
5064
5065       if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
5066         dbp += 3;
5067
5068       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5069         {
5070           dbp = skip_non_spaces (dbp);
5071           dbp = skip_spaces (dbp);
5072           L_getit ();
5073         }
5074       else
5075         {
5076           /* Check for (foo::defmumble name-defined ... */
5077           do
5078             dbp++;
5079           while (!notinname (*dbp) && *dbp != ':');
5080           if (*dbp == ':')
5081             {
5082               do
5083                 dbp++;
5084               while (*dbp == ':');
5085
5086               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5087                 {
5088                   dbp = skip_non_spaces (dbp);
5089                   dbp = skip_spaces (dbp);
5090                   L_getit ();
5091                 }
5092             }
5093         }
5094     }
5095 }
5096
5097 \f
5098 /*
5099  * Lua script language parsing
5100  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5101  *
5102  *  "function" and "local function" are tags if they start at column 1.
5103  */
5104 static void
5105 Lua_functions (FILE *inf)
5106 {
5107   register char *bp;
5108
5109   LOOP_ON_INPUT_LINES (inf, lb, bp)
5110     {
5111       bp = skip_spaces (bp);
5112       if (bp[0] != 'f' && bp[0] != 'l')
5113         continue;
5114
5115       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5116
5117       if (LOOKING_AT (bp, "function"))
5118         {
5119           char *tag_name, *tp_dot, *tp_colon;
5120
5121           get_tag (bp, &tag_name);
5122           /* If the tag ends with ".foo" or ":foo", make an additional tag for
5123              "foo".  */
5124           tp_dot = strrchr (tag_name, '.');
5125           tp_colon = strrchr (tag_name, ':');
5126           if (tp_dot || tp_colon)
5127             {
5128               char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
5129               int len_add = p - tag_name + 1;
5130
5131               get_tag (bp + len_add, NULL);
5132             }
5133         }
5134     }
5135 }
5136
5137 \f
5138 /*
5139  * PostScript tags
5140  * Just look for lines where the first character is '/'
5141  * Also look at "defineps" for PSWrap
5142  * Ideas by:
5143  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
5144  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5145  */
5146 static void
5147 PS_functions (FILE *inf)
5148 {
5149   register char *bp, *ep;
5150
5151   LOOP_ON_INPUT_LINES (inf, lb, bp)
5152     {
5153       if (bp[0] == '/')
5154         {
5155           for (ep = bp+1;
5156                *ep != '\0' && *ep != ' ' && *ep != '{';
5157                ep++)
5158             continue;
5159           make_tag (bp, ep - bp, true,
5160                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5161         }
5162       else if (LOOKING_AT (bp, "defineps"))
5163         get_tag (bp, NULL);
5164     }
5165 }
5166
5167 \f
5168 /*
5169  * Forth tags
5170  * Ignore anything after \ followed by space or in ( )
5171  * Look for words defined by :
5172  * Look for constant, code, create, defer, value, and variable
5173  * OBP extensions:  Look for buffer:, field,
5174  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5175  */
5176 static void
5177 Forth_words (FILE *inf)
5178 {
5179   register char *bp;
5180
5181   LOOP_ON_INPUT_LINES (inf, lb, bp)
5182     while ((bp = skip_spaces (bp))[0] != '\0')
5183       if (bp[0] == '\\' && c_isspace (bp[1]))
5184         break;                  /* read next line */
5185       else if (bp[0] == '(' && c_isspace (bp[1]))
5186         do                      /* skip to ) or eol */
5187           bp++;
5188         while (*bp != ')' && *bp != '\0');
5189       else if ((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5190                || LOOKING_AT_NOCASE (bp, "constant")
5191                || LOOKING_AT_NOCASE (bp, "code")
5192                || LOOKING_AT_NOCASE (bp, "create")
5193                || LOOKING_AT_NOCASE (bp, "defer")
5194                || LOOKING_AT_NOCASE (bp, "value")
5195                || LOOKING_AT_NOCASE (bp, "variable")
5196                || LOOKING_AT_NOCASE (bp, "buffer:")
5197                || LOOKING_AT_NOCASE (bp, "field"))
5198         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5199       else
5200         bp = skip_non_spaces (bp);
5201 }
5202
5203 \f
5204 /*
5205  * Scheme tag functions
5206  * look for (def... xyzzy
5207  *          (def... (xyzzy
5208  *          (def ... ((...(xyzzy ....
5209  *          (set! xyzzy
5210  * Original code by Ken Haase (1985?)
5211  */
5212 static void
5213 Scheme_functions (FILE *inf)
5214 {
5215   register char *bp;
5216
5217   LOOP_ON_INPUT_LINES (inf, lb, bp)
5218     {
5219       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5220         {
5221           bp = skip_non_spaces (bp+4);
5222           /* Skip over open parens and white space.  Don't continue past
5223              '\0'. */
5224           while (*bp && notinname (*bp))
5225             bp++;
5226           get_tag (bp, NULL);
5227         }
5228       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5229         get_tag (bp, NULL);
5230     }
5231 }
5232
5233 \f
5234 /* Find tags in TeX and LaTeX input files.  */
5235
5236 /* TEX_toktab is a table of TeX control sequences that define tags.
5237  * Each entry records one such control sequence.
5238  *
5239  * Original code from who knows whom.
5240  * Ideas by:
5241  *   Stefan Monnier (2002)
5242  */
5243
5244 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5245
5246 /* Default set of control sequences to put into TEX_toktab.
5247    The value of environment var TEXTAGS is prepended to this.  */
5248 static const char *TEX_defenv = "\
5249 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5250 :part:appendix:entry:index:def\
5251 :newcommand:renewcommand:newenvironment:renewenvironment";
5252
5253 static void TEX_decode_env (const char *, const char *);
5254
5255 /*
5256  * TeX/LaTeX scanning loop.
5257  */
5258 static void
5259 TeX_commands (FILE *inf)
5260 {
5261   char *cp;
5262   linebuffer *key;
5263
5264   char TEX_esc = '\0';
5265   char TEX_opgrp, TEX_clgrp;
5266
5267   /* Initialize token table once from environment. */
5268   if (TEX_toktab == NULL)
5269     TEX_decode_env ("TEXTAGS", TEX_defenv);
5270
5271   LOOP_ON_INPUT_LINES (inf, lb, cp)
5272     {
5273       /* Look at each TEX keyword in line. */
5274       for (;;)
5275         {
5276           /* Look for a TEX escape. */
5277           while (true)
5278             {
5279               char c = *cp++;
5280               if (c == '\0' || c == '%')
5281                 goto tex_next_line;
5282
5283               /* Select either \ or ! as escape character, whichever comes
5284                  first outside a comment.  */
5285               if (!TEX_esc)
5286                 switch (c)
5287                   {
5288                   case '\\':
5289                     TEX_esc = c;
5290                     TEX_opgrp = '{';
5291                     TEX_clgrp = '}';
5292                     break;
5293
5294                   case '!':
5295                     TEX_esc = c;
5296                     TEX_opgrp = '<';
5297                     TEX_clgrp = '>';
5298                     break;
5299                   }
5300
5301               if (c == TEX_esc)
5302                 break;
5303             }
5304
5305           for (key = TEX_toktab; key->buffer != NULL; key++)
5306             if (strneq (cp, key->buffer, key->len))
5307               {
5308                 char *p;
5309                 int namelen, linelen;
5310                 bool opgrp = false;
5311
5312                 cp = skip_spaces (cp + key->len);
5313                 if (*cp == TEX_opgrp)
5314                   {
5315                     opgrp = true;
5316                     cp++;
5317                   }
5318                 for (p = cp;
5319                      (!c_isspace (*p) && *p != '#' &&
5320                       *p != TEX_opgrp && *p != TEX_clgrp);
5321                      p++)
5322                   continue;
5323                 namelen = p - cp;
5324                 linelen = lb.len;
5325                 if (!opgrp || *p == TEX_clgrp)
5326                   {
5327                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5328                       p++;
5329                     linelen = p - lb.buffer + 1;
5330                   }
5331                 make_tag (cp, namelen, true,
5332                           lb.buffer, linelen, lineno, linecharno);
5333                 goto tex_next_line; /* We only tag a line once */
5334               }
5335         }
5336     tex_next_line:
5337       ;
5338     }
5339 }
5340
5341 /* Read environment and prepend it to the default string.
5342    Build token table. */
5343 static void
5344 TEX_decode_env (const char *evarname, const char *defenv)
5345 {
5346   register const char *env, *p;
5347   int i, len;
5348
5349   /* Append default string to environment. */
5350   env = getenv (evarname);
5351   if (!env)
5352     env = defenv;
5353   else
5354     env = concat (env, defenv, "");
5355
5356   /* Allocate a token table */
5357   for (len = 1, p = env; (p = strchr (p, ':')); )
5358     if (*++p)
5359       len++;
5360   TEX_toktab = xnew (len, linebuffer);
5361
5362   /* Unpack environment string into token table. Be careful about */
5363   /* zero-length strings (leading ':', "::" and trailing ':') */
5364   for (i = 0; *env != '\0';)
5365     {
5366       p = strchr (env, ':');
5367       if (!p)                   /* End of environment string. */
5368         p = env + strlen (env);
5369       if (p - env > 0)
5370         {                       /* Only non-zero strings. */
5371           TEX_toktab[i].buffer = savenstr (env, p - env);
5372           TEX_toktab[i].len = p - env;
5373           i++;
5374         }
5375       if (*p)
5376         env = p + 1;
5377       else
5378         {
5379           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5380           TEX_toktab[i].len = 0;
5381           break;
5382         }
5383     }
5384 }
5385
5386 \f
5387 /* Texinfo support.  Dave Love, Mar. 2000.  */
5388 static void
5389 Texinfo_nodes (FILE *inf)
5390 {
5391   char *cp, *start;
5392   LOOP_ON_INPUT_LINES (inf, lb, cp)
5393     if (LOOKING_AT (cp, "@node"))
5394       {
5395         start = cp;
5396         while (*cp != '\0' && *cp != ',')
5397           cp++;
5398         make_tag (start, cp - start, true,
5399                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5400       }
5401 }
5402
5403 \f
5404 /*
5405  * HTML support.
5406  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5407  * Contents of <a name=xxx> are tags with name xxx.
5408  *
5409  * Francesco Potortì, 2002.
5410  */
5411 static void
5412 HTML_labels (FILE *inf)
5413 {
5414   bool getnext = false;         /* next text outside of HTML tags is a tag */
5415   bool skiptag = false;         /* skip to the end of the current HTML tag */
5416   bool intag = false;           /* inside an html tag, looking for ID= */
5417   bool inanchor = false;        /* when INTAG, is an anchor, look for NAME= */
5418   char *end;
5419
5420
5421   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5422
5423   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5424     for (;;)                    /* loop on the same line */
5425       {
5426         if (skiptag)            /* skip HTML tag */
5427           {
5428             while (*dbp != '\0' && *dbp != '>')
5429               dbp++;
5430             if (*dbp == '>')
5431               {
5432                 dbp += 1;
5433                 skiptag = false;
5434                 continue;       /* look on the same line */
5435               }
5436             break;              /* go to next line */
5437           }
5438
5439         else if (intag) /* look for "name=" or "id=" */
5440           {
5441             while (*dbp != '\0' && *dbp != '>'
5442                    && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5443               dbp++;
5444             if (*dbp == '\0')
5445               break;            /* go to next line */
5446             if (*dbp == '>')
5447               {
5448                 dbp += 1;
5449                 intag = false;
5450                 continue;       /* look on the same line */
5451               }
5452             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5453                 || LOOKING_AT_NOCASE (dbp, "id="))
5454               {
5455                 bool quoted = (dbp[0] == '"');
5456
5457                 if (quoted)
5458                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5459                     continue;
5460                 else
5461                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5462                     continue;
5463                 linebuffer_setlen (&token_name, end - dbp);
5464                 memcpy (token_name.buffer, dbp, end - dbp);
5465                 token_name.buffer[end - dbp] = '\0';
5466
5467                 dbp = end;
5468                 intag = false;  /* we found what we looked for */
5469                 skiptag = true; /* skip to the end of the tag */
5470                 getnext = true; /* then grab the text */
5471                 continue;       /* look on the same line */
5472               }
5473             dbp += 1;
5474           }
5475
5476         else if (getnext)       /* grab next tokens and tag them */
5477           {
5478             dbp = skip_spaces (dbp);
5479             if (*dbp == '\0')
5480               break;            /* go to next line */
5481             if (*dbp == '<')
5482               {
5483                 intag = true;
5484                 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5485                 continue;       /* look on the same line */
5486               }
5487
5488             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5489               continue;
5490             make_tag (token_name.buffer, token_name.len, true,
5491                       dbp, end - dbp, lineno, linecharno);
5492             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5493             getnext = false;
5494             break;              /* go to next line */
5495           }
5496
5497         else                    /* look for an interesting HTML tag */
5498           {
5499             while (*dbp != '\0' && *dbp != '<')
5500               dbp++;
5501             if (*dbp == '\0')
5502               break;            /* go to next line */
5503             intag = true;
5504             if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5505               {
5506                 inanchor = true;
5507                 continue;       /* look on the same line */
5508               }
5509             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5510                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5511                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5512                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5513               {
5514                 intag = false;
5515                 getnext = true;
5516                 continue;       /* look on the same line */
5517               }
5518             dbp += 1;
5519           }
5520       }
5521 }
5522
5523 \f
5524 /*
5525  * Prolog support
5526  *
5527  * Assumes that the predicate or rule starts at column 0.
5528  * Only the first clause of a predicate or rule is added.
5529  * Original code by Sunichirou Sugou (1989)
5530  * Rewritten by Anders Lindgren (1996)
5531  */
5532 static size_t prolog_pr (char *, char *);
5533 static void prolog_skip_comment (linebuffer *, FILE *);
5534 static size_t prolog_atom (char *, size_t);
5535
5536 static void
5537 Prolog_functions (FILE *inf)
5538 {
5539   char *cp, *last;
5540   size_t len;
5541   size_t allocated;
5542
5543   allocated = 0;
5544   len = 0;
5545   last = NULL;
5546
5547   LOOP_ON_INPUT_LINES (inf, lb, cp)
5548     {
5549       if (cp[0] == '\0')        /* Empty line */
5550         continue;
5551       else if (c_isspace (cp[0])) /* Not a predicate */
5552         continue;
5553       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5554         prolog_skip_comment (&lb, inf);
5555       else if ((len = prolog_pr (cp, last)) > 0)
5556         {
5557           /* Predicate or rule.  Store the function name so that we
5558              only generate a tag for the first clause.  */
5559           if (last == NULL)
5560             last = xnew (len + 1, char);
5561           else if (len + 1 > allocated)
5562             xrnew (last, len + 1, char);
5563           allocated = len + 1;
5564           memcpy (last, cp, len);
5565           last[len] = '\0';
5566         }
5567     }
5568   free (last);
5569 }
5570
5571
5572 static void
5573 prolog_skip_comment (linebuffer *plb, FILE *inf)
5574 {
5575   char *cp;
5576
5577   do
5578     {
5579       for (cp = plb->buffer; *cp != '\0'; cp++)
5580         if (cp[0] == '*' && cp[1] == '/')
5581           return;
5582       readline (plb, inf);
5583     }
5584   while (perhaps_more_input (inf));
5585 }
5586
5587 /*
5588  * A predicate or rule definition is added if it matches:
5589  *     <beginning of line><Prolog Atom><whitespace>(
5590  * or  <beginning of line><Prolog Atom><whitespace>:-
5591  *
5592  * It is added to the tags database if it doesn't match the
5593  * name of the previous clause header.
5594  *
5595  * Return the size of the name of the predicate or rule, or 0 if no
5596  * header was found.
5597  */
5598 static size_t
5599 prolog_pr (char *s, char *last)
5600
5601                                 /* Name of last clause. */
5602 {
5603   size_t pos;
5604   size_t len;
5605
5606   pos = prolog_atom (s, 0);
5607   if (! pos)
5608     return 0;
5609
5610   len = pos;
5611   pos = skip_spaces (s + pos) - s;
5612
5613   if ((s[pos] == '.'
5614        || (s[pos] == '(' && (pos += 1))
5615        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5616       && (last == NULL          /* save only the first clause */
5617           || len != strlen (last)
5618           || !strneq (s, last, len)))
5619         {
5620           make_tag (s, len, true, s, pos, lineno, linecharno);
5621           return len;
5622         }
5623   else
5624     return 0;
5625 }
5626
/*
 * Consume a Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumeric characters and underscores, starting
 *   with a lower case letter or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.  A quoted atom that is not closed on
 *   the same line is an error.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const size_t start = pos;

  if (s[pos] == '\'')
    {
      /* Quoted atom: scan up to the closing quote.  */
      pos++;
      for (;;)
        switch (s[pos])
          {
          case '\0':
            /* Multiline quoted atoms are ignored. */
            return 0;

          case '\\':
            if (s[pos + 1] == '\0')
              return 0;
            pos += 2;
            break;

          case '\'':
            pos++;
            if (s[pos] != '\'')
              return pos - start;
            pos++;              /* A doubled quote stands for itself.  */
            break;

          default:
            pos++;
            break;
          }
    }

  if (!(c_islower (s[pos]) || s[pos] == '_'))
    return 0;

  /* The atom is unquoted. */
  for (pos++; c_isalnum (s[pos]) || s[pos] == '_'; pos++)
    continue;
  return pos - start;
}
5683
5684 \f
5685 /*
5686  * Support for Erlang
5687  *
5688  * Generates tags for functions, defines, and records.
5689  * Assumes that Erlang functions start at column 0.
5690  * Original code by Anders Lindgren (1996)
5691  */
5692 static int erlang_func (char *, char *);
5693 static void erlang_attribute (char *);
5694 static int erlang_atom (char *);
5695
5696 static void
5697 Erlang_functions (FILE *inf)
5698 {
5699   char *cp, *last;
5700   int len;
5701   int allocated;
5702
5703   allocated = 0;
5704   len = 0;
5705   last = NULL;
5706
5707   LOOP_ON_INPUT_LINES (inf, lb, cp)
5708     {
5709       if (cp[0] == '\0')        /* Empty line */
5710         continue;
5711       else if (c_isspace (cp[0])) /* Not function nor attribute */
5712         continue;
5713       else if (cp[0] == '%')    /* comment */
5714         continue;
5715       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5716         continue;
5717       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5718         {
5719           erlang_attribute (cp);
5720           if (last != NULL)
5721             {
5722               free (last);
5723               last = NULL;
5724             }
5725         }
5726       else if ((len = erlang_func (cp, last)) > 0)
5727         {
5728           /*
5729            * Function.  Store the function name so that we only
5730            * generates a tag for the first clause.
5731            */
5732           if (last == NULL)
5733             last = xnew (len + 1, char);
5734           else if (len + 1 > allocated)
5735             xrnew (last, len + 1, char);
5736           allocated = len + 1;
5737           memcpy (last, cp, len);
5738           last[len] = '\0';
5739         }
5740     }
5741   free (last);
5742 }
5743
5744
5745 /*
5746  * A function definition is added if it matches:
5747  *     <beginning of line><Erlang Atom><whitespace>(
5748  *
5749  * It is added to the tags database if it doesn't match the
5750  * name of the previous clause header.
5751  *
5752  * Return the size of the name of the function, or 0 if no function
5753  * was found.
5754  */
5755 static int
5756 erlang_func (char *s, char *last)
5757
5758                                 /* Name of last clause. */
5759 {
5760   int pos;
5761   int len;
5762
5763   pos = erlang_atom (s);
5764   if (pos < 1)
5765     return 0;
5766
5767   len = pos;
5768   pos = skip_spaces (s + pos) - s;
5769
5770   /* Save only the first clause. */
5771   if (s[pos++] == '('
5772       && (last == NULL
5773           || len != (int)strlen (last)
5774           || !strneq (s, last, len)))
5775         {
5776           make_tag (s, len, true, s, pos, lineno, linecharno);
5777           return len;
5778         }
5779
5780   return 0;
5781 }
5782
5783
5784 /*
5785  * Handle attributes.  Currently, tags are generated for defines
5786  * and records.
5787  *
5788  * They are on the form:
5789  * -define(foo, bar).
5790  * -define(Foo(M, N), M+N).
5791  * -record(graph, {vtab = notable, cyclic = true}).
5792  */
5793 static void
5794 erlang_attribute (char *s)
5795 {
5796   char *cp = s;
5797
5798   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5799       && *cp++ == '(')
5800     {
5801       int len = erlang_atom (skip_spaces (cp));
5802       if (len > 0)
5803         make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5804     }
5805   return;
5806 }
5807
5808
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not closed on the same line.
 */
static int
erlang_atom (char *s)
{
  int n = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: a backslash protects the character after it.  */
      for (n = 1; s[n] != '\''; n++)
        {
          if (s[n] == '\0')     /* multiline quoted atoms are ignored */
            return 0;
          if (s[n] == '\\' && s[++n] == '\0')
            return 0;
        }
      return n + 1;             /* count the closing quote */
    }

  if (c_isalpha (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      n = 1;
      while (c_isalnum (s[n]) || s[n] == '_')
        n++;
    }

  return n;
}
5836
5837 \f
5838 static char *scan_separators (char *);
5839 static void add_regex (char *, language *);
5840 static char *substitute (char *, char *, struct re_registers *);
5841
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; the text after it is copied
 * down to the start of NAME, in place, up to the first unquoted
 * occurrence of the separator, and is null terminated there.
 * A backslash quotes the next character: \a \b \d \e \f \n \r \t \v
 * become the corresponding control characters, a quoted separator
 * becomes a plain separator, and any other quoted character keeps its
 * backslash.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *out = name;             /* where the next character is stored */
  char *in;                     /* where the next character is read */
  bool escaped = false;         /* previous character was a backslash */

  for (in = name + 1; *in != '\0'; in++)
    {
      const char c = *in;

      if (escaped)
        {
          escaped = false;
          switch (c)
            {
            case 'a': *out++ = '\007'; break; /* BEL (bell)           */
            case 'b': *out++ = '\b'; break;   /* BS (back space)      */
            case 'd': *out++ = 0177; break;   /* DEL (delete)         */
            case 'e': *out++ = 033; break;    /* ESC (escape)         */
            case 'f': *out++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *out++ = '\n'; break;   /* NL (new line)        */
            case 'r': *out++ = '\r'; break;   /* CR (carriage return) */
            case 't': *out++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *out++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              /* A quoted separator loses its quote; anything else is
                 quoted for somebody else, so preserve the quote.  */
              if (c != sep)
                *out++ = '\\';
              *out++ = c;
              break;
            }
          continue;
        }

      if (c == '\\')
        {
          escaped = true;
          continue;
        }
      if (c == sep)
        break;
      *out++ = c;
    }

  /* Terminate copied string.  OUT always trails IN, so the character
     IN stopped at is still intact afterwards.  */
  *out = '\0';

  return *in == sep ? in : NULL; /* NULL signals unterminated regexp */
}
5900
5901 /* Look at the argument of --regex or --no-regex and do the right
5902    thing.  Same for each line of a regexp file. */
5903 static void
5904 analyze_regex (char *regex_arg)
5905 {
5906   if (regex_arg == NULL)
5907     {
5908       free_regexps ();          /* --no-regex: remove existing regexps */
5909       return;
5910     }
5911
5912   /* A real --regexp option or a line in a regexp file. */
5913   switch (regex_arg[0])
5914     {
5915       /* Comments in regexp file or null arg to --regex. */
5916     case '\0':
5917     case ' ':
5918     case '\t':
5919       break;
5920
5921       /* Read a regex file.  This is recursive and may result in a
5922          loop, which will stop when the file descriptors are exhausted. */
5923     case '@':
5924       {
5925         FILE *regexfp;
5926         linebuffer regexbuf;
5927         char *regexfile = regex_arg + 1;
5928
5929         /* regexfile is a file containing regexps, one per line. */
5930         regexfp = fopen (regexfile, "r" FOPEN_BINARY);
5931         if (regexfp == NULL)
5932           pfatal (regexfile);
5933         linebuffer_init (&regexbuf);
5934         while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
5935           analyze_regex (regexbuf.buffer);
5936         free (regexbuf.buffer);
5937         if (fclose (regexfp) != 0)
5938           pfatal (regexfile);
5939       }
5940       break;
5941
5942       /* Regexp to be used for a specific language only. */
5943     case '{':
5944       {
5945         language *lang;
5946         char *lang_name = regex_arg + 1;
5947         char *cp;
5948
5949         for (cp = lang_name; *cp != '}'; cp++)
5950           if (*cp == '\0')
5951             {
5952               error ("unterminated language name in regex: %s", regex_arg);
5953               return;
5954             }
5955         *cp++ = '\0';
5956         lang = get_language_from_langname (lang_name);
5957         if (lang == NULL)
5958           return;
5959         add_regex (cp, lang);
5960       }
5961       break;
5962
5963       /* Regexp to be used for any language. */
5964     default:
5965       add_regex (regex_arg, NULL);
5966       break;
5967     }
5968 }
5969
5970 /* Separate the regexp pattern, compile it,
5971    and care for optional name and modifiers. */
5972 static void
5973 add_regex (char *regexp_pattern, language *lang)
5974 {
5975   static struct re_pattern_buffer zeropattern;
5976   char sep, *pat, *name, *modifiers;
5977   char empty = '\0';
5978   const char *err;
5979   struct re_pattern_buffer *patbuf;
5980   regexp *rp;
5981   bool
5982     force_explicit_name = true, /* do not use implicit tag names */
5983     ignore_case = false,        /* case is significant */
5984     multi_line = false,         /* matches are done one line at a time */
5985     single_line = false;        /* dot does not match newline */
5986
5987
5988   if (strlen (regexp_pattern) < 3)
5989     {
5990       error ("null regexp");
5991       return;
5992     }
5993   sep = regexp_pattern[0];
5994   name = scan_separators (regexp_pattern);
5995   if (name == NULL)
5996     {
5997       error ("%s: unterminated regexp", regexp_pattern);
5998       return;
5999     }
6000   if (name[1] == sep)
6001     {
6002       error ("null name for regexp \"%s\"", regexp_pattern);
6003       return;
6004     }
6005   modifiers = scan_separators (name);
6006   if (modifiers == NULL)        /* no terminating separator --> no name */
6007     {
6008       modifiers = name;
6009       name = &empty;
6010     }
6011   else
6012     modifiers += 1;             /* skip separator */
6013
6014   /* Parse regex modifiers. */
6015   for (; modifiers[0] != '\0'; modifiers++)
6016     switch (modifiers[0])
6017       {
6018       case 'N':
6019         if (modifiers == name)
6020           error ("forcing explicit tag name but no name, ignoring");
6021         force_explicit_name = true;
6022         break;
6023       case 'i':
6024         ignore_case = true;
6025         break;
6026       case 's':
6027         single_line = true;
6028         /* FALLTHRU */
6029       case 'm':
6030         multi_line = true;
6031         need_filebuf = true;
6032         break;
6033       default:
6034         error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
6035         break;
6036       }
6037
6038   patbuf = xnew (1, struct re_pattern_buffer);
6039   *patbuf = zeropattern;
6040   if (ignore_case)
6041     {
6042       static char lc_trans[UCHAR_MAX + 1];
6043       int i;
6044       for (i = 0; i < UCHAR_MAX + 1; i++)
6045         lc_trans[i] = c_tolower (i);
6046       patbuf->translate = lc_trans;     /* translation table to fold case  */
6047     }
6048
6049   if (multi_line)
6050     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6051   else
6052     pat = regexp_pattern;
6053
6054   if (single_line)
6055     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6056   else
6057     re_set_syntax (RE_SYNTAX_EMACS);
6058
6059   err = re_compile_pattern (pat, strlen (pat), patbuf);
6060   if (multi_line)
6061     free (pat);
6062   if (err != NULL)
6063     {
6064       error ("%s while compiling pattern", err);
6065       return;
6066     }
6067
6068   rp = p_head;
6069   p_head = xnew (1, regexp);
6070   p_head->pattern = savestr (regexp_pattern);
6071   p_head->p_next = rp;
6072   p_head->lang = lang;
6073   p_head->pat = patbuf;
6074   p_head->name = savestr (name);
6075   p_head->error_signaled = false;
6076   p_head->force_explicit_name = force_explicit_name;
6077   p_head->ignore_case = ignore_case;
6078   p_head->multi_line = multi_line;
6079 }
6080
6081 /*
6082  * Do the substitutions indicated by the regular expression and
6083  * arguments.
6084  */
6085 static char *
6086 substitute (char *in, char *out, struct re_registers *regs)
6087 {
6088   char *result, *t;
6089   int size, dig, diglen;
6090
6091   result = NULL;
6092   size = strlen (out);
6093
6094   /* Pass 1: figure out how much to allocate by finding all \N strings. */
6095   if (out[size - 1] == '\\')
6096     fatal ("pattern error in \"%s\"", out);
6097   for (t = strchr (out, '\\');
6098        t != NULL;
6099        t = strchr (t + 2, '\\'))
6100     if (c_isdigit (t[1]))
6101       {
6102         dig = t[1] - '0';
6103         diglen = regs->end[dig] - regs->start[dig];
6104         size += diglen - 2;
6105       }
6106     else
6107       size -= 1;
6108
6109   /* Allocate space and do the substitutions. */
6110   assert (size >= 0);
6111   result = xnew (size + 1, char);
6112
6113   for (t = result; *out != '\0'; out++)
6114     if (*out == '\\' && c_isdigit (*++out))
6115       {
6116         dig = *out - '0';
6117         diglen = regs->end[dig] - regs->start[dig];
6118         memcpy (t, in + regs->start[dig], diglen);
6119         t += diglen;
6120       }
6121     else
6122       *t++ = *out;
6123   *t = '\0';
6124
6125   assert (t <= result + size);
6126   assert (t - result == (int)strlen (result));
6127
6128   return result;
6129 }
6130
6131 /* Deallocate all regexps. */
6132 static void
6133 free_regexps (void)
6134 {
6135   regexp *rp;
6136   while (p_head != NULL)
6137     {
6138       rp = p_head->p_next;
6139       free (p_head->pattern);
6140       free (p_head->name);
6141       free (p_head);
6142       p_head = rp;
6143     }
6144   return;
6145 }
6146
6147 /*
6148  * Reads the whole file as a single string from `filebuf' and looks for
6149  * multi-line regular expressions, creating tags on matches.
6150  * readline already dealt with normal regexps.
6151  *
6152  * Idea by Ben Wing <ben@666.com> (2002).
6153  */
6154 static void
6155 regex_tag_multiline (void)
6156 {
6157   char *buffer = filebuf.buffer;
6158   regexp *rp;
6159   char *name;
6160
6161   for (rp = p_head; rp != NULL; rp = rp->p_next)
6162     {
6163       int match = 0;
6164
6165       if (!rp->multi_line)
6166         continue;               /* skip normal regexps */
6167
6168       /* Generic initializations before parsing file from memory. */
6169       lineno = 1;               /* reset global line number */
6170       charno = 0;               /* reset global char number */
6171       linecharno = 0;           /* reset global char number of line start */
6172
6173       /* Only use generic regexps or those for the current language. */
6174       if (rp->lang != NULL && rp->lang != curfdp->lang)
6175         continue;
6176
6177       while (match >= 0 && match < filebuf.len)
6178         {
6179           match = re_search (rp->pat, buffer, filebuf.len, charno,
6180                              filebuf.len - match, &rp->regs);
6181           switch (match)
6182             {
6183             case -2:
6184               /* Some error. */
6185               if (!rp->error_signaled)
6186                 {
6187                   error ("regexp stack overflow while matching \"%s\"",
6188                          rp->pattern);
6189                   rp->error_signaled = true;
6190                 }
6191               break;
6192             case -1:
6193               /* No match. */
6194               break;
6195             default:
6196               if (match == rp->regs.end[0])
6197                 {
6198                   if (!rp->error_signaled)
6199                     {
6200                       error ("regexp matches the empty string: \"%s\"",
6201                              rp->pattern);
6202                       rp->error_signaled = true;
6203                     }
6204                   match = -3;   /* exit from while loop */
6205                   break;
6206                 }
6207
6208               /* Match occurred.  Construct a tag. */
6209               while (charno < rp->regs.end[0])
6210                 if (buffer[charno++] == '\n')
6211                   lineno++, linecharno = charno;
6212               name = rp->name;
6213               if (name[0] == '\0')
6214                 name = NULL;
6215               else /* make a named tag */
6216                 name = substitute (buffer, rp->name, &rp->regs);
6217               if (rp->force_explicit_name)
6218                 /* Force explicit tag name, if a name is there. */
6219                 pfnote (name, true, buffer + linecharno,
6220                         charno - linecharno + 1, lineno, linecharno);
6221               else
6222                 make_tag (name, strlen (name), true, buffer + linecharno,
6223                           charno - linecharno + 1, lineno, linecharno);
6224               break;
6225             }
6226         }
6227     }
6228 }
6229
6230 \f
6231 static bool
6232 nocase_tail (const char *cp)
6233 {
6234   int len = 0;
6235
6236   while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6237     cp++, len++;
6238   if (*cp == '\0' && !intoken (dbp[len]))
6239     {
6240       dbp += len;
6241       return true;
6242     }
6243   return false;
6244 }
6245
6246 static void
6247 get_tag (register char *bp, char **namepp)
6248 {
6249   register char *cp = bp;
6250
6251   if (*bp != '\0')
6252     {
6253       /* Go till you get to white space or a syntactic break */
6254       for (cp = bp + 1; !notinname (*cp); cp++)
6255         continue;
6256       make_tag (bp, cp - bp, true,
6257                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6258     }
6259
6260   if (namepp != NULL)
6261     *namepp = savenstr (bp, cp - bp);
6262 }
6263
6264 /*
6265  * Read a line of text from `stream' into `lbp', excluding the
6266  * newline or CR-NL, if any.  Return the number of characters read from
6267  * `stream', which is the length of the line including the newline.
6268  *
6269  * On DOS or Windows we do not count the CR character, if any before the
6270  * NL, in the returned length; this mirrors the behavior of Emacs on those
6271  * platforms (for text files, it translates CR-NL to NL as it reads in the
6272  * file).
6273  *
6274  * If multi-line regular expressions are requested, each line read is
6275  * appended to `filebuf'.
6276  */
6277 static long
6278 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6279 {
6280   char *buffer = lbp->buffer;
6281   char *p = lbp->buffer;
6282   char *pend;
6283   int chars_deleted;
6284
6285   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6286
6287   for (;;)
6288     {
6289       register int c = getc (stream);
6290       if (p == pend)
6291         {
6292           /* We're at the end of linebuffer: expand it. */
6293           lbp->size *= 2;
6294           xrnew (buffer, lbp->size, char);
6295           p += buffer - lbp->buffer;
6296           pend = buffer + lbp->size;
6297           lbp->buffer = buffer;
6298         }
6299       if (c == EOF)
6300         {
6301           if (ferror (stream))
6302             perror (filename);
6303           *p = '\0';
6304           chars_deleted = 0;
6305           break;
6306         }
6307       if (c == '\n')
6308         {
6309           if (p > buffer && p[-1] == '\r')
6310             {
6311               p -= 1;
6312               chars_deleted = 2;
6313             }
6314           else
6315             {
6316               chars_deleted = 1;
6317             }
6318           *p = '\0';
6319           break;
6320         }
6321       *p++ = c;
6322     }
6323   lbp->len = p - buffer;
6324
6325   if (need_filebuf              /* we need filebuf for multi-line regexps */
6326       && chars_deleted > 0)     /* not at EOF */
6327     {
6328       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6329         {
6330           /* Expand filebuf. */
6331           filebuf.size *= 2;
6332           xrnew (filebuf.buffer, filebuf.size, char);
6333         }
6334       memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6335       filebuf.len += lbp->len;
6336       filebuf.buffer[filebuf.len++] = '\n';
6337       filebuf.buffer[filebuf.len] = '\0';
6338     }
6339
6340   return lbp->len + chars_deleted;
6341 }
6342
6343 /*
6344  * Like readline_internal, above, but in addition try to match the
6345  * input line against relevant regular expressions and manage #line
6346  * directives.
6347  */
6348 static void
6349 readline (linebuffer *lbp, FILE *stream)
6350 {
6351   long result;
6352
6353   linecharno = charno;          /* update global char number of line start */
6354   result = readline_internal (lbp, stream, infilename); /* read line */
6355   lineno += 1;                  /* increment global line number */
6356   charno += result;             /* increment global char number */
6357
6358   /* Honor #line directives. */
6359   if (!no_line_directive)
6360     {
6361       static bool discard_until_line_directive;
6362
6363       /* Check whether this is a #line directive. */
6364       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6365         {
6366           unsigned int lno;
6367           int start = 0;
6368
6369           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6370               && start > 0)     /* double quote character found */
6371             {
6372               char *endp = lbp->buffer + start;
6373
6374               while ((endp = strchr (endp, '"')) != NULL
6375                      && endp[-1] == '\\')
6376                 endp++;
6377               if (endp != NULL)
6378                 /* Ok, this is a real #line directive.  Let's deal with it. */
6379                 {
6380                   char *taggedabsname;  /* absolute name of original file */
6381                   char *taggedfname;    /* name of original file as given */
6382                   char *name;           /* temp var */
6383
6384                   discard_until_line_directive = false; /* found it */
6385                   name = lbp->buffer + start;
6386                   *endp = '\0';
6387                   canonicalize_filename (name);
6388                   taggedabsname = absolute_filename (name, tagfiledir);
6389                   if (filename_is_absolute (name)
6390                       || filename_is_absolute (curfdp->infname))
6391                     taggedfname = savestr (taggedabsname);
6392                   else
6393                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6394
6395                   if (streq (curfdp->taggedfname, taggedfname))
6396                     /* The #line directive is only a line number change.  We
6397                        deal with this afterwards. */
6398                     free (taggedfname);
6399                   else
6400                     /* The tags following this #line directive should be
6401                        attributed to taggedfname.  In order to do this, set
6402                        curfdp accordingly. */
6403                     {
6404                       fdesc *fdp; /* file description pointer */
6405
6406                       /* Go look for a file description already set up for the
6407                          file indicated in the #line directive.  If there is
6408                          one, use it from now until the next #line
6409                          directive. */
6410                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6411                         if (streq (fdp->infname, curfdp->infname)
6412                             && streq (fdp->taggedfname, taggedfname))
6413                           /* If we remove the second test above (after the &&)
6414                              then all entries pertaining to the same file are
6415                              coalesced in the tags file.  If we use it, then
6416                              entries pertaining to the same file but generated
6417                              from different files (via #line directives) will
6418                              go into separate sections in the tags file.  These
6419                              alternatives look equivalent.  The first one
6420                              destroys some apparently useless information. */
6421                           {
6422                             curfdp = fdp;
6423                             free (taggedfname);
6424                             break;
6425                           }
6426                       /* Else, if we already tagged the real file, skip all
6427                          input lines until the next #line directive. */
6428                       if (fdp == NULL) /* not found */
6429                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6430                           if (streq (fdp->infabsname, taggedabsname))
6431                             {
6432                               discard_until_line_directive = true;
6433                               free (taggedfname);
6434                               break;
6435                             }
6436                       /* Else create a new file description and use that from
6437                          now on, until the next #line directive. */
6438                       if (fdp == NULL) /* not found */
6439                         {
6440                           fdp = fdhead;
6441                           fdhead = xnew (1, fdesc);
6442                           *fdhead = *curfdp; /* copy curr. file description */
6443                           fdhead->next = fdp;
6444                           fdhead->infname = savestr (curfdp->infname);
6445                           fdhead->infabsname = savestr (curfdp->infabsname);
6446                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6447                           fdhead->taggedfname = taggedfname;
6448                           fdhead->usecharno = false;
6449                           fdhead->prop = NULL;
6450                           fdhead->written = false;
6451                           curfdp = fdhead;
6452                         }
6453                     }
6454                   free (taggedabsname);
6455                   lineno = lno - 1;
6456                   readline (lbp, stream);
6457                   return;
6458                 } /* if a real #line directive */
6459             } /* if #line is followed by a number */
6460         } /* if line begins with "#line " */
6461
6462       /* If we are here, no #line directive was found. */
6463       if (discard_until_line_directive)
6464         {
6465           if (result > 0)
6466             {
6467               /* Do a tail recursion on ourselves, thus discarding the contents
6468                  of the line buffer. */
6469               readline (lbp, stream);
6470               return;
6471             }
6472           /* End of file. */
6473           discard_until_line_directive = false;
6474           return;
6475         }
6476     } /* if #line directives should be considered */
6477
6478   {
6479     int match;
6480     regexp *rp;
6481     char *name;
6482
6483     /* Match against relevant regexps. */
6484     if (lbp->len > 0)
6485       for (rp = p_head; rp != NULL; rp = rp->p_next)
6486         {
6487           /* Only use generic regexps or those for the current language.
6488              Also do not use multiline regexps, which is the job of
6489              regex_tag_multiline. */
6490           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6491               || rp->multi_line)
6492             continue;
6493
6494           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6495           switch (match)
6496             {
6497             case -2:
6498               /* Some error. */
6499               if (!rp->error_signaled)
6500                 {
6501                   error ("regexp stack overflow while matching \"%s\"",
6502                          rp->pattern);
6503                   rp->error_signaled = true;
6504                 }
6505               break;
6506             case -1:
6507               /* No match. */
6508               break;
6509             case 0:
6510               /* Empty string matched. */
6511               if (!rp->error_signaled)
6512                 {
6513                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6514                   rp->error_signaled = true;
6515                 }
6516               break;
6517             default:
6518               /* Match occurred.  Construct a tag. */
6519               name = rp->name;
6520               if (name[0] == '\0')
6521                 name = NULL;
6522               else /* make a named tag */
6523                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6524               if (rp->force_explicit_name)
6525                 /* Force explicit tag name, if a name is there. */
6526                 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6527               else
6528                 make_tag (name, strlen (name), true,
6529                           lbp->buffer, match, lineno, linecharno);
6530               break;
6531             }
6532         }
6533   }
6534 }
6535
6536 \f
/*
 * Duplicate the NUL-terminated string CP: measure it with strlen and
 * hand the copying to savenstr.  Return the pointer savenstr returns.
 */
static char *
savestr (const char *cp)
{
  size_t n = strlen (cp);

  return savenstr (cp, n);
}
6546
6547 /*
6548  * Return a pointer to a space of size LEN+1 allocated with xnew where
6549  * the string CP has been copied for at most the first LEN characters.
6550  */
6551 static char *
6552 savenstr (const char *cp, int len)
6553 {
6554   char *dp = xnew (len + 1, char);
6555   dp[len] = '\0';
6556   return memcpy (dp, cp, len);
6557 }
6558
/* Return a pointer to the first character at or after CP for which
   c_isspace is false (the end of string is not a space). */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
6567
/* Return a pointer to the first character at or after CP that is
   either a space (per c_isspace) or the end of the string. */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || c_isspace (*cp))
        return cp;
      cp++;
    }
}
6576
/* Return a pointer to the first character at or after CP for which
   notinname is true, i.e. skip any chars in the "name" class. */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so loop stops there too */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
6586
/* Report a diagnostic built from printf-style FORMAT and its arguments
   through verror, then terminate with EXIT_FAILURE.  */
static void
fatal (char const *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);

  exit (EXIT_FAILURE);
}
6597
/* Print the system error message for the current errno, prefixed by
   S1, using perror; then terminate with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6604
6605 static void
6606 suggest_asking_for_help (void)
6607 {
6608   fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
6609            progname);
6610   exit (EXIT_FAILURE);
6611 }
6612
/* Output a diagnostic with printf-style FORMAT and args by forwarding
   them to verror.  Unlike fatal, this returns to the caller.  */
static void
error (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);
}
6622
6623 static void
6624 verror (char const *format, va_list ap)
6625 {
6626   fprintf (stderr, "%s: ", progname);
6627   vfprintf (stderr, format, ap);
6628   fprintf (stderr, "\n");
6629 }
6630
6631 /* Return a newly-allocated string whose contents
6632    concatenate those of s1, s2, s3.  */
6633 static char *
6634 concat (const char *s1, const char *s2, const char *s3)
6635 {
6636   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6637   char *result = xnew (len1 + len2 + len3 + 1, char);
6638
6639   strcpy (result, s1);
6640   strcpy (result + len1, s2);
6641   strcpy (result + len1 + len2, s3);
6642
6643   return result;
6644 }
6645
6646 \f
6647 /* Does the same work as the system V getcwd, but does not need to
6648    guess the buffer size in advance. */
6649 static char *
6650 etags_getcwd (void)
6651 {
6652   int bufsize = 200;
6653   char *path = xnew (bufsize, char);
6654
6655   while (getcwd (path, bufsize) == NULL)
6656     {
6657       if (errno != ERANGE)
6658         pfatal ("getcwd");
6659       bufsize *= 2;
6660       free (path);
6661       path = xnew (bufsize, char);
6662     }
6663
6664   canonicalize_filename (path);
6665   return path;
6666 }
6667
6668 /* Return a newly allocated string containing a name of a temporary file.  */
6669 static char *
6670 etags_mktmp (void)
6671 {
6672   const char *tmpdir = getenv ("TMPDIR");
6673   const char *slash = "/";
6674
6675 #if MSDOS || defined (DOS_NT)
6676   if (!tmpdir)
6677     tmpdir = getenv ("TEMP");
6678   if (!tmpdir)
6679     tmpdir = getenv ("TMP");
6680   if (!tmpdir)
6681     tmpdir = ".";
6682   if (tmpdir[strlen (tmpdir) - 1] == '/'
6683       || tmpdir[strlen (tmpdir) - 1] == '\\')
6684     slash = "";
6685 #else
6686   if (!tmpdir)
6687     tmpdir = "/tmp";
6688   if (tmpdir[strlen (tmpdir) - 1] == '/')
6689     slash = "";
6690 #endif
6691
6692   char *templt = concat (tmpdir, slash, "etXXXXXX");
6693   int fd = mkostemp (templt, O_CLOEXEC);
6694   if (fd < 0 || close (fd) != 0)
6695     {
6696       int temp_errno = errno;
6697       free (templt);
6698       errno = temp_errno;
6699       templt = NULL;
6700     }
6701
6702 #if defined (DOS_NT)
6703   /* The file name will be used in shell redirection, so it needs to have
6704      DOS-style backslashes, or else the Windows shell will barf.  */
6705   char *p;
6706   for (p = templt; *p; p++)
6707     if (*p == '/')
6708       *p = '\\';
6709 #endif
6710
6711   return templt;
6712 }
6713
6714 /* Return a newly allocated string containing the file name of FILE
6715    relative to the absolute directory DIR (which should end with a slash). */
6716 static char *
6717 relative_filename (char *file, char *dir)
6718 {
6719   char *fp, *dp, *afn, *res;
6720   int i;
6721
6722   /* Find the common root of file and dir (with a trailing slash). */
6723   afn = absolute_filename (file, cwd);
6724   fp = afn;
6725   dp = dir;
6726   while (*fp++ == *dp++)
6727     continue;
6728   fp--, dp--;                   /* back to the first differing char */
6729 #ifdef DOS_NT
6730   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6731     return afn;
6732 #endif
6733   do                            /* look at the equal chars until '/' */
6734     fp--, dp--;
6735   while (*fp != '/');
6736
6737   /* Build a sequence of "../" strings for the resulting relative file name. */
6738   i = 0;
6739   while ((dp = strchr (dp + 1, '/')) != NULL)
6740     i += 1;
6741   res = xnew (3*i + strlen (fp + 1) + 1, char);
6742   char *z = res;
6743   while (i-- > 0)
6744     z = stpcpy (z, "../");
6745
6746   /* Add the file name relative to the common root of file and dir. */
6747   strcpy (z, fp + 1);
6748   free (afn);
6749
6750   return res;
6751 }
6752
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  An absolute FILE
   is copied as is; otherwise DIR and FILE are concatenated.  The
   "/dirname/.." and "/." components are then removed in place.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  char *slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      bool dot = slashp[1] == '.';

      if (dot && slashp[2] == '.'
          && (slashp[3] == '/' || slashp[3] == '\0'))
        {
          /* "/..": walk back to the start of the previous component. */
          char *cp = slashp - 1;

          while (cp >= res && !filename_is_absolute (cp))
            cp--;
          if (cp < res)
            cp = slashp;        /* the absolute name begins with "/.." */
#ifdef DOS_NT
          /* Under MSDOS and NT we get `d:/NAME' as absolute
             file name, so the luser could say `d:/../NAME'.
             We silently treat this as `d:/NAME'.  */
          else if (cp[0] != '/')
            cp = slashp;
#endif
          /* Shift the rest of the name, terminator included, over the
             removed part and rescan from there. */
          memmove (cp, slashp + 3, strlen (slashp + 2));
          slashp = cp;
          continue;
        }

      if (dot && (slashp[2] == '/' || slashp[2] == '\0'))
        {
          /* "/.": drop it and look at the same position again. */
          memmove (slashp, slashp + 2, strlen (slashp + 1));
          continue;
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] != '\0')
    return res;

  /* just a safety net: should never happen */
  free (res);
  return savestr ("/");
}
6815
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  When FILE contains no slash, a copy of DIR is
   returned.  FILE is modified temporarily and restored before
   returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash, resolve the directory part,
     then put the overwritten character back. */
  char saved = last_slash[1];
  last_slash[1] = '\0';
  char *dirname = absolute_filename (file, dir);
  last_slash[1] = saved;

  return dirname;
}
6835
/* Whether the argument string is an absolute file name: it starts with
   a slash or, under DOS_NT, with a drive letter, a colon and a slash.
   The argument string must have been canonicalized with
   canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
6847
/* Collapse each run of separators in FN into a single forward slash.
   Under DOS_NT backslashes count as separators too, and an upper-case
   drive letter is downcased first.  Works in place: the result is
   never longer than the input. */
static void
canonicalize_filename (register char *fn)
{
  register char *src = fn;      /* read cursor */
  register char *dst = fn;      /* write cursor, never ahead of SRC */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);

  /* Collapse multiple forward- and back-slashes into a single forward
     slash. */
  while (*src != '\0')
    {
      if (*src != '/' && *src != '\\')
        {
          *dst++ = *src++;
          continue;
        }
      *dst++ = '/';
      do
        src++;
      while (*src == '/' || *src == '\\');
    }

#else  /* !DOS_NT */

  /* Collapse multiple slashes into a single slash. */
  while (*src != '\0')
    {
      if (*src != '/')
        {
          *dst++ = *src++;
          continue;
        }
      *dst++ = '/';
      do
        src++;
      while (*src == '/');
    }

#endif  /* !DOS_NT */

  *dst = '\0';
}
6889
6890 \f
6891 /* Initialize a linebuffer for use. */
6892 static void
6893 linebuffer_init (linebuffer *lbp)
6894 {
6895   lbp->size = (DEBUG) ? 3 : 200;
6896   lbp->buffer = xnew (lbp->size, char);
6897   lbp->buffer[0] = '\0';
6898   lbp->len = 0;
6899 }
6900
6901 /* Set the minimum size of a string contained in a linebuffer. */
6902 static void
6903 linebuffer_setlen (linebuffer *lbp, int toksize)
6904 {
6905   while (lbp->size <= toksize)
6906     {
6907       lbp->size *= 2;
6908       xrnew (lbp->buffer, lbp->size, char);
6909     }
6910   lbp->len = toksize;
6911 }
6912
/* Like malloc, but call fatal with "virtual memory exhausted" when
   malloc returns NULL. */
static void *
xmalloc (size_t size)
{
  void *mem = malloc (size);

  if (!mem)
    fatal ("virtual memory exhausted");

  return mem;
}
6922
/* Like realloc, but call fatal with "virtual memory exhausted" when
   realloc returns NULL. */
static void *
xrealloc (void *ptr, size_t size)
{
  void *mem = realloc (ptr, size);

  if (!mem)
    fatal ("virtual memory exhausted");

  return mem;
}
6931
6932 /*
6933  * Local Variables:
6934  * indent-tabs-mode: t
6935  * tab-width: 8
6936  * fill-column: 79
6937  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6938  * c-file-style: "gnu"
6939  * End:
6940  */
6941
6942 /* etags.c ends here */