code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
  32   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
  33   2011  Free Software Foundation, Inc.
  34
  35 This file is not considered part of GNU Emacs.
  36
  37 This program is free software: you can redistribute it and/or modify
  38 it under the terms of the GNU General Public License as published by
  39 the Free Software Foundation, either version 3 of the License, or
  40 (at your option) any later version.
  41
  42 This program is distributed in the hope that it will be useful,
  43 but WITHOUT ANY WARRANTY; without even the implied warranty of
  44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  45 GNU General Public License for more details.
  46
  47 You should have received a copy of the GNU General Public License
  48 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  49
  50
  51 /* NB To comply with the above BSD license, copyright information is
  52 reproduced in etc/ETAGS.README.  That file should be updated when the
  53 above notices are.
  54
  55 To the best of our knowledge, this code was originally based on the
  56 ctags.c distributed with BSD4.2, which was copyrighted by the
  57 University of California, as described above. */
  58
  59
  60 /*
  61  * Authors:
  62  * 1983 Ctags originally by Ken Arnold.
  63  * 1984 Fortran added by Jim Kleckner.
  64  * 1984 Ed Pelegri-Llopart added C typedefs.
  65  * 1985 Emacs TAGS format by Richard Stallman.
  66  * 1989 Sam Kendall added C++.
  67  * 1992 Joseph B. Wells improved C and C++ parsing.
  68  * 1993 Francesco Potortì reorganized C and C++.
  69  * 1994 Line-by-line regexp tags by Tom Tromey.
  70  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  71  * 2002 #line directives by Francesco Potortì.
  72  *
  73  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  74  */
  75
  76 /*
  77  * If you want to add support for a new language, start by looking at the LUA
  78  * language, which is the simplest.  Alternatively, consider distributing etags
  79  * together with a configuration file containing regexp definitions for etags.
  80  */
  81
  82 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  83
  84 #define TRUE    1
  85 #define FALSE   0
  86
  87 #ifdef DEBUG
  88 #  undef DEBUG
  89 #  define DEBUG TRUE
  90 #else
  91 #  define DEBUG  FALSE
  92 #  define NDEBUG                /* disable assert */
  93 #endif
  94
  95 #ifdef HAVE_CONFIG_H
  96 # include <config.h>
  97   /* On some systems, Emacs defines static as nothing for the sake
  98      of unexec.  We don't want that here since we don't use unexec. */
  99 # undef static
 100 # ifndef PTR                    /* for XEmacs */
 101 #   define PTR void *
 102 # endif
 103 #else  /* no config.h */
 104 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
 105 #   define PTR void *           /* for generic pointers */
 106 # else /* not standard C */
 107 #   define const                /* remove const for old compilers' sake */
 108 #   define PTR long *           /* don't use void* */
 109 # endif
 110 #endif /* !HAVE_CONFIG_H */
 111
 112 #ifndef _GNU_SOURCE
 113 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 114 #endif
 115
 116 /* WIN32_NATIVE is for XEmacs.
 117    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 118 #ifdef WIN32_NATIVE
 119 # undef MSDOS
 120 # undef  WINDOWSNT
 121 # define WINDOWSNT
 122 #endif /* WIN32_NATIVE */
 123
 124 #ifdef MSDOS
 125 # undef MSDOS
 126 # define MSDOS TRUE
 127 # include <fcntl.h>
 128 # include <sys/param.h>
 129 # include <io.h>
 130 # ifndef HAVE_CONFIG_H
 131 #   define DOS_NT
 132 #   include <sys/config.h>
 133 # endif
 134 #else
 135 # define MSDOS FALSE
 136 #endif /* MSDOS */
 137
 138 #ifdef WINDOWSNT
 139 # include <stdlib.h>
 140 # include <fcntl.h>
 141 # include <string.h>
 142 # include <direct.h>
 143 # include <io.h>
 144 # define MAXPATHLEN _MAX_PATH
 145 # undef HAVE_NTGUI
 146 # undef  DOS_NT
 147 # define DOS_NT
 148 # ifndef HAVE_GETCWD
 149 #   define HAVE_GETCWD
 150 # endif /* undef HAVE_GETCWD */
 151 #else /* not WINDOWSNT */
 152 # ifdef STDC_HEADERS
 153 #  include <stdlib.h>
 154 #  include <string.h>
 155 # else /* no standard C headers */
 156    extern char *getenv (const char *);
 157    extern char *strcpy (char *, const char *);
 158    extern char *strncpy (char *, const char *, unsigned long);
 159    extern char *strcat (char *, const char *);
 160    extern char *strncat (char *, const char *, unsigned long);
 161    extern int strcmp (const char *, const char *);
 162    extern int strncmp (const char *, const char *, unsigned long);
 163    extern int system (const char *);
 164    extern unsigned long strlen (const char *);
 165    extern void *malloc (unsigned long);
 166    extern void *realloc (void *, unsigned long);
 167    extern void exit (int);
 168    extern void free (void *);
 169    extern void *memmove (void *, const void *, unsigned long);
 170 #  define EXIT_SUCCESS  0
 171 #  define EXIT_FAILURE  1
 172 # endif
 173 #endif /* !WINDOWSNT */
 174
 175 #include <unistd.h>
 176 #ifndef HAVE_UNISTD_H
 177 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 178     extern char *getcwd (char *buf, size_t size);
 179 # endif
 180 #endif /* HAVE_UNISTD_H */
 181
 182 #include <stdio.h>
 183 #include <ctype.h>
 184 #include <errno.h>
 185 #include <sys/types.h>
 186 #include <sys/stat.h>
 187
 188 #include <assert.h>
 189 #ifdef NDEBUG
 190 # undef  assert                 /* some systems have a buggy assert.h */
 191 # define assert(x) ((void) 0)
 192 #endif
 193
 194 #if !defined (S_ISREG) && defined (S_IFREG)
 195 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 196 #endif
 197
 198 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 199 # define NO_LONG_OPTIONS TRUE
 200 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 201   extern char *optarg;
 202   extern int optind, opterr;
 203 #else
 204 # define NO_LONG_OPTIONS FALSE
 205 # include <getopt.h>
 206 #endif /* NO_LONG_OPTIONS */
 207
 208 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 209 # ifdef __CYGWIN__              /* compiling on Cygwin */
 210                              !!! NOTICE !!!
 211  the regex.h distributed with Cygwin is not compatible with etags, alas!
 212 If you want regular expression support, you should delete this notice and
 213               arrange to use the GNU regex.h and regex.c.
 214 # endif
 215 #endif
 216 #include <regex.h>
 217
 218 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 219  Leave it undefined to make the program "etags", which makes emacs-style
 220  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 221 #ifdef CTAGS
 222 # undef  CTAGS
 223 # define CTAGS TRUE
 224 #else
 225 # define CTAGS FALSE
 226 #endif
 227
 228 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 229 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 230 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 231 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 232
/* Character classification helpers.  CHAR(x) converts X to unsigned and
   keeps only the bits selected by CHARS - 1, giving a value in
   0 .. CHARS-1.  That makes it usable as an index into the CHARS-sized
   tables and as an argument to the <ctype.h> functions even when X is a
   negative plain char.  */
#define CHARS 256               /* table size: one slot per 8-bit char value */
#define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
#define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
#define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
#define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
#define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
#define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */

/* <ctype.h> predicates applied to the masked character.  */
#define ISALNUM(c)      isalnum (CHAR(c))
#define ISALPHA(c)      isalpha (CHAR(c))
#define ISDIGIT(c)      isdigit (CHAR(c))
#define ISLOWER(c)      islower (CHAR(c))

/* <ctype.h> case conversions applied to the masked character.  */
#define lowcase(c)      tolower (CHAR(c))
#define upcase(c)       toupper (CHAR(c))
 248
 249
/*
 *      xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:    Type *xnew (int n, Type);
 *              Type *xrnew (OldPointer, int n, Type);
 *
 * xnew requests (n) * sizeof (Type) bytes and casts the result to Type *.
 * xrnew passes OldPointer and the same byte count to the reallocator,
 * assigns the result back to OldPointer (which must therefore be an
 * lvalue) and yields the assigned value.
 *
 * When DEBUG is nonzero the requests go through trace_malloc and
 * trace_realloc from chkmalloc.h, tagged with __FILE__ and __LINE__;
 * otherwise they go through xmalloc and xrealloc.
 *
 * NOTE(review): the multiplication is not checked for overflow.
 */
#if DEBUG
# include "chkmalloc.h"
# define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
                                                  (n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
                                        (char *) (op), (n) * sizeof (Type)))
#else
# define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
                                        (char *) (op), (n) * sizeof (Type)))
#endif
 267
/* `bool' is plain int in this file; TRUE and FALSE are its values.  */
#define bool int

/* Function type of a language parse function: it receives the input
   stream and returns nothing.  */
typedef void Lang_function (FILE *);
 271
/* Describes one compression format: the file name suffix that identifies
   it and the command used to decompress such a file.  */
typedef struct
{
  const char *suffix;           /* file name suffix for this compressor */
  const char *command;          /* takes one arg and decompresses to stdout */
} compressor;
 277
/* Describes one source language: its name and help text, the function
   that parses it, and the file name suffixes, file names and
   interpreters associated with it.  */
typedef struct
{
  const char *name;             /* language name */
  const char *help;             /* detailed help for the language */
  Lang_function *function;      /* parse function */
  const char **suffixes;        /* name suffixes of this language's files */
  const char **filenames;       /* names of this language's files */
  const char **interpreters;    /* interpreters for this language */
  bool metasource;              /* source used to generate other sources */
} language;
 288
/* Description of one input file.  Descriptions are chained into a
   singly linked list through the `next' member.  */
typedef struct fdesc
{
  struct fdesc *next;           /* for the linked list */
  char *infname;                /* uncompressed input file name */
  char *infabsname;             /* absolute uncompressed input file name */
  char *infabsdir;              /* absolute dir of input file */
  char *taggedfname;            /* file name to write in tagfile */
  language *lang;               /* language of file */
  char *prop;                   /* file properties to write in tagfile */
  bool usecharno;               /* etags tags shall contain char number */
  bool written;                 /* entry written in the tags file */
} fdesc;
 301
/* One tag.  Nodes carry left and right child links, so they form a
   binary tree; each node also points at the description of the file
   the tag was found in.  */
typedef struct node_st
{                               /* sorting structure */
  struct node_st *left, *right; /* left and right sons */
  fdesc *fdp;                   /* description of file to whom tag belongs */
  char *name;                   /* tag name */
  char *regex;                  /* search regexp */
  bool valid;                   /* write this tag on the tag file */
  bool is_func;                 /* function tag: use regexp in CTAGS mode */
  bool been_warned;             /* warning already given for duplicated tag */
  int lno;                      /* line number tag is on */
  long cno;                     /* character number line starts on */
} node;
 314
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;                    /* size of BUFFER */
  int len;                      /* length of the string held in BUFFER */
  char *buffer;                 /* the text itself */
} linebuffer;
 328
/* Used to support mixing of --lang and file names.  Each entry records
   what kind of item it is (arg_type), the language associated with it
   and the argument text itself.  */
typedef struct
{
  enum {
    at_language,                /* a language specification */
    at_regexp,                  /* a regular expression */
    at_filename,                /* a file name */
    at_stdin,                   /* read from stdin here */
    at_end                      /* stop parsing the list */
  } arg_type;                   /* argument type */
  language *lang;               /* language associated with the argument */
  char *what;                   /* the argument itself */
} argument;
 342
/* Structure defining a regular expression.  Entries are chained into a
   singly linked list through `p_next'.  */
typedef struct regexp
{
  struct regexp *p_next;        /* pointer to next in list */
  language *lang;               /* if set, use only for this language */
  char *pattern;                /* the regexp pattern */
  char *name;                   /* tag name */
  struct re_pattern_buffer *pat; /* the compiled pattern */
  struct re_registers regs;     /* re registers */
  bool error_signaled;          /* already signaled for this regexp */
  bool force_explicit_name;     /* do not allow implicit tag name */
  bool ignore_case;             /* ignore case when matching */
  bool multi_line;              /* do a multi-line match on the whole file */
} regexp;
 357
 358
 359 /* Many compilers barf on this:
 360         Lang_function Ada_funcs;
 361    so let's write it this way */
 362 static void Ada_funcs (FILE *);
 363 static void Asm_labels (FILE *);
 364 static void C_entries (int c_ext, FILE *);
 365 static void default_C_entries (FILE *);
 366 static void plain_C_entries (FILE *);
 367 static void Cjava_entries (FILE *);
 368 static void Cobol_paragraphs (FILE *);
 369 static void Cplusplus_entries (FILE *);
 370 static void Cstar_entries (FILE *);
 371 static void Erlang_functions (FILE *);
 372 static void Forth_words (FILE *);
 373 static void Fortran_functions (FILE *);
 374 static void HTML_labels (FILE *);
 375 static void Lisp_functions (FILE *);
 376 static void Lua_functions (FILE *);
 377 static void Makefile_targets (FILE *);
 378 static void Pascal_functions (FILE *);
 379 static void Perl_functions (FILE *);
 380 static void PHP_functions (FILE *);
 381 static void PS_functions (FILE *);
 382 static void Prolog_functions (FILE *);
 383 static void Python_functions (FILE *);
 384 static void Scheme_functions (FILE *);
 385 static void TeX_commands (FILE *);
 386 static void Texinfo_nodes (FILE *);
 387 static void Yacc_entries (FILE *);
 388 static void just_read_file (FILE *);
 389
 390 static void print_language_names (void);
 391 static void print_version (void);
 392 static void print_help (argument *);
 393 int main (int, char **);
 394
 395 static compressor *get_compressor_from_suffix (char *, char **);
 396 static language *get_language_from_langname (const char *);
 397 static language *get_language_from_interpreter (char *);
 398 static language *get_language_from_filename (char *, bool);
 399 static void readline (linebuffer *, FILE *);
 400 static long readline_internal (linebuffer *, FILE *);
 401 static bool nocase_tail (const char *);
 402 static void get_tag (char *, char **);
 403
 404 static void analyse_regex (char *);
 405 static void free_regexps (void);
 406 static void regex_tag_multiline (void);
 407 static void error (const char *, const char *);
 408 static void suggest_asking_for_help (void) NO_RETURN;
 409 void fatal (const char *, const char *) NO_RETURN;
 410 static void pfatal (const char *) NO_RETURN;
 411 static void add_node (node *, node **);
 412
 413 static void init (void);
 414 static void process_file_name (char *, language *);
 415 static void process_file (FILE *, char *, language *);
 416 static void find_entries (FILE *);
 417 static void free_tree (node *);
 418 static void free_fdesc (fdesc *);
 419 static void pfnote (char *, bool, char *, int, int, long);
 420 static void make_tag (const char *, int, bool, char *, int, int, long);
 421 static void invalidate_nodes (fdesc *, node **);
 422 static void put_entries (node *);
 423
 424 static char *concat (const char *, const char *, const char *);
 425 static char *skip_spaces (char *);
 426 static char *skip_non_spaces (char *);
 427 static char *savenstr (const char *, int);
 428 static char *savestr (const char *);
 429 static char *etags_strchr (const char *, int);
 430 static char *etags_strrchr (const char *, int);
 431 static int etags_strcasecmp (const char *, const char *);
 432 static int etags_strncasecmp (const char *, const char *, int);
 433 static char *etags_getcwd (void);
 434 static char *relative_filename (char *, char *);
 435 static char *absolute_filename (char *, char *);
 436 static char *absolute_dirname (char *, char *);
 437 static bool filename_is_absolute (char *f);
 438 static void canonicalize_filename (char *);
 439 static void linebuffer_init (linebuffer *);
 440 static void linebuffer_setlen (linebuffer *, int);
 441 static PTR xmalloc (unsigned int);
 442 static PTR xrealloc (char *, unsigned int);
 443
 444 \f
/* File-scope program state: output file, current input position and
   the tag tree.  */
static char searchar = '/';     /* use /.../ searches */

static char *tagfile;           /* output file */
static char *progname;          /* name this program was invoked with */
static char *cwd;               /* current working directory */
static char *tagfiledir;        /* directory of tagfile */
static FILE *tagf;              /* ioptr for tags file */

static fdesc *fdhead;           /* head of file description list */
static fdesc *curfdp;           /* current file description */
static int lineno;              /* line number of current line */
static long charno;             /* current character number */
static long linecharno;         /* charno of start of current line */
static char *dbp;               /* pointer to start of current tag */

/* NOTE(review): presumably a sentinel meaning "no valid character
   number" -- confirm against its uses.  */
static const int invalidcharno = -1;

static node *nodehead;          /* the head of the binary tree of tags */
static node *last_node;         /* the last node created */

static linebuffer lb;           /* the current line */
static linebuffer filebuf;      /* a buffer containing the whole file */
static linebuffer token_name;   /* a buffer containing a tag name */
 468
/* boolean "functions" (see init)       */
/* One CHARS-sized table per character class; the iswhite, notinname,
   intoken, begtoken and endtoken macros index them with CHAR(c).  */
static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
/* The character sets themselves, one string per class.  */
static const char
  /* white chars */
  *white = " \f\t\n\r\v",
  /* not in a name */
  *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
  /* token ending chars */
  *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
  /* token starting chars */
  *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
  /* valid in-token chars */
  *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 482
/* Command-line option flags.  Being file-scope statics without an
   initializer, they all start out as zero, i.e. FALSE.  */
static bool append_to_tagfile;  /* -a: append to tags */
/* The next five default to TRUE in C and derived languages.  */
static bool typedefs;           /* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
                                /* 0 struct/enum/union decls, and C++ */
                                /* member functions. */
static bool constantypedefs;    /* -d: create tags for C #define, enum */
                                /* constants and variables. */
                                /* -D: opposite of -d.  Default under ctags. */
static bool globals;            /* create tags for global variables */
static bool members;            /* create tags for C member variables */
static bool declarations;       /* --declarations: tag them and extern in C&Co*/
static bool no_line_directive;  /* ignore #line directives (undocumented) */
static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
static bool update;             /* -u: update tags */
static bool vgrind_style;       /* -v: create vgrind style index output */
static bool no_warnings;        /* -w: suppress warnings (undocumented) */
static bool cxref_style;        /* -x: create cxref style output */
static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
static bool ignoreindent;       /* -I: ignore indentation in C */
static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 503 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 504
/* STDIN is defined in LynxOS system headers */
#ifdef STDIN
# undef STDIN
#endif

/* Option code for --parse-stdin.  0x1001 is larger than any 8-bit
   character value, so it cannot coincide with a short option letter
   (presumably the reason for the choice).  */
#define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
static bool parsing_stdin;      /* --parse-stdin used */

static regexp *p_head;          /* list of all regexps */
static bool need_filebuf;       /* some regexes are multi-line */
 515
 516 static struct option longopts[] =
 517 {
 518   { "append",             no_argument,       NULL,               'a'   },
 519   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 520   { "c++",                no_argument,       NULL,               'C'   },
 521   { "declarations",       no_argument,       &declarations,      TRUE  },
 522   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 523   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 524   { "help",               no_argument,       NULL,               'h'   },
 525   { "help",               no_argument,       NULL,               'H'   },
 526   { "ignore-indentation", no_argument,       NULL,               'I'   },
 527   { "language",           required_argument, NULL,               'l'   },
 528   { "members",            no_argument,       &members,           TRUE  },
 529   { "no-members",         no_argument,       &members,           FALSE },
 530   { "output",             required_argument, NULL,               'o'   },
 531   { "regex",              required_argument, NULL,               'r'   },
 532   { "no-regex",           no_argument,       NULL,               'R'   },
 533   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 534   { "parse-stdin",        required_argument, NULL,               STDIN },
 535   { "version",            no_argument,       NULL,               'V'   },
 536
 537 #if CTAGS /* Ctags options */
 538   { "backward-search",    no_argument,       NULL,               'B'   },
 539   { "cxref",              no_argument,       NULL,               'x'   },
 540   { "defines",            no_argument,       NULL,               'd'   },
 541   { "globals",            no_argument,       &globals,           TRUE  },
 542   { "typedefs",           no_argument,       NULL,               't'   },
 543   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 544   { "update",             no_argument,       NULL,               'u'   },
 545   { "vgrind",             no_argument,       NULL,               'v'   },
 546   { "no-warn",            no_argument,       NULL,               'w'   },
 547
 548 #else /* Etags options */
 549   { "no-defines",         no_argument,       NULL,               'D'   },
 550   { "no-globals",         no_argument,       &globals,           FALSE },
 551   { "include",            required_argument, NULL,               'i'   },
 552 #endif
 553   { NULL }
 554 };
 555
/* Known compression formats: file name suffix and the command that
   decompresses such a file.  The entry with a NULL suffix ends the
   table.  */
static compressor compressors[] =
{
  { "z", "gzip -d -c"},
  { "Z", "gzip -d -c"},
  { "gz", "gzip -d -c"},
  { "GZ", "gzip -d -c"},
  { "bz2", "bzip2 -d -c" },
  { "xz", "xz -d -c" },
  { NULL }
};
 566
 567 /*
 568  * Language stuff.
 569  */
 570
 571 /* Ada code */
 572 static const char *Ada_suffixes [] =
 573   { "ads", "adb", "ada", NULL };
 574 static const char Ada_help [] =
 575 "In Ada code, functions, procedures, packages, tasks and types are\n\
 576 tags.  Use the `--packages-only' option to create tags for\n\
 577 packages only.\n\
 578 Ada tag names have suffixes indicating the type of entity:\n\
 579         Entity type:    Qualifier:\n\
 580         ------------    ----------\n\
 581         function        /f\n\
 582         procedure       /p\n\
 583         package spec    /s\n\
 584         package body    /b\n\
 585         type            /t\n\
 586         task            /k\n\
 587 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 588 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 589 will just search for any tag `bidule'.";
 590
 591 /* Assembly code */
 592 static const char *Asm_suffixes [] =
 593   { "a",        /* Unix assembler */
 594     "asm", /* Microcontroller assembly */
 595     "def", /* BSO/Tasking definition includes  */
 596     "inc", /* Microcontroller include files */
 597     "ins", /* Microcontroller include files */
 598     "s", "sa", /* Unix assembler */
 599     "S",   /* cpp-processed Unix assembler */
 600     "src", /* BSO/Tasking C compiler output */
 601     NULL
 602   };
 603 static const char Asm_help [] =
 604 "In assembler code, labels appearing at the beginning of a line,\n\
 605 followed by a colon, are tags.";
 606
 607
 608 /* Note that .c and .h can be considered C++, if the --c++ flag was
 609    given, or if the `class' or `template' keywords are met inside the file.
 610    That is why default_C_entries is called for these. */
 611 static const char *default_C_suffixes [] =
 612   { "c", "h", NULL };
 613 #if CTAGS                               /* C help for Ctags */
 614 static const char default_C_help [] =
 615 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 616 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 617 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 618 Use --globals to tag global variables.\n\
 619 You can tag function declarations and external variables by\n\
 620 using `--declarations', and struct members by using `--members'.";
 621 #else                                   /* C help for Etags */
 622 static const char default_C_help [] =
 623 "In C code, any C function or typedef is a tag, and so are\n\
 624 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 625 definitions and `enum' constants are tags unless you specify\n\
 626 `--no-defines'.  Global variables are tags unless you specify\n\
 627 `--no-globals' and so are struct members unless you specify\n\
 628 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 629 `--no-members' can make the tags table file much smaller.\n\
 630 You can tag function declarations and external variables by\n\
 631 using `--declarations'.";
 632 #endif  /* C help for Ctags and Etags */
 633
 634 static const char *Cplusplus_suffixes [] =
 635   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 636     "M",                        /* Objective C++ */
 637     "pdb",                      /* Postscript with C syntax */
 638     NULL };
 639 static const char Cplusplus_help [] =
 640 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 641 --help --lang=c --lang=c++ for full help.)\n\
 642 In addition to C tags, member functions are also recognized.  Member\n\
 643 variables are recognized unless you use the `--no-members' option.\n\
 644 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 645 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 646 `operator+'.";
 647
 648 static const char *Cjava_suffixes [] =
 649   { "java", NULL };
 650 static char Cjava_help [] =
 651 "In Java code, all the tags constructs of C and C++ code are\n\
 652 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 653
 654
 655 static const char *Cobol_suffixes [] =
 656   { "COB", "cob", NULL };
 657 static char Cobol_help [] =
 658 "In Cobol code, tags are paragraph names; that is, any word\n\
 659 starting in column 8 and followed by a period.";
 660
 661 static const char *Cstar_suffixes [] =
 662   { "cs", "hs", NULL };
 663
 664 static const char *Erlang_suffixes [] =
 665   { "erl", "hrl", NULL };
 666 static const char Erlang_help [] =
 667 "In Erlang code, the tags are the functions, records and macros\n\
 668 defined in the file.";
 669
 670 const char *Forth_suffixes [] =
 671   { "fth", "tok", NULL };
 672 static const char Forth_help [] =
 673 "In Forth code, tags are words defined by `:',\n\
 674 constant, code, create, defer, value, variable, buffer:, field.";
 675
 676 static const char *Fortran_suffixes [] =
 677   { "F", "f", "f90", "for", NULL };
 678 static const char Fortran_help [] =
 679 "In Fortran code, functions, subroutines and block data are tags.";
 680
 681 static const char *HTML_suffixes [] =
 682   { "htm", "html", "shtml", NULL };
 683 static const char HTML_help [] =
 684 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 685 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 686 occurrences of `id='.";
 687
 688 static const char *Lisp_suffixes [] =
 689   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 690 static const char Lisp_help [] =
 691 "In Lisp code, any function defined with `defun', any variable\n\
 692 defined with `defvar' or `defconst', and in general the first\n\
 693 argument of any expression that starts with `(def' in column zero\n\
 694 is a tag.";
 695
 696 static const char *Lua_suffixes [] =
 697   { "lua", "LUA", NULL };
 698 static const char Lua_help [] =
 699 "In Lua scripts, all functions are tags.";
 700
 701 static const char *Makefile_filenames [] =
 702   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 703 static const char Makefile_help [] =
 704 "In makefiles, targets are tags; additionally, variables are tags\n\
 705 unless you specify `--no-globals'.";
 706
 707 static const char *Objc_suffixes [] =
 708   { "lm",                       /* Objective lex file */
 709     "m",                        /* Objective C file */
 710      NULL };
 711 static const char Objc_help [] =
 712 "In Objective C code, tags include Objective C definitions for classes,\n\
 713 class categories, methods and protocols.  Tags for variables and\n\
 714 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 715 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 716
 717 static const char *Pascal_suffixes [] =
 718   { "p", "pas", NULL };
 719 static const char Pascal_help [] =
 720 "In Pascal code, the tags are the functions and procedures defined\n\
 721 in the file.";
 722 /* " // this is for working around an Emacs highlighting bug... */
 723
 724 static const char *Perl_suffixes [] =
 725   { "pl", "pm", NULL };
 726 static const char *Perl_interpreters [] =
 727   { "perl", "@PERL@", NULL };
 728 static const char Perl_help [] =
 729 "In Perl code, the tags are the packages, subroutines and variables\n\
 730 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 731 `--globals' if you want to tag global variables.  Tags for\n\
 732 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 733 defined in the default package is `main::SUB'.";
 734
 735 static const char *PHP_suffixes [] =
 736   { "php", "php3", "php4", NULL };
 737 static const char PHP_help [] =
 738 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 739 the `--no-members' option, vars are tags too.";
 740
 741 static const char *plain_C_suffixes [] =
 742   { "pc",                       /* Pro*C file */
 743      NULL };
 744
 745 static const char *PS_suffixes [] =
 746   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 747 static const char PS_help [] =
 748 "In PostScript code, the tags are the functions.";
 749
 750 static const char *Prolog_suffixes [] =
 751   { "prolog", NULL };
 752 static const char Prolog_help [] =
 753 "In Prolog code, tags are predicates and rules at the beginning of\n\
 754 line.";
 755
 756 static const char *Python_suffixes [] =
 757   { "py", NULL };
 758 static const char Python_help [] =
 759 "In Python code, `def' or `class' at the beginning of a line\n\
 760 generate a tag.";
 761
 762 /* Can't do the `SCM' or `scm' prefix with a version number. */
 763 static const char *Scheme_suffixes [] =
 764   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 765 static const char Scheme_help [] =
 766 "In Scheme code, tags include anything defined with `def' or with a\n\
 767 construct whose name starts with `def'.  They also include\n\
 768 variables set with `set!' at top level in the file.";
 769
 770 static const char *TeX_suffixes [] =
 771   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 772 static const char TeX_help [] =
 773 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 774 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 775 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 776 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 777 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 778 \n\
 779 Other commands can be specified by setting the environment variable\n\
 780 `TEXTAGS' to a colon-separated list like, for example,\n\
 781      TEXTAGS=\"mycommand:myothercommand\".";
 782
 783
 784 static const char *Texinfo_suffixes [] =
 785   { "texi", "texinfo", "txi", NULL };
 786 static const char Texinfo_help [] =
 787 "for texinfo files, lines starting with @node are tagged.";
 788
 789 static const char *Yacc_suffixes [] =
 790   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 791 static const char Yacc_help [] =
 792 "In Bison or Yacc input files, each rule defines as a tag the\n\
 793 nonterminal it constructs.  The portions of the file that contain\n\
 794 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 795 for full help).";
 796
 797 static const char auto_help [] =
 798 "`auto' is not a real language, it indicates to use\n\
 799 a default language for files base on file name suffix and file contents.";
 800
 801 static const char none_help [] =
 802 "`none' is not a real language, it indicates to only do\n\
 803 regexp processing on files.";
 804
 805 static const char no_lang_help [] =
 806 "No detailed help available for this language.";
 807
 808
 809 /*
 810  * Table of languages.
 811  *
 812  * It is ok for a given function to be listed under more than one
 813  * name.  I just didn't.
 814  */
 815
 816 static language lang_names [] =
 817 {
 818   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 819   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 820   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 821   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 822   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 823   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 824   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 825   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 826   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 827   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 828   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 829   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 830   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 831   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 832   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 833   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 834   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 835   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 836   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 837   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 838   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 839   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 840   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 841   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 842   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 843   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 844   { "auto",      auto_help },                      /* default guessing scheme */
 845   { "none",      none_help,      just_read_file }, /* regexp matching only */
 846   { NULL }                /* end of list */
 847 };
 848
 849 \f
 850 static void
 851 print_language_names (void)
 852 {
 853   language *lang;
 854   const char **name, **ext;
 855
 856   puts ("\nThese are the currently supported languages, along with the\n\
 857 default file names and dot suffixes:");
 858   for (lang = lang_names; lang->name != NULL; lang++)
 859     {
 860       printf ("  %-*s", 10, lang->name);
 861       if (lang->filenames != NULL)
 862         for (name = lang->filenames; *name != NULL; name++)
 863           printf (" %s", *name);
 864       if (lang->suffixes != NULL)
 865         for (ext = lang->suffixes; *ext != NULL; ext++)
 866           printf (" .%s", *ext);
 867       puts ("");
 868     }
 869   puts ("where `auto' means use default language for files based on file\n\
 870 name suffix, and `none' means only do regexp processing on files.\n\
 871 If no language is specified and no matching suffix is found,\n\
 872 the first line of the file is read for a sharp-bang (#!) sequence\n\
 873 followed by the name of an interpreter.  If no such sequence is found,\n\
 874 Fortran is tried first; if no tags are found, C is tried next.\n\
 875 When parsing any C file, a \"class\" or \"template\" keyword\n\
 876 switches to C++.");
 877   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 878 \n\
 879 For detailed help on a given language use, for example,\n\
 880 etags --help --lang=ada.");
 881 }
 882
 883 #ifndef EMACS_NAME
 884 # define EMACS_NAME "standalone"
 885 #endif
 886 #ifndef VERSION
 887 # define VERSION "17.38.1.4"
 888 #endif
 889 static void
 890 print_version (void)
 891 {
 892   /* Makes it easier to update automatically. */
 893   char emacs_copyright[] = "Copyright (C) 2011 Free Software Foundation, Inc.";
 894
 895   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 896   puts (emacs_copyright);
 897   puts ("This program is distributed under the terms in ETAGS.README");
 898
 899   exit (EXIT_SUCCESS);
 900 }
 901
 902 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 903 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
 904 #endif
 905
 906 static void
 907 print_help (argument *argbuffer)
 908 {
 909   bool help_for_lang = FALSE;
 910
 911   for (; argbuffer->arg_type != at_end; argbuffer++)
 912     if (argbuffer->arg_type == at_language)
 913       {
 914         if (help_for_lang)
 915           puts ("");
 916         puts (argbuffer->lang->help);
 917         help_for_lang = TRUE;
 918       }
 919
 920   if (help_for_lang)
 921     exit (EXIT_SUCCESS);
 922
 923   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 924 \n\
 925 These are the options accepted by %s.\n", progname, progname);
 926   if (NO_LONG_OPTIONS)
 927     puts ("WARNING: long option names do not work with this executable,\n\
 928 as it is not linked with GNU getopt.");
 929   else
 930     puts ("You may use unambiguous abbreviations for the long option names.");
 931   puts ("  A - as file name means read names from stdin (one per line).\n\
 932 Absolute names are stored in the output file as they are.\n\
 933 Relative ones are stored relative to the output file's directory.\n");
 934
 935   puts ("-a, --append\n\
 936         Append tag entries to existing tags file.");
 937
 938   puts ("--packages-only\n\
 939         For Ada files, only generate tags for packages.");
 940
 941   if (CTAGS)
 942     puts ("-B, --backward-search\n\
 943         Write the search commands for the tag entries using '?', the\n\
 944         backward-search command instead of '/', the forward-search command.");
 945
 946   /* This option is mostly obsolete, because etags can now automatically
 947      detect C++.  Retained for backward compatibility and for debugging and
 948      experimentation.  In principle, we could want to tag as C++ even
 949      before any "class" or "template" keyword.
 950   puts ("-C, --c++\n\
 951         Treat files whose name suffix defaults to C language as C++ files.");
 952   */
 953
 954   puts ("--declarations\n\
 955         In C and derived languages, create tags for function declarations,");
 956   if (CTAGS)
 957     puts ("\tand create tags for extern variables if --globals is used.");
 958   else
 959     puts
 960       ("\tand create tags for extern variables unless --no-globals is used.");
 961
 962   if (CTAGS)
 963     puts ("-d, --defines\n\
 964         Create tag entries for C #define constants and enum constants, too.");
 965   else
 966     puts ("-D, --no-defines\n\
 967         Don't create tag entries for C #define constants and enum constants.\n\
 968         This makes the tags file smaller.");
 969
 970   if (!CTAGS)
 971     puts ("-i FILE, --include=FILE\n\
 972         Include a note in tag file indicating that, when searching for\n\
 973         a tag, one should also consult the tags file FILE after\n\
 974         checking the current file.");
 975
 976   puts ("-l LANG, --language=LANG\n\
 977         Force the following files to be considered as written in the\n\
 978         named language up to the next --language=LANG option.");
 979
 980   if (CTAGS)
 981     puts ("--globals\n\
 982         Create tag entries for global variables in some languages.");
 983   else
 984     puts ("--no-globals\n\
 985         Do not create tag entries for global variables in some\n\
 986         languages.  This makes the tags file smaller.");
 987
 988   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 989     puts ("--no-line-directive\n\
 990         Ignore #line preprocessor directives in C and derived languages.");
 991
 992   if (CTAGS)
 993     puts ("--members\n\
 994         Create tag entries for members of structures in some languages.");
 995   else
 996     puts ("--no-members\n\
 997         Do not create tag entries for members of structures\n\
 998         in some languages.");
 999
1000   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1001         Make a tag for each line matching a regular expression pattern\n\
1002         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1003         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
1004         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1005         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
1006   puts ("       If TAGNAME/ is present, the tags created are named.\n\
1007         For example Tcl named tags can be created with:\n\
1008           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1009         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1010         `m' means to allow multi-line matches, `s' implies `m' and\n\
1011         causes dot to match any character, including newline.");
1012
1013   puts ("-R, --no-regex\n\
1014         Don't create tags from regexps for the following files.");
1015
1016   puts ("-I, --ignore-indentation\n\
1017         In C and C++ do not assume that a closing brace in the first\n\
1018         column is the final brace of a function or structure definition.");
1019
1020   puts ("-o FILE, --output=FILE\n\
1021         Write the tags to FILE.");
1022
1023   puts ("--parse-stdin=NAME\n\
1024         Read from standard input and record tags as belonging to file NAME.");
1025
1026   if (CTAGS)
1027     {
1028       puts ("-t, --typedefs\n\
1029         Generate tag entries for C and Ada typedefs.");
1030       puts ("-T, --typedefs-and-c++\n\
1031         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1032         and C++ member functions.");
1033     }
1034
1035   if (CTAGS)
1036     puts ("-u, --update\n\
1037         Update the tag entries for the given files, leaving tag\n\
1038         entries for other files in place.  Currently, this is\n\
1039         implemented by deleting the existing entries for the given\n\
1040         files and then rewriting the new entries at the end of the\n\
1041         tags file.  It is often faster to simply rebuild the entire\n\
1042         tag file than to use this.");
1043
1044   if (CTAGS)
1045     {
1046       puts ("-v, --vgrind\n\
1047         Print on the standard output an index of items intended for\n\
1048         human consumption, similar to the output of vgrind.  The index\n\
1049         is sorted, and gives the page number of each item.");
1050
1051       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1052         puts ("-w, --no-duplicates\n\
1053         Do not create duplicate tag entries, for compatibility with\n\
1054         traditional ctags.");
1055
1056       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1057         puts ("-w, --no-warn\n\
1058         Suppress warning messages about duplicate tag entries.");
1059
1060       puts ("-x, --cxref\n\
1061         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1062         The output uses line numbers instead of page numbers, but\n\
1063         beyond that the differences are cosmetic; try both to see\n\
1064         which you like.");
1065     }
1066
1067   puts ("-V, --version\n\
1068         Print the version of the program.\n\
1069 -h, --help\n\
1070         Print this help message.\n\
1071         Followed by one or more `--language' options prints detailed\n\
1072         help about tag generation for the specified languages.");
1073
1074   print_language_names ();
1075
1076   puts ("");
1077   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1078
1079   exit (EXIT_SUCCESS);
1080 }
1081
1082 \f
1083 int
1084 main (int argc, char **argv)
1085 {
1086   int i;
1087   unsigned int nincluded_files;
1088   char **included_files;
1089   argument *argbuffer;
1090   int current_arg, file_count;
1091   linebuffer filename_lb;
1092   bool help_asked = FALSE;
1093  char *optstring;
1094  int opt;
1095
1096
1097 #ifdef DOS_NT
1098   _fmode = O_BINARY;   /* all of files are treated as binary files */
1099 #endif /* DOS_NT */
1100
1101   progname = argv[0];
1102   nincluded_files = 0;
1103   included_files = xnew (argc, char *);
1104   current_arg = 0;
1105   file_count = 0;
1106
1107   /* Allocate enough no matter what happens.  Overkill, but each one
1108      is small. */
1109   argbuffer = xnew (argc, argument);
1110
1111   /*
1112    * Always find typedefs and structure tags.
1113    * Also default to find macro constants, enum constants, struct
1114    * members and global variables.  Do it for both etags and ctags.
1115    */
1116   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1117   globals = members = TRUE;
1118
1119   /* When the optstring begins with a '-' getopt_long does not rearrange the
1120      non-options arguments to be at the end, but leaves them alone. */
1121   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1122                       "ac:Cf:Il:o:r:RSVhH",
1123                       (CTAGS) ? "BxdtTuvw" : "Di:");
1124
1125   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1126     switch (opt)
1127       {
1128       case 0:
1129         /* If getopt returns 0, then it has already processed a
1130            long-named option.  We should do nothing.  */
1131         break;
1132
1133       case 1:
1134         /* This means that a file name has been seen.  Record it. */
1135         argbuffer[current_arg].arg_type = at_filename;
1136         argbuffer[current_arg].what     = optarg;
1137         ++current_arg;
1138         ++file_count;
1139         break;
1140
1141       case STDIN:
1142         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1143         argbuffer[current_arg].arg_type = at_stdin;
1144         argbuffer[current_arg].what     = optarg;
1145         ++current_arg;
1146         ++file_count;
1147         if (parsing_stdin)
1148           fatal ("cannot parse standard input more than once", (char *)NULL);
1149         parsing_stdin = TRUE;
1150         break;
1151
1152         /* Common options. */
1153       case 'a': append_to_tagfile = TRUE;       break;
1154       case 'C': cplusplus = TRUE;               break;
1155       case 'f':         /* for compatibility with old makefiles */
1156       case 'o':
1157         if (tagfile)
1158           {
1159             error ("-o option may only be given once.", (char *)NULL);
1160             suggest_asking_for_help ();
1161             /* NOTREACHED */
1162           }
1163         tagfile = optarg;
1164         break;
1165       case 'I':
1166       case 'S':         /* for backward compatibility */
1167         ignoreindent = TRUE;
1168         break;
1169       case 'l':
1170         {
1171           language *lang = get_language_from_langname (optarg);
1172           if (lang != NULL)
1173             {
1174               argbuffer[current_arg].lang = lang;
1175               argbuffer[current_arg].arg_type = at_language;
1176               ++current_arg;
1177             }
1178         }
1179         break;
1180       case 'c':
1181         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1182         optarg = concat (optarg, "i", ""); /* memory leak here */
1183         /* FALLTHRU */
1184       case 'r':
1185         argbuffer[current_arg].arg_type = at_regexp;
1186         argbuffer[current_arg].what = optarg;
1187         ++current_arg;
1188         break;
1189       case 'R':
1190         argbuffer[current_arg].arg_type = at_regexp;
1191         argbuffer[current_arg].what = NULL;
1192         ++current_arg;
1193         break;
1194       case 'V':
1195         print_version ();
1196         break;
1197       case 'h':
1198       case 'H':
1199         help_asked = TRUE;
1200         break;
1201
1202         /* Etags options */
1203       case 'D': constantypedefs = FALSE;                        break;
1204       case 'i': included_files[nincluded_files++] = optarg;     break;
1205
1206         /* Ctags options. */
1207       case 'B': searchar = '?';                                 break;
1208       case 'd': constantypedefs = TRUE;                         break;
1209       case 't': typedefs = TRUE;                                break;
1210       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1211       case 'u': update = TRUE;                                  break;
1212       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1213       case 'x': cxref_style = TRUE;                             break;
1214       case 'w': no_warnings = TRUE;                             break;
1215       default:
1216         suggest_asking_for_help ();
1217         /* NOTREACHED */
1218       }
1219
1220   /* No more options.  Store the rest of arguments. */
1221   for (; optind < argc; optind++)
1222     {
1223       argbuffer[current_arg].arg_type = at_filename;
1224       argbuffer[current_arg].what = argv[optind];
1225       ++current_arg;
1226       ++file_count;
1227     }
1228
1229   argbuffer[current_arg].arg_type = at_end;
1230
1231   if (help_asked)
1232     print_help (argbuffer);
1233     /* NOTREACHED */
1234
1235   if (nincluded_files == 0 && file_count == 0)
1236     {
1237       error ("no input files specified.", (char *)NULL);
1238       suggest_asking_for_help ();
1239       /* NOTREACHED */
1240     }
1241
1242   if (tagfile == NULL)
1243     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1244   cwd = etags_getcwd ();        /* the current working directory */
1245   if (cwd[strlen (cwd) - 1] != '/')
1246     {
1247       char *oldcwd = cwd;
1248       cwd = concat (oldcwd, "/", "");
1249       free (oldcwd);
1250     }
1251
1252   /* Compute base directory for relative file names. */
1253   if (streq (tagfile, "-")
1254       || strneq (tagfile, "/dev/", 5))
1255     tagfiledir = cwd;            /* relative file names are relative to cwd */
1256   else
1257     {
1258       canonicalize_filename (tagfile);
1259       tagfiledir = absolute_dirname (tagfile, cwd);
1260     }
1261
1262   init ();                      /* set up boolean "functions" */
1263
1264   linebuffer_init (&lb);
1265   linebuffer_init (&filename_lb);
1266   linebuffer_init (&filebuf);
1267   linebuffer_init (&token_name);
1268
1269   if (!CTAGS)
1270     {
1271       if (streq (tagfile, "-"))
1272         {
1273           tagf = stdout;
1274 #ifdef DOS_NT
1275           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1276              doesn't take effect until after `stdout' is already open). */
1277           if (!isatty (fileno (stdout)))
1278             setmode (fileno (stdout), O_BINARY);
1279 #endif /* DOS_NT */
1280         }
1281       else
1282         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1283       if (tagf == NULL)
1284         pfatal (tagfile);
1285     }
1286
1287   /*
1288    * Loop through files finding functions.
1289    */
1290   for (i = 0; i < current_arg; i++)
1291     {
1292       static language *lang;    /* non-NULL if language is forced */
1293       char *this_file;
1294
1295       switch (argbuffer[i].arg_type)
1296         {
1297         case at_language:
1298           lang = argbuffer[i].lang;
1299           break;
1300         case at_regexp:
1301           analyse_regex (argbuffer[i].what);
1302           break;
1303         case at_filename:
1304               this_file = argbuffer[i].what;
1305               /* Input file named "-" means read file names from stdin
1306                  (one per line) and use them. */
1307               if (streq (this_file, "-"))
1308                 {
1309                   if (parsing_stdin)
1310                     fatal ("cannot parse standard input AND read file names from it",
1311                            (char *)NULL);
1312                   while (readline_internal (&filename_lb, stdin) > 0)
1313                     process_file_name (filename_lb.buffer, lang);
1314                 }
1315               else
1316                 process_file_name (this_file, lang);
1317           break;
1318         case at_stdin:
1319           this_file = argbuffer[i].what;
1320           process_file (stdin, this_file, lang);
1321           break;
1322         }
1323     }
1324
1325   free_regexps ();
1326   free (lb.buffer);
1327   free (filebuf.buffer);
1328   free (token_name.buffer);
1329
1330   if (!CTAGS || cxref_style)
1331     {
1332       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1333       put_entries (nodehead);
1334       free_tree (nodehead);
1335       nodehead = NULL;
1336       if (!CTAGS)
1337         {
1338           fdesc *fdp;
1339
1340           /* Output file entries that have no tags. */
1341           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1342             if (!fdp->written)
1343               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1344
1345           while (nincluded_files-- > 0)
1346             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1347
1348           if (fclose (tagf) == EOF)
1349             pfatal (tagfile);
1350         }
1351
1352       exit (EXIT_SUCCESS);
1353     }
1354
1355   /* From here on, we are in (CTAGS && !cxref_style) */
1356   if (update)
1357     {
1358       char cmd[BUFSIZ];
1359       for (i = 0; i < current_arg; ++i)
1360         {
1361           switch (argbuffer[i].arg_type)
1362             {
1363             case at_filename:
1364             case at_stdin:
1365               break;
1366             default:
1367               continue;         /* the for loop */
1368             }
1369           sprintf (cmd,
1370                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1371                    tagfile, argbuffer[i].what, tagfile);
1372           if (system (cmd) != EXIT_SUCCESS)
1373             fatal ("failed to execute shell command", (char *)NULL);
1374         }
1375       append_to_tagfile = TRUE;
1376     }
1377
1378   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1379   if (tagf == NULL)
1380     pfatal (tagfile);
1381   put_entries (nodehead);       /* write all the tags (CTAGS) */
1382   free_tree (nodehead);
1383   nodehead = NULL;
1384   if (fclose (tagf) == EOF)
1385     pfatal (tagfile);
1386
1387   if (CTAGS)
1388     if (append_to_tagfile || update)
1389       {
1390         char cmd[2*BUFSIZ+20];
1391         /* Maybe these should be used:
1392            setenv ("LC_COLLATE", "C", 1);
1393            setenv ("LC_ALL", "C", 1); */
1394         sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1395         exit (system (cmd));
1396       }
1397   return EXIT_SUCCESS;
1398 }
1399
1400
1401 /*
1402  * Return a compressor given the file name.  If EXTPTR is non-zero,
1403  * return a pointer into FILE where the compressor-specific
1404  * extension begins.  If no compressor is found, NULL is returned
1405  * and EXTPTR is not significant.
1406  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1407  */
1408 static compressor *
1409 get_compressor_from_suffix (char *file, char **extptr)
1410 {
1411   compressor *compr;
1412   char *slash, *suffix;
1413
1414   /* File has been processed by canonicalize_filename,
1415      so we don't need to consider backslashes on DOS_NT.  */
1416   slash = etags_strrchr (file, '/');
1417   suffix = etags_strrchr (file, '.');
1418   if (suffix == NULL || suffix < slash)
1419     return NULL;
1420   if (extptr != NULL)
1421     *extptr = suffix;
1422   suffix += 1;
1423   /* Let those poor souls who live with DOS 8+3 file name limits get
1424      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1425      Only the first do loop is run if not MSDOS */
1426   do
1427     {
1428       for (compr = compressors; compr->suffix != NULL; compr++)
1429         if (streq (compr->suffix, suffix))
1430           return compr;
1431       if (!MSDOS)
1432         break;                  /* do it only once: not really a loop */
1433       if (extptr != NULL)
1434         *extptr = ++suffix;
1435     } while (*suffix != '\0');
1436   return NULL;
1437 }
1438
1439
1440
1441 /*
1442  * Return a language given the name.
1443  */
1444 static language *
1445 get_language_from_langname (const char *name)
1446 {
1447   language *lang;
1448
1449   if (name == NULL)
1450     error ("empty language name", (char *)NULL);
1451   else
1452     {
1453       for (lang = lang_names; lang->name != NULL; lang++)
1454         if (streq (name, lang->name))
1455           return lang;
1456       error ("unknown language \"%s\"", name);
1457     }
1458
1459   return NULL;
1460 }
1461
1462
1463 /*
1464  * Return a language given the interpreter name.
1465  */
1466 static language *
1467 get_language_from_interpreter (char *interpreter)
1468 {
1469   language *lang;
1470   const char **iname;
1471
1472   if (interpreter == NULL)
1473     return NULL;
1474   for (lang = lang_names; lang->name != NULL; lang++)
1475     if (lang->interpreters != NULL)
1476       for (iname = lang->interpreters; *iname != NULL; iname++)
1477         if (streq (*iname, interpreter))
1478             return lang;
1479
1480   return NULL;
1481 }
1482
1483
1484
1485 /*
1486  * Return a language given the file name.
1487  */
1488 static language *
1489 get_language_from_filename (char *file, int case_sensitive)
1490 {
1491   language *lang;
1492   const char **name, **ext, *suffix;
1493
1494   /* Try whole file name first. */
1495   for (lang = lang_names; lang->name != NULL; lang++)
1496     if (lang->filenames != NULL)
1497       for (name = lang->filenames; *name != NULL; name++)
1498         if ((case_sensitive)
1499             ? streq (*name, file)
1500             : strcaseeq (*name, file))
1501           return lang;
1502
1503   /* If not found, try suffix after last dot. */
1504   suffix = etags_strrchr (file, '.');
1505   if (suffix == NULL)
1506     return NULL;
1507   suffix += 1;
1508   for (lang = lang_names; lang->name != NULL; lang++)
1509     if (lang->suffixes != NULL)
1510       for (ext = lang->suffixes; *ext != NULL; ext++)
1511         if ((case_sensitive)
1512             ? streq (*ext, suffix)
1513             : strcaseeq (*ext, suffix))
1514           return lang;
1515   return NULL;
1516 }
1517
1518 \f
1519 /*
1520  * This routine is called on each file argument.
1521  */
1522 static void
1523 process_file_name (char *file, language *lang)
1524 {
1525   struct stat stat_buf;
1526   FILE *inf;
1527   fdesc *fdp;
1528   compressor *compr;
1529   char *compressed_name, *uncompressed_name;
1530   char *ext, *real_name;
1531   int retval;
1532
1533   canonicalize_filename (file);
1534   if (streq (file, tagfile) && !streq (tagfile, "-"))
1535     {
1536       error ("skipping inclusion of %s in self.", file);
1537       return;
1538     }
1539   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1540     {
1541       compressed_name = NULL;
1542       real_name = uncompressed_name = savestr (file);
1543     }
1544   else
1545     {
1546       real_name = compressed_name = savestr (file);
1547       uncompressed_name = savenstr (file, ext - file);
1548     }
1549
1550   /* If the canonicalized uncompressed name
1551      has already been dealt with, skip it silently. */
1552   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1553     {
1554       assert (fdp->infname != NULL);
1555       if (streq (uncompressed_name, fdp->infname))
1556         goto cleanup;
1557     }
1558
1559   if (stat (real_name, &stat_buf) != 0)
1560     {
1561       /* Reset real_name and try with a different name. */
1562       real_name = NULL;
1563       if (compressed_name != NULL) /* try with the given suffix */
1564         {
1565           if (stat (uncompressed_name, &stat_buf) == 0)
1566             real_name = uncompressed_name;
1567         }
1568       else                      /* try all possible suffixes */
1569         {
1570           for (compr = compressors; compr->suffix != NULL; compr++)
1571             {
1572               compressed_name = concat (file, ".", compr->suffix);
1573               if (stat (compressed_name, &stat_buf) != 0)
1574                 {
1575                   if (MSDOS)
1576                     {
1577                       char *suf = compressed_name + strlen (file);
1578                       size_t suflen = strlen (compr->suffix) + 1;
1579                       for ( ; suf[1]; suf++, suflen--)
1580                         {
1581                           memmove (suf, suf + 1, suflen);
1582                           if (stat (compressed_name, &stat_buf) == 0)
1583                             {
1584                               real_name = compressed_name;
1585                               break;
1586                             }
1587                         }
1588                       if (real_name != NULL)
1589                         break;
1590                     } /* MSDOS */
1591                   free (compressed_name);
1592                   compressed_name = NULL;
1593                 }
1594               else
1595                 {
1596                   real_name = compressed_name;
1597                   break;
1598                 }
1599             }
1600         }
1601       if (real_name == NULL)
1602         {
1603           perror (file);
1604           goto cleanup;
1605         }
1606     } /* try with a different name */
1607
1608   if (!S_ISREG (stat_buf.st_mode))
1609     {
1610       error ("skipping %s: it is not a regular file.", real_name);
1611       goto cleanup;
1612     }
1613   if (real_name == compressed_name)
1614     {
1615       char *cmd = concat (compr->command, " ", real_name);
1616       inf = (FILE *) popen (cmd, "r");
1617       free (cmd);
1618     }
1619   else
1620     inf = fopen (real_name, "r");
1621   if (inf == NULL)
1622     {
1623       perror (real_name);
1624       goto cleanup;
1625     }
1626
1627   process_file (inf, uncompressed_name, lang);
1628
1629   if (real_name == compressed_name)
1630     retval = pclose (inf);
1631   else
1632     retval = fclose (inf);
1633   if (retval < 0)
1634     pfatal (file);
1635
1636  cleanup:
1637   free (compressed_name);
1638   free (uncompressed_name);
1639   last_node = NULL;
1640   curfdp = NULL;
1641   return;
1642 }
1643
1644 static void
1645 process_file (FILE *fh, char *fn, language *lang)
1646 {
1647   static const fdesc emptyfdesc;
1648   fdesc *fdp;
1649
1650   /* Create a new input file description entry. */
1651   fdp = xnew (1, fdesc);
1652   *fdp = emptyfdesc;
1653   fdp->next = fdhead;
1654   fdp->infname = savestr (fn);
1655   fdp->lang = lang;
1656   fdp->infabsname = absolute_filename (fn, cwd);
1657   fdp->infabsdir = absolute_dirname (fn, cwd);
1658   if (filename_is_absolute (fn))
1659     {
1660       /* An absolute file name.  Canonicalize it. */
1661       fdp->taggedfname = absolute_filename (fn, NULL);
1662     }
1663   else
1664     {
1665       /* A file name relative to cwd.  Make it relative
1666          to the directory of the tags file. */
1667       fdp->taggedfname = relative_filename (fn, tagfiledir);
1668     }
1669   fdp->usecharno = TRUE;        /* use char position when making tags */
1670   fdp->prop = NULL;
1671   fdp->written = FALSE;         /* not written on tags file yet */
1672
1673   fdhead = fdp;
1674   curfdp = fdhead;              /* the current file description */
1675
1676   find_entries (fh);
1677
1678   /* If not Ctags, and if this is not metasource and if it contained no #line
1679      directives, we can write the tags and free all nodes pointing to
1680      curfdp. */
1681   if (!CTAGS
1682       && curfdp->usecharno      /* no #line directives in this file */
1683       && !curfdp->lang->metasource)
1684     {
1685       node *np, *prev;
1686
1687       /* Look for the head of the sublist relative to this file.  See add_node
1688          for the structure of the node tree. */
1689       prev = NULL;
1690       for (np = nodehead; np != NULL; prev = np, np = np->left)
1691         if (np->fdp == curfdp)
1692           break;
1693
1694       /* If we generated tags for this file, write and delete them. */
1695       if (np != NULL)
1696         {
1697           /* This is the head of the last sublist, if any.  The following
1698              instructions depend on this being true. */
1699           assert (np->left == NULL);
1700
1701           assert (fdhead == curfdp);
1702           assert (last_node->fdp == curfdp);
1703           put_entries (np);     /* write tags for file curfdp->taggedfname */
1704           free_tree (np);       /* remove the written nodes */
1705           if (prev == NULL)
1706             nodehead = NULL;    /* no nodes left */
1707           else
1708             prev->left = NULL;  /* delete the pointer to the sublist */
1709         }
1710     }
1711 }
1712
1713 /*
1714  * This routine sets up the boolean pseudo-functions which work
1715  * by setting boolean flags dependent upon the corresponding character.
1716  * Every char which is NOT in that string is not a white char.  Therefore,
1717  * all of the array "_wht" is set to FALSE, and then the elements
1718  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1719  * of a char is TRUE if it is the string "white", else FALSE.
1720  */
1721 static void
1722 init (void)
1723 {
1724   register const char *sp;
1725   register int i;
1726
1727   for (i = 0; i < CHARS; i++)
1728     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1729   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1730   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1731   notinname('\0') = notinname('\n');
1732   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1733   begtoken('\0') = begtoken('\n');
1734   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1735   intoken('\0') = intoken('\n');
1736   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1737   endtoken('\0') = endtoken('\n');
1738 }
1739
1740 /*
1741  * This routine opens the specified file and calls the function
1742  * which finds the function and type definitions.
1743  */
1744 static void
1745 find_entries (FILE *inf)
1746 {
1747   char *cp;
1748   language *lang = curfdp->lang;
1749   Lang_function *parser = NULL;
1750
1751   /* If user specified a language, use it. */
1752   if (lang != NULL && lang->function != NULL)
1753     {
1754       parser = lang->function;
1755     }
1756
1757   /* Else try to guess the language given the file name. */
1758   if (parser == NULL)
1759     {
1760       lang = get_language_from_filename (curfdp->infname, TRUE);
1761       if (lang != NULL && lang->function != NULL)
1762         {
1763           curfdp->lang = lang;
1764           parser = lang->function;
1765         }
1766     }
1767
1768   /* Else look for sharp-bang as the first two characters. */
1769   if (parser == NULL
1770       && readline_internal (&lb, inf) > 0
1771       && lb.len >= 2
1772       && lb.buffer[0] == '#'
1773       && lb.buffer[1] == '!')
1774     {
1775       char *lp;
1776
1777       /* Set lp to point at the first char after the last slash in the
1778          line or, if no slashes, at the first nonblank.  Then set cp to
1779          the first successive blank and terminate the string. */
1780       lp = etags_strrchr (lb.buffer+2, '/');
1781       if (lp != NULL)
1782         lp += 1;
1783       else
1784         lp = skip_spaces (lb.buffer + 2);
1785       cp = skip_non_spaces (lp);
1786       *cp = '\0';
1787
1788       if (strlen (lp) > 0)
1789         {
1790           lang = get_language_from_interpreter (lp);
1791           if (lang != NULL && lang->function != NULL)
1792             {
1793               curfdp->lang = lang;
1794               parser = lang->function;
1795             }
1796         }
1797     }
1798
1799   /* We rewind here, even if inf may be a pipe.  We fail if the
1800      length of the first line is longer than the pipe block size,
1801      which is unlikely. */
1802   rewind (inf);
1803
1804   /* Else try to guess the language given the case insensitive file name. */
1805   if (parser == NULL)
1806     {
1807       lang = get_language_from_filename (curfdp->infname, FALSE);
1808       if (lang != NULL && lang->function != NULL)
1809         {
1810           curfdp->lang = lang;
1811           parser = lang->function;
1812         }
1813     }
1814
1815   /* Else try Fortran or C. */
1816   if (parser == NULL)
1817     {
1818       node *old_last_node = last_node;
1819
1820       curfdp->lang = get_language_from_langname ("fortran");
1821       find_entries (inf);
1822
1823       if (old_last_node == last_node)
1824         /* No Fortran entries found.  Try C. */
1825         {
1826           /* We do not tag if rewind fails.
1827              Only the file name will be recorded in the tags file. */
1828           rewind (inf);
1829           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1830           find_entries (inf);
1831         }
1832       return;
1833     }
1834
1835   if (!no_line_directive
1836       && curfdp->lang != NULL && curfdp->lang->metasource)
1837     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1838        file, or anyway we parsed a file that is automatically generated from
1839        this one.  If this is the case, the bingo.c file contained #line
1840        directives that generated tags pointing to this file.  Let's delete
1841        them all before parsing this file, which is the real source. */
1842     {
1843       fdesc **fdpp = &fdhead;
1844       while (*fdpp != NULL)
1845         if (*fdpp != curfdp
1846             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1847           /* We found one of those!  We must delete both the file description
1848              and all tags referring to it. */
1849           {
1850             fdesc *badfdp = *fdpp;
1851
1852             /* Delete the tags referring to badfdp->taggedfname
1853                that were obtained from badfdp->infname. */
1854             invalidate_nodes (badfdp, &nodehead);
1855
1856             *fdpp = badfdp->next; /* remove the bad description from the list */
1857             free_fdesc (badfdp);
1858           }
1859         else
1860           fdpp = &(*fdpp)->next; /* advance the list pointer */
1861     }
1862
1863   assert (parser != NULL);
1864
1865   /* Generic initialisations before reading from file. */
1866   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1867
1868   /* Generic initialisations before parsing file with readline. */
1869   lineno = 0;                  /* reset global line number */
1870   charno = 0;                  /* reset global char number */
1871   linecharno = 0;              /* reset global char number of line start */
1872
1873   parser (inf);
1874
1875   regex_tag_multiline ();
1876 }
1877
1878 \f
1879 /*
1880  * Check whether an implicitly named tag should be created,
1881  * then call `pfnote'.
1882  * NAME is a string that is internally copied by this function.
1883  *
1884  * TAGS format specification
1885  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1886  * The following is explained in some more detail in etc/ETAGS.EBNF.
1887  *
1888  * make_tag creates tags with "implicit tag names" (unnamed tags)
1889  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1890  *  1. NAME does not contain any of the characters in NONAM;
1891  *  2. LINESTART contains name as either a rightmost, or rightmost but
1892  *     one character, substring;
1893  *  3. the character, if any, immediately before NAME in LINESTART must
1894  *     be a character in NONAM;
1895  *  4. the character, if any, immediately after NAME in LINESTART must
1896  *     also be a character in NONAM.
1897  *
1898  * The implementation uses the notinname() macro, which recognises the
1899  * characters stored in the string `nonam'.
1900  * etags.el needs to use the same characters that are in NONAM.
1901  */
1902 static void
1903 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1904           int namelen,          /* tag length */
1905           int is_func,          /* tag is a function */
1906           char *linestart,      /* start of the line where tag is */
1907           int linelen,          /* length of the line where tag is */
1908           int lno,              /* line number */
1909           long int cno)         /* character number */
1910 {
1911   bool named = (name != NULL && namelen > 0);
1912   char *nname = NULL;
1913
1914   if (!CTAGS && named)          /* maybe set named to false */
1915     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1916        such that etags.el can guess a name from it. */
1917     {
1918       int i;
1919       register const char *cp = name;
1920
1921       for (i = 0; i < namelen; i++)
1922         if (notinname (*cp++))
1923           break;
1924       if (i == namelen)                         /* rule #1 */
1925         {
1926           cp = linestart + linelen - namelen;
1927           if (notinname (linestart[linelen-1]))
1928             cp -= 1;                            /* rule #4 */
1929           if (cp >= linestart                   /* rule #2 */
1930               && (cp == linestart
1931                   || notinname (cp[-1]))        /* rule #3 */
1932               && strneq (name, cp, namelen))    /* rule #2 */
1933             named = FALSE;      /* use implicit tag name */
1934         }
1935     }
1936
1937   if (named)
1938     nname = savenstr (name, namelen);
1939
1940   pfnote (nname, is_func, linestart, linelen, lno, cno);
1941 }
1942
1943 /* Record a tag. */
1944 static void
1945 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1946                                 /* tag name, or NULL if unnamed */
1947                                 /* tag is a function */
1948                                 /* start of the line where tag is */
1949                                 /* length of the line where tag is */
1950                                 /* line number */
1951                                 /* character number */
1952 {
1953   register node *np;
1954
1955   assert (name == NULL || name[0] != '\0');
1956   if (CTAGS && name == NULL)
1957     return;
1958
1959   np = xnew (1, node);
1960
1961   /* If ctags mode, change name "main" to M<thisfilename>. */
1962   if (CTAGS && !cxref_style && streq (name, "main"))
1963     {
1964       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1965       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1966       fp = etags_strrchr (np->name, '.');
1967       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1968         fp[0] = '\0';
1969     }
1970   else
1971     np->name = name;
1972   np->valid = TRUE;
1973   np->been_warned = FALSE;
1974   np->fdp = curfdp;
1975   np->is_func = is_func;
1976   np->lno = lno;
1977   if (np->fdp->usecharno)
1978     /* Our char numbers are 0-base, because of C language tradition?
1979        ctags compatibility?  old versions compatibility?   I don't know.
1980        Anyway, since emacs's are 1-base we expect etags.el to take care
1981        of the difference.  If we wanted to have 1-based numbers, we would
1982        uncomment the +1 below. */
1983     np->cno = cno /* + 1 */ ;
1984   else
1985     np->cno = invalidcharno;
1986   np->left = np->right = NULL;
1987   if (CTAGS && !cxref_style)
1988     {
1989       if (strlen (linestart) < 50)
1990         np->regex = concat (linestart, "$", "");
1991       else
1992         np->regex = savenstr (linestart, 50);
1993     }
1994   else
1995     np->regex = savenstr (linestart, linelen);
1996
1997   add_node (np, &nodehead);
1998 }
1999
2000 /*
2001  * free_tree ()
2002  *      recurse on left children, iterate on right children.
2003  */
2004 static void
2005 free_tree (register node *np)
2006 {
2007   while (np)
2008     {
2009       register node *node_right = np->right;
2010       free_tree (np->left);
2011       free (np->name);
2012       free (np->regex);
2013       free (np);
2014       np = node_right;
2015     }
2016 }
2017
2018 /*
2019  * free_fdesc ()
2020  *      delete a file description
2021  */
2022 static void
2023 free_fdesc (register fdesc *fdp)
2024 {
2025   free (fdp->infname);
2026   free (fdp->infabsname);
2027   free (fdp->infabsdir);
2028   free (fdp->taggedfname);
2029   free (fdp->prop);
2030   free (fdp);
2031 }
2032
2033 /*
2034  * add_node ()
2035  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2036  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2037  *      balancing.
2038  *
2039  *      add_node is the only function allowed to add nodes, so it can
2040  *      maintain state.
2041  */
2042 static void
2043 add_node (node *np, node **cur_node_p)
2044 {
2045   register int dif;
2046   register node *cur_node = *cur_node_p;
2047
2048   if (cur_node == NULL)
2049     {
2050       *cur_node_p = np;
2051       last_node = np;
2052       return;
2053     }
2054
2055   if (!CTAGS)
2056     /* Etags Mode */
2057     {
2058       /* For each file name, tags are in a linked sublist on the right
2059          pointer.  The first tags of different files are a linked list
2060          on the left pointer.  last_node points to the end of the last
2061          used sublist. */
2062       if (last_node != NULL && last_node->fdp == np->fdp)
2063         {
2064           /* Let's use the same sublist as the last added node. */
2065           assert (last_node->right == NULL);
2066           last_node->right = np;
2067           last_node = np;
2068         }
2069       else if (cur_node->fdp == np->fdp)
2070         {
2071           /* Scanning the list we found the head of a sublist which is
2072              good for us.  Let's scan this sublist. */
2073           add_node (np, &cur_node->right);
2074         }
2075       else
2076         /* The head of this sublist is not good for us.  Let's try the
2077            next one. */
2078         add_node (np, &cur_node->left);
2079     } /* if ETAGS mode */
2080
2081   else
2082     {
2083       /* Ctags Mode */
2084       dif = strcmp (np->name, cur_node->name);
2085
2086       /*
2087        * If this tag name matches an existing one, then
2088        * do not add the node, but maybe print a warning.
2089        */
2090       if (no_duplicates && !dif)
2091         {
2092           if (np->fdp == cur_node->fdp)
2093             {
2094               if (!no_warnings)
2095                 {
2096                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2097                            np->fdp->infname, lineno, np->name);
2098                   fprintf (stderr, "Second entry ignored\n");
2099                 }
2100             }
2101           else if (!cur_node->been_warned && !no_warnings)
2102             {
2103               fprintf
2104                 (stderr,
2105                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2106                  np->fdp->infname, cur_node->fdp->infname, np->name);
2107               cur_node->been_warned = TRUE;
2108             }
2109           return;
2110         }
2111
2112       /* Actually add the node */
2113       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2114     } /* if CTAGS mode */
2115 }
2116
2117 /*
2118  * invalidate_nodes ()
2119  *      Scan the node tree and invalidate all nodes pointing to the
2120  *      given file description (CTAGS case) or free them (ETAGS case).
2121  */
2122 static void
2123 invalidate_nodes (fdesc *badfdp, node **npp)
2124 {
2125   node *np = *npp;
2126
2127   if (np == NULL)
2128     return;
2129
2130   if (CTAGS)
2131     {
2132       if (np->left != NULL)
2133         invalidate_nodes (badfdp, &np->left);
2134       if (np->fdp == badfdp)
2135         np->valid = FALSE;
2136       if (np->right != NULL)
2137         invalidate_nodes (badfdp, &np->right);
2138     }
2139   else
2140     {
2141       assert (np->fdp != NULL);
2142       if (np->fdp == badfdp)
2143         {
2144           *npp = np->left;      /* detach the sublist from the list */
2145           np->left = NULL;      /* isolate it */
2146           free_tree (np);       /* free it */
2147           invalidate_nodes (badfdp, npp);
2148         }
2149       else
2150         invalidate_nodes (badfdp, &np->left);
2151     }
2152 }
2153
2154 \f
2155 static int total_size_of_entries (node *);
2156 static int number_len (long);
2157
/* Return the number of decimal digits needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1. */
static int
number_len (long int num)
{
  int digits;

  /* Each time ten or more remain, one more digit is needed. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;

  return digits;
}
2167
2168 /*
2169  * Return total number of characters that put_entries will output for
2170  * the nodes in the linked list at the right of the specified node.
2171  * This count is irrelevant with etags.el since emacs 19.34 at least,
2172  * but is still supplied for backward compatibility.
2173  */
2174 static int
2175 total_size_of_entries (register node *np)
2176 {
2177   register int total = 0;
2178
2179   for (; np != NULL; np = np->right)
2180     if (np->valid)
2181       {
2182         total += strlen (np->regex) + 1;                /* pat\177 */
2183         if (np->name != NULL)
2184           total += strlen (np->name) + 1;               /* name\001 */
2185         total += number_len ((long) np->lno) + 1;       /* lno, */
2186         if (np->cno != invalidcharno)                   /* cno */
2187           total += number_len (np->cno);
2188         total += 1;                                     /* newline */
2189       }
2190
2191   return total;
2192 }
2193
2194 static void
2195 put_entries (register node *np)
2196 {
2197   register char *sp;
2198   static fdesc *fdp = NULL;
2199
2200   if (np == NULL)
2201     return;
2202
2203   /* Output subentries that precede this one */
2204   if (CTAGS)
2205     put_entries (np->left);
2206
2207   /* Output this entry */
2208   if (np->valid)
2209     {
2210       if (!CTAGS)
2211         {
2212           /* Etags mode */
2213           if (fdp != np->fdp)
2214             {
2215               fdp = np->fdp;
2216               fprintf (tagf, "\f\n%s,%d\n",
2217                        fdp->taggedfname, total_size_of_entries (np));
2218               fdp->written = TRUE;
2219             }
2220           fputs (np->regex, tagf);
2221           fputc ('\177', tagf);
2222           if (np->name != NULL)
2223             {
2224               fputs (np->name, tagf);
2225               fputc ('\001', tagf);
2226             }
2227           fprintf (tagf, "%d,", np->lno);
2228           if (np->cno != invalidcharno)
2229             fprintf (tagf, "%ld", np->cno);
2230           fputs ("\n", tagf);
2231         }
2232       else
2233         {
2234           /* Ctags mode */
2235           if (np->name == NULL)
2236             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2237
2238           if (cxref_style)
2239             {
2240               if (vgrind_style)
2241                 fprintf (stdout, "%s %s %d\n",
2242                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2243               else
2244                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2245                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2246             }
2247           else
2248             {
2249               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2250
2251               if (np->is_func)
2252                 {               /* function or #define macro with args */
2253                   putc (searchar, tagf);
2254                   putc ('^', tagf);
2255
2256                   for (sp = np->regex; *sp; sp++)
2257                     {
2258                       if (*sp == '\\' || *sp == searchar)
2259                         putc ('\\', tagf);
2260                       putc (*sp, tagf);
2261                     }
2262                   putc (searchar, tagf);
2263                 }
2264               else
2265                 {               /* anything else; text pattern inadequate */
2266                   fprintf (tagf, "%d", np->lno);
2267                 }
2268               putc ('\n', tagf);
2269             }
2270         }
2271     } /* if this node contains a valid tag */
2272
2273   /* Output subentries that follow this one */
2274   put_entries (np->right);
2275   if (!CTAGS)
2276     put_entries (np->left);
2277 }
2278
2279 \f
/* C extensions: flags that select the dialect handled by the C parser.
   The low bits, masked by C_EXT, identify the language.  Note that the
   values of C_STAR and C_JAVA both include the C_PLPL bit.  C_AUTO and
   YACC lie outside the C_EXT mask. */
#define C_EXT   0x00fff         /* mask covering the C extension bits */
#define C_PLAIN 0x00000         /* plain C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* Java */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2288
/*
 * The C symbol tables.
 *
 * Kinds of reserved words known to the C-like parsers.  The trailing
 * comments list the keywords that the gperf word list in this file
 * maps to each kind.
 */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
2303
2304 static unsigned int hash (const char *, unsigned int);
2305 static struct C_stab_entry * in_word_set (const char *, unsigned int);
2306 static enum sym_type C_symtype (char *, int, int);
2307
2308 /* Feed stuff between (but not including) %[ and %] lines to:
2309      gperf -m 5
2310 %[
2311 %compare-strncmp
2312 %enum
2313 %struct-type
2314 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2315 %%
2316 if,             0,                      st_C_ignore
2317 for,            0,                      st_C_ignore
2318 while,          0,                      st_C_ignore
2319 switch,         0,                      st_C_ignore
2320 return,         0,                      st_C_ignore
2321 __attribute__,  0,                      st_C_attribute
2322 GTY,            0,                      st_C_attribute
2323 @interface,     0,                      st_C_objprot
2324 @protocol,      0,                      st_C_objprot
2325 @implementation,0,                      st_C_objimpl
2326 @end,           0,                      st_C_objend
2327 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2328 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2329 friend,         C_PLPL,                 st_C_ignore
2330 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2331 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2332 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2333 class,          0,                      st_C_class
2334 namespace,      C_PLPL,                 st_C_struct
2335 domain,         C_STAR,                 st_C_struct
2336 union,          0,                      st_C_struct
2337 struct,         0,                      st_C_struct
2338 extern,         0,                      st_C_extern
2339 enum,           0,                      st_C_enum
2340 typedef,        0,                      st_C_typedef
2341 define,         0,                      st_C_define
2342 undef,          0,                      st_C_define
2343 operator,       C_PLPL,                 st_C_operator
2344 template,       0,                      st_C_template
2345 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2346 DEFUN,          0,                      st_C_gnumacro
2347 SYSCALL,        0,                      st_C_gnumacro
2348 ENTRY,          0,                      st_C_gnumacro
2349 PSEUDO,         0,                      st_C_gnumacro
2350 # These are defined inside C functions, so currently they are not met.
2351 # EXFUN used in glibc, DEFVAR_* in emacs.
2352 #EXFUN,         0,                      st_C_gnumacro
2353 #DEFVAR_,       0,                      st_C_gnumacro
2354 %]
2355 and replace lines between %< and %> with its output, then:
2356  - remove the #if characterset check
2357  - make in_word_set static and not inline. */
2358 /*%<*/
2359 /* C code produced by gperf version 3.0.1 */
2360 /* Command-line: gperf -m 5  */
2361 /* Computed positions: -k'2-3' */
2362
2363 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2364 /* maximum key range = 33, duplicates = 0 */
2365
/* Hash function for the keyword table, originally produced by gperf.
   The value is LEN plus the table entry for the second character of STR
   plus, unless LEN is exactly 2, the entry for the third one.  STR must
   therefore hold at least two characters (three if LEN is not 2). */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (register const char *str, register unsigned int len)
{
  /* Indexed by character code.  35 is larger than the biggest valid
     hash value and marks characters found in no keyword position. */
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* The third character takes part for every length except 2. */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];

  return hval;
}
2418
2419 static struct C_stab_entry *
2420 in_word_set (register const char *str, register unsigned int len)
2421 {
2422   enum
2423     {
2424       TOTAL_KEYWORDS = 33,
2425       MIN_WORD_LENGTH = 2,
2426       MAX_WORD_LENGTH = 15,
2427       MIN_HASH_VALUE = 2,
2428       MAX_HASH_VALUE = 34
2429     };
2430
2431   static struct C_stab_entry wordlist[] =
2432     {
2433       {""}, {""},
2434       {"if",            0,                      st_C_ignore},
2435       {"GTY",           0,                      st_C_attribute},
2436       {"@end",          0,                      st_C_objend},
2437       {"union",         0,                      st_C_struct},
2438       {"define",                0,                      st_C_define},
2439       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2440       {"template",      0,                      st_C_template},
2441       {"operator",      C_PLPL,                 st_C_operator},
2442       {"@interface",    0,                      st_C_objprot},
2443       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2444       {"friend",                C_PLPL,                 st_C_ignore},
2445       {"typedef",       0,                      st_C_typedef},
2446       {"return",                0,                      st_C_ignore},
2447       {"@implementation",0,                     st_C_objimpl},
2448       {"@protocol",     0,                      st_C_objprot},
2449       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2450       {"extern",                0,                      st_C_extern},
2451       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2452       {"struct",                0,                      st_C_struct},
2453       {"domain",                C_STAR,                 st_C_struct},
2454       {"switch",                0,                      st_C_ignore},
2455       {"enum",          0,                      st_C_enum},
2456       {"for",           0,                      st_C_ignore},
2457       {"namespace",     C_PLPL,                 st_C_struct},
2458       {"class",         0,                      st_C_class},
2459       {"while",         0,                      st_C_ignore},
2460       {"undef",         0,                      st_C_define},
2461       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2462       {"__attribute__", 0,                      st_C_attribute},
2463       {"SYSCALL",       0,                      st_C_gnumacro},
2464       {"ENTRY",         0,                      st_C_gnumacro},
2465       {"PSEUDO",                0,                      st_C_gnumacro},
2466       {"DEFUN",         0,                      st_C_gnumacro}
2467     };
2468
2469   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2470     {
2471       register int key = hash (str, len);
2472
2473       if (key <= MAX_HASH_VALUE && key >= 0)
2474         {
2475           register const char *s = wordlist[key].name;
2476
2477           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2478             return &wordlist[key];
2479         }
2480     }
2481   return 0;
2482 }
2483 /*%>*/
2484
2485 static enum sym_type
2486 C_symtype (char *str, int len, int c_ext)
2487 {
2488   register struct C_stab_entry *se = in_word_set (str, len);
2489
2490   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2491     return st_none;
2492   return se->type;
2493 }
2494
2495 \f
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;        /* looking at an __attribute__ construct;
                                   set by consider_token when it meets
                                   an st_C_attribute keyword */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 * consider_token moves it to fdefunkey on a GNU macro keyword, to
 * vignore on a keyword that is to be ignored, to foperator on
 * `operator', and to fvnameseen on a plain identifier that may name a
 * function or a variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* GNU macro keyword seen (DEFUN, SYSCALL,
                                   ENTRY or PSEUDO) */
  fdefunname,                   /* name following a GNU macro keyword seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen */

/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.  consider_token leaves tnone for
 * tkeyseen only when the `typedefs' option is set.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag; also
                                   entered on a Java `extends' or
                                   `implements' keyword */
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 * consider_token stores a freshly allocated copy of the name here and
 * never frees the previous one.
 */
static const char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 * Both `define' and `undef' are st_C_define keywords, so either one
 * moves the machine from dsharpseen to ddefineseen; any other token
 * after the '#' moves it to dignorerest.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2584
2585
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* TRUE when a tag may be created from this
                                   token; when FALSE, do not create a tag.
                                   make_C_tag clears it after use, and the
                                   token should be invalidated whenever a
                                   state machine is reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2608
2609 /*
2610  * Variables and functions for dealing with nested structures.
2611  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2612  */
2613 static void pushclass_above (int, char *, int);
2614 static void popclass_above (int);
2615 static void write_classname (linebuffer *, const char *qualifier);
2616
/* The two arrays below are parallel: entry I of each describes the same
   nesting level.  pushclass_above doubles `size' and reallocates both
   arrays when `nl' reaches `size'. */
static struct {
  char **cname;                 /* nested class names; an entry may be NULL */
  int *bracelev;                /* nested class brace level */
  int nl;                       /* class nesting level (elements used) */
  int size;                     /* length of the array */
} cstack;                       /* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev         (cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   Expands to an expression that reads a variable named `bracelev', which
   must be in scope wherever the macro is used. */
#define instruct        (structdef == snone && nestlev > 0                      \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2628
2629 static void
2630 pushclass_above (int bracelev, char *str, int len)
2631 {
2632   int nl;
2633
2634   popclass_above (bracelev);
2635   nl = cstack.nl;
2636   if (nl >= cstack.size)
2637     {
2638       int size = cstack.size *= 2;
2639       xrnew (cstack.cname, size, char *);
2640       xrnew (cstack.bracelev, size, int);
2641     }
2642   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2643   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2644   cstack.bracelev[nl] = bracelev;
2645   cstack.nl = nl + 1;
2646 }
2647
2648 static void
2649 popclass_above (int bracelev)
2650 {
2651   int nl;
2652
2653   for (nl = cstack.nl - 1;
2654        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2655        nl--)
2656     {
2657       free (cstack.cname[nl]);
2658       cstack.nl = nl;
2659     }
2660 }
2661
2662 static void
2663 write_classname (linebuffer *cn, const char *qualifier)
2664 {
2665   int i, len;
2666   int qlen = strlen (qualifier);
2667
2668   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2669     {
2670       len = 0;
2671       cn->len = 0;
2672       cn->buffer[0] = '\0';
2673     }
2674   else
2675     {
2676       len = strlen (cstack.cname[0]);
2677       linebuffer_setlen (cn, len);
2678       strcpy (cn->buffer, cstack.cname[0]);
2679     }
2680   for (i = 1; i < cstack.nl; i++)
2681     {
2682       char *s;
2683       int slen;
2684
2685       s = cstack.cname[i];
2686       if (s == NULL)
2687         continue;
2688       slen = strlen (s);
2689       len += slen + qlen;
2690       linebuffer_setlen (cn, len);
2691       strncat (cn->buffer, qualifier, qlen);
2692       strncat (cn->buffer, s, slen);
2693     }
2694 }
2695
2696 \f
2697 static bool consider_token (char *, int, int, int *, int, int, bool *);
2698 static void make_C_tag (bool);
2699
2700 /*
2701  * consider_token ()
2702  *      checks to see if the current token is at the start of a
2703  *      function or variable, or corresponds to a typedef, or
2704  *      is a struct/union/enum tag, or #define, or an enum constant.
2705  *
2706  *      *IS_FUNC gets TRUE if the token is a function or #define macro
2707  *      with args.  C_EXTP points to which language we are looking at.
2708  *
2709  * Globals
2710  *      fvdef                   IN OUT
2711  *      structdef               IN OUT
2712  *      definedef               IN OUT
2713  *      typdef                  IN OUT
2714  *      objdef                  IN OUT
2715  */
2716
2717 static bool
2718 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2719                                 /* IN: token pointer */
2720                                 /* IN: token length */
2721                                 /* IN: first char after the token */
2722                                 /* IN, OUT: C extensions mask */
2723                                 /* IN: brace level */
2724                                 /* IN: parenthesis level */
2725                                 /* OUT: function or variable found */
2726 {
2727   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2728      structtype is the type of the preceding struct-like keyword, and
2729      structbracelev is the brace level where it has been seen. */
2730   static enum sym_type structtype;
2731   static int structbracelev;
2732   static enum sym_type toktype;
2733
2734
2735   toktype = C_symtype (str, len, *c_extp);
2736
2737   /*
2738    * Skip __attribute__
2739    */
2740   if (toktype == st_C_attribute)
2741     {
2742       inattribute = TRUE;
2743       return FALSE;
2744      }
2745
2746    /*
2747     * Advance the definedef state machine.
2748     */
2749    switch (definedef)
2750      {
2751      case dnone:
2752        /* We're not on a preprocessor line. */
2753        if (toktype == st_C_gnumacro)
2754          {
2755            fvdef = fdefunkey;
2756            return FALSE;
2757          }
2758        break;
2759      case dsharpseen:
2760        if (toktype == st_C_define)
2761          {
2762            definedef = ddefineseen;
2763          }
2764        else
2765          {
2766            definedef = dignorerest;
2767          }
2768        return FALSE;
2769      case ddefineseen:
2770        /*
2771         * Make a tag for any macro, unless it is a constant
2772         * and constantypedefs is FALSE.
2773         */
2774        definedef = dignorerest;
2775        *is_func_or_var = (c == '(');
2776        if (!*is_func_or_var && !constantypedefs)
2777          return FALSE;
2778        else
2779          return TRUE;
2780      case dignorerest:
2781        return FALSE;
2782      default:
2783        error ("internal error: definedef value.", (char *)NULL);
2784      }
2785
2786    /*
2787     * Now typedefs
2788     */
2789    switch (typdef)
2790      {
2791      case tnone:
2792        if (toktype == st_C_typedef)
2793          {
2794            if (typedefs)
2795              typdef = tkeyseen;
2796            fvextern = FALSE;
2797            fvdef = fvnone;
2798            return FALSE;
2799          }
2800        break;
2801      case tkeyseen:
2802        switch (toktype)
2803          {
2804          case st_none:
2805          case st_C_class:
2806          case st_C_struct:
2807          case st_C_enum:
2808            typdef = ttypeseen;
2809          }
2810        break;
2811      case ttypeseen:
2812        if (structdef == snone && fvdef == fvnone)
2813          {
2814            fvdef = fvnameseen;
2815            return TRUE;
2816          }
2817        break;
2818      case tend:
2819        switch (toktype)
2820          {
2821          case st_C_class:
2822          case st_C_struct:
2823          case st_C_enum:
2824            return FALSE;
2825          }
2826        return TRUE;
2827      }
2828
2829    switch (toktype)
2830      {
2831      case st_C_javastruct:
2832        if (structdef == stagseen)
2833          structdef = scolonseen;
2834        return FALSE;
2835      case st_C_template:
2836      case st_C_class:
2837        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2838            && bracelev == 0
2839            && definedef == dnone && structdef == snone
2840            && typdef == tnone && fvdef == fvnone)
2841          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2842        if (toktype == st_C_template)
2843          break;
2844        /* FALLTHRU */
2845      case st_C_struct:
2846      case st_C_enum:
2847        if (parlev == 0
2848            && fvdef != vignore
2849            && (typdef == tkeyseen
2850                || (typedefs_or_cplusplus && structdef == snone)))
2851          {
2852            structdef = skeyseen;
2853            structtype = toktype;
2854            structbracelev = bracelev;
2855            if (fvdef == fvnameseen)
2856              fvdef = fvnone;
2857          }
2858        return FALSE;
2859      }
2860
2861    if (structdef == skeyseen)
2862      {
2863        structdef = stagseen;
2864        return TRUE;
2865      }
2866
2867    if (typdef != tnone)
2868      definedef = dnone;
2869
2870    /* Detect Objective C constructs. */
2871    switch (objdef)
2872      {
2873      case onone:
2874        switch (toktype)
2875          {
2876          case st_C_objprot:
2877            objdef = oprotocol;
2878            return FALSE;
2879          case st_C_objimpl:
2880            objdef = oimplementation;
2881            return FALSE;
2882          }
2883        break;
2884      case oimplementation:
2885        /* Save the class tag for functions or variables defined inside. */
2886        objtag = savenstr (str, len);
2887        objdef = oinbody;
2888        return FALSE;
2889      case oprotocol:
2890        /* Save the class tag for categories. */
2891        objtag = savenstr (str, len);
2892        objdef = otagseen;
2893        *is_func_or_var = TRUE;
2894        return TRUE;
2895      case oparenseen:
2896        objdef = ocatseen;
2897        *is_func_or_var = TRUE;
2898        return TRUE;
2899      case oinbody:
2900        break;
2901      case omethodsign:
2902        if (parlev == 0)
2903          {
2904            fvdef = fvnone;
2905            objdef = omethodtag;
2906            linebuffer_setlen (&token_name, len);
2907            strncpy (token_name.buffer, str, len);
2908            token_name.buffer[len] = '\0';
2909            return TRUE;
2910          }
2911        return FALSE;
2912      case omethodcolon:
2913        if (parlev == 0)
2914          objdef = omethodparm;
2915        return FALSE;
2916      case omethodparm:
2917        if (parlev == 0)
2918          {
2919            fvdef = fvnone;
2920            objdef = omethodtag;
2921            linebuffer_setlen (&token_name, token_name.len + len);
2922            strncat (token_name.buffer, str, len);
2923            return TRUE;
2924          }
2925        return FALSE;
2926      case oignore:
2927        if (toktype == st_C_objend)
2928          {
2929            /* Memory leakage here: the string pointed by objtag is
2930               never released, because many tests would be needed to
2931               avoid breaking on incorrect input code.  The amount of
2932               memory leaked here is the sum of the lengths of the
2933               class tags.
2934            free (objtag); */
2935            objdef = onone;
2936          }
2937        return FALSE;
2938      }
2939
2940    /* A function, variable or enum constant? */
2941    switch (toktype)
2942      {
2943      case st_C_extern:
2944        fvextern = TRUE;
2945        switch  (fvdef)
2946          {
2947          case finlist:
2948          case flistseen:
2949          case fignore:
2950          case vignore:
2951            break;
2952          default:
2953            fvdef = fvnone;
2954          }
2955        return FALSE;
2956      case st_C_ignore:
2957        fvextern = FALSE;
2958        fvdef = vignore;
2959        return FALSE;
2960      case st_C_operator:
2961        fvdef = foperator;
2962        *is_func_or_var = TRUE;
2963        return TRUE;
2964      case st_none:
2965        if (constantypedefs
2966            && structdef == snone
2967            && structtype == st_C_enum && bracelev > structbracelev)
2968          return TRUE;           /* enum constant */
2969        switch (fvdef)
2970          {
2971          case fdefunkey:
2972            if (bracelev > 0)
2973              break;
2974            fvdef = fdefunname;  /* GNU macro */
2975            *is_func_or_var = TRUE;
2976            return TRUE;
2977          case fvnone:
2978            switch (typdef)
2979              {
2980              case ttypeseen:
2981                return FALSE;
2982              case tnone:
2983                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2984                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2985                  {
2986                    fvdef = vignore;
2987                    return FALSE;
2988                  }
2989                break;
2990              }
2991           /* FALLTHRU */
2992           case fvnameseen:
2993           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2994             {
2995               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2996                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2997               fvdef = foperator;
2998               *is_func_or_var = TRUE;
2999               return TRUE;
3000             }
3001           if (bracelev > 0 && !instruct)
3002             break;
3003           fvdef = fvnameseen;   /* function or variable */
3004           *is_func_or_var = TRUE;
3005           return TRUE;
3006         }
3007       break;
3008     }
3009
3010   return FALSE;
3011 }
3012
3013 \f
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 *
 * The macros below are not self-contained: they expand to expressions
 * and statements that use variables named curndx, newndx, c_ext, lp,
 * inf, quotednl and savetoken, which must be in scope at the point of
 * use, as well as charno.
 */
static struct
{
  long linepos;
  linebuffer lb;
} lbs[2];

#define current_lb_is_new (newndx == curndx)
#define switch_line_buffers() (curndx = 1 - curndx)

/* The line buffer and line position selected by curndx and newndx. */
#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Tests on the C extensions mask held in c_ext. */
#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)

/* Read the next input line into the current buffer and restart scanning
   at its beginning.  curlinepos receives the value charno had before
   the read.  definedef is left untouched, so a preprocessor line that
   is being scanned continues onto the new line. */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Same as CNL_SAVE_DEFINEDEF, then restore into `token' the token kept
   in savetoken, if it is valid, and reset definedef to dnone. */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = FALSE;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
3056
3057
3058 static void
3059 make_C_tag (int isfun)
3060 {
3061   /* This function is never called when token.valid is FALSE, but
3062      we must protect against invalid input or internal errors. */
3063   if (token.valid)
3064     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3065               token.offset+token.length+1, token.lineno, token.linepos);
3066   else if (DEBUG)
3067     {                             /* this branch is optimised away if !DEBUG */
3068       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3069                 token_name.len + 17, isfun, token.line,
3070                 token.offset+token.length+1, token.lineno, token.linepos);
3071       error ("INVALID TOKEN", NULL);
3072     }
3073
3074   token.valid = FALSE;
3075 }
3076
3077
3078 /*
3079  * C_entries ()
3080  *      This routine finds functions, variables, typedefs,
3081  *      #define's, enum constants and struct/union/enum definitions in
3082  *      C syntax and adds them to the list.
3083  */
3084 static void
3085 C_entries (int c_ext, FILE *inf)
3086                                 /* extension of C */
3087                                 /* input file */
3088 {
3089   register char c;              /* latest char read; '\0' for end of line */
3090   register char *lp;            /* pointer one beyond the character `c' */
3091   int curndx, newndx;           /* indices for current and new lb */
3092   register int tokoff;          /* offset in line of start of current token */
3093   register int toklen;          /* length of current token */
3094   const char *qualifier;        /* string used to qualify names */
3095   int qlen;                     /* length of qualifier */
3096   int bracelev;                 /* current brace level */
3097   int bracketlev;               /* current bracket level */
3098   int parlev;                   /* current parenthesis level */
3099   int attrparlev;               /* __attribute__ parenthesis level */
3100   int templatelev;              /* current template level */
3101   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3102   bool incomm, inquote, inchar, quotednl, midtoken;
3103   bool yacc_rules;              /* in the rules part of a yacc file */
3104   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3105
3106
3107   linebuffer_init (&lbs[0].lb);
3108   linebuffer_init (&lbs[1].lb);
3109   if (cstack.size == 0)
3110     {
3111       cstack.size = (DEBUG) ? 1 : 4;
3112       cstack.nl = 0;
3113       cstack.cname = xnew (cstack.size, char *);
3114       cstack.bracelev = xnew (cstack.size, int);
3115     }
3116
3117   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3118   curndx = newndx = 0;
3119   lp = curlb.buffer;
3120   *lp = 0;
3121
3122   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3123   structdef = snone; definedef = dnone; objdef = onone;
3124   yacc_rules = FALSE;
3125   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3126   token.valid = savetoken.valid = FALSE;
3127   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3128   if (cjava)
3129     { qualifier = "."; qlen = 1; }
3130   else
3131     { qualifier = "::"; qlen = 2; }
3132
3133
3134   while (!feof (inf))
3135     {
3136       c = *lp++;
3137       if (c == '\\')
3138         {
3139           /* If we are at the end of the line, the next character is a
3140              '\0'; do not skip it, because it is what tells us
3141              to read the next line.  */
3142           if (*lp == '\0')
3143             {
3144               quotednl = TRUE;
3145               continue;
3146             }
3147           lp++;
3148           c = ' ';
3149         }
3150       else if (incomm)
3151         {
3152           switch (c)
3153             {
3154             case '*':
3155               if (*lp == '/')
3156                 {
3157                   c = *lp++;
3158                   incomm = FALSE;
3159                 }
3160               break;
3161             case '\0':
3162               /* Newlines inside comments do not end macro definitions in
3163                  traditional cpp. */
3164               CNL_SAVE_DEFINEDEF ();
3165               break;
3166             }
3167           continue;
3168         }
3169       else if (inquote)
3170         {
3171           switch (c)
3172             {
3173             case '"':
3174               inquote = FALSE;
3175               break;
3176             case '\0':
3177               /* Newlines inside strings do not end macro definitions
3178                  in traditional cpp, even though compilers don't
3179                  usually accept them. */
3180               CNL_SAVE_DEFINEDEF ();
3181               break;
3182             }
3183           continue;
3184         }
3185       else if (inchar)
3186         {
3187           switch (c)
3188             {
3189             case '\0':
3190               /* Hmmm, something went wrong. */
3191               CNL ();
3192               /* FALLTHRU */
3193             case '\'':
3194               inchar = FALSE;
3195               break;
3196             }
3197           continue;
3198         }
3199       else if (bracketlev > 0)
3200         {
3201           switch (c)
3202             {
3203             case ']':
3204               if (--bracketlev > 0)
3205                 continue;
3206               break;
3207             case '\0':
3208               CNL_SAVE_DEFINEDEF ();
3209               break;
3210             }
3211           continue;
3212         }
3213       else switch (c)
3214         {
3215         case '"':
3216           inquote = TRUE;
3217           if (inattribute)
3218             break;
3219           switch (fvdef)
3220             {
3221             case fdefunkey:
3222             case fstartlist:
3223             case finlist:
3224             case fignore:
3225             case vignore:
3226               break;
3227             default:
3228               fvextern = FALSE;
3229               fvdef = fvnone;
3230             }
3231           continue;
3232         case '\'':
3233           inchar = TRUE;
3234           if (inattribute)
3235             break;
3236           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3237             {
3238               fvextern = FALSE;
3239               fvdef = fvnone;
3240             }
3241           continue;
3242         case '/':
3243           if (*lp == '*')
3244             {
3245               incomm = TRUE;
3246               lp++;
3247               c = ' ';
3248             }
3249           else if (/* cplpl && */ *lp == '/')
3250             {
3251               c = '\0';
3252             }
3253           break;
3254         case '%':
3255           if ((c_ext & YACC) && *lp == '%')
3256             {
3257               /* Entering or exiting rules section in yacc file. */
3258               lp++;
3259               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3260               typdef = tnone; structdef = snone;
3261               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3262               bracelev = 0;
3263               yacc_rules = !yacc_rules;
3264               continue;
3265             }
3266           else
3267             break;
3268         case '#':
3269           if (definedef == dnone)
3270             {
3271               char *cp;
3272               bool cpptoken = TRUE;
3273
3274               /* Look back on this line.  If all blanks, or nonblanks
3275                  followed by an end of comment, this is a preprocessor
3276                  token. */
3277               for (cp = newlb.buffer; cp < lp-1; cp++)
3278                 if (!iswhite (*cp))
3279                   {
3280                     if (*cp == '*' && *(cp+1) == '/')
3281                       {
3282                         cp++;
3283                         cpptoken = TRUE;
3284                       }
3285                     else
3286                       cpptoken = FALSE;
3287                   }
3288               if (cpptoken)
3289                 definedef = dsharpseen;
3290             } /* if (definedef == dnone) */
3291           continue;
3292         case '[':
3293           bracketlev++;
3294             continue;
3295         } /* switch (c) */
3296
3297
3298       /* Consider token only if some involved conditions are satisfied. */
3299       if (typdef != tignore
3300           && definedef != dignorerest
3301           && fvdef != finlist
3302           && templatelev == 0
3303           && (definedef != dnone
3304               || structdef != scolonseen)
3305           && !inattribute)
3306         {
3307           if (midtoken)
3308             {
3309               if (endtoken (c))
3310                 {
3311                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3312                     /* This handles :: in the middle,
3313                        but not at the beginning of an identifier.
3314                        Also, space-separated :: is not recognised. */
3315                     {
3316                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3317                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3318                       lp += 2;
3319                       toklen += 2;
3320                       c = lp[-1];
3321                       goto still_in_token;
3322                     }
3323                   else
3324                     {
3325                       bool funorvar = FALSE;
3326
3327                       if (yacc_rules
3328                           || consider_token (newlb.buffer + tokoff, toklen, c,
3329                                              &c_ext, bracelev, parlev,
3330                                              &funorvar))
3331                         {
3332                           if (fvdef == foperator)
3333                             {
3334                               char *oldlp = lp;
3335                               lp = skip_spaces (lp-1);
3336                               if (*lp != '\0')
3337                                 lp += 1;
3338                               while (*lp != '\0'
3339                                      && !iswhite (*lp) && *lp != '(')
3340                                 lp += 1;
3341                               c = *lp++;
3342                               toklen += lp - oldlp;
3343                             }
3344                           token.named = FALSE;
3345                           if (!plainc
3346                               && nestlev > 0 && definedef == dnone)
3347                             /* in struct body */
3348                             {
3349                               write_classname (&token_name, qualifier);
3350                               linebuffer_setlen (&token_name,
3351                                                  token_name.len+qlen+toklen);
3352                               strcat (token_name.buffer, qualifier);
3353                               strncat (token_name.buffer,
3354                                        newlb.buffer + tokoff, toklen);
3355                               token.named = TRUE;
3356                             }
3357                           else if (objdef == ocatseen)
3358                             /* Objective C category */
3359                             {
3360                               int len = strlen (objtag) + 2 + toklen;
3361                               linebuffer_setlen (&token_name, len);
3362                               strcpy (token_name.buffer, objtag);
3363                               strcat (token_name.buffer, "(");
3364                               strncat (token_name.buffer,
3365                                        newlb.buffer + tokoff, toklen);
3366                               strcat (token_name.buffer, ")");
3367                               token.named = TRUE;
3368                             }
3369                           else if (objdef == omethodtag
3370                                    || objdef == omethodparm)
3371                             /* Objective C method */
3372                             {
3373                               token.named = TRUE;
3374                             }
3375                           else if (fvdef == fdefunname)
3376                             /* GNU DEFUN and similar macros */
3377                             {
3378                               bool defun = (newlb.buffer[tokoff] == 'F');
3379                               int off = tokoff;
3380                               int len = toklen;
3381
3382                               /* Rewrite the tag so that emacs lisp DEFUNs
3383                                  can be found by their elisp name */
3384                               if (defun)
3385                                 {
3386                                   off += 1;
3387                                   len -= 1;
3388                                 }
3389                               linebuffer_setlen (&token_name, len);
3390                               strncpy (token_name.buffer,
3391                                        newlb.buffer + off, len);
3392                               token_name.buffer[len] = '\0';
3393                               if (defun)
3394                                 while (--len >= 0)
3395                                   if (token_name.buffer[len] == '_')
3396                                     token_name.buffer[len] = '-';
3397                               token.named = defun;
3398                             }
3399                           else
3400                             {
3401                               linebuffer_setlen (&token_name, toklen);
3402                               strncpy (token_name.buffer,
3403                                        newlb.buffer + tokoff, toklen);
3404                               token_name.buffer[toklen] = '\0';
3405                               /* Name macros and members. */
3406                               token.named = (structdef == stagseen
3407                                              || typdef == ttypeseen
3408                                              || typdef == tend
3409                                              || (funorvar
3410                                                  && definedef == dignorerest)
3411                                              || (funorvar
3412                                                  && definedef == dnone
3413                                                  && structdef == snone
3414                                                  && bracelev > 0));
3415                             }
3416                           token.lineno = lineno;
3417                           token.offset = tokoff;
3418                           token.length = toklen;
3419                           token.line = newlb.buffer;
3420                           token.linepos = newlinepos;
3421                           token.valid = TRUE;
3422
3423                           if (definedef == dnone
3424                               && (fvdef == fvnameseen
3425                                   || fvdef == foperator
3426                                   || structdef == stagseen
3427                                   || typdef == tend
3428                                   || typdef == ttypeseen
3429                                   || objdef != onone))
3430                             {
3431                               if (current_lb_is_new)
3432                                 switch_line_buffers ();
3433                             }
3434                           else if (definedef != dnone
3435                                    || fvdef == fdefunname
3436                                    || instruct)
3437                             make_C_tag (funorvar);
3438                         }
3439                       else /* not yacc and consider_token failed */
3440                         {
3441                           if (inattribute && fvdef == fignore)
3442                             {
3443                               /* We have just met __attribute__ after a
3444                                  function parameter list: do not tag the
3445                                  function again. */
3446                               fvdef = fvnone;
3447                             }
3448                         }
3449                       midtoken = FALSE;
3450                     }
3451                 } /* if (endtoken (c)) */
3452               else if (intoken (c))
3453                 still_in_token:
3454                 {
3455                   toklen++;
3456                   continue;
3457                 }
3458             } /* if (midtoken) */
3459           else if (begtoken (c))
3460             {
3461               switch (definedef)
3462                 {
3463                 case dnone:
3464                   switch (fvdef)
3465                     {
3466                     case fstartlist:
3467                       /* This prevents tagging fb in
3468                          void (__attribute__((noreturn)) *fb) (void);
3469                          Fixing this is not easy and not very important. */
3470                       fvdef = finlist;
3471                       continue;
3472                     case flistseen:
3473                       if (plainc || declarations)
3474                         {
3475                           make_C_tag (TRUE); /* a function */
3476                           fvdef = fignore;
3477                         }
3478                       break;
3479                     }
3480                   if (structdef == stagseen && !cjava)
3481                     {
3482                       popclass_above (bracelev);
3483                       structdef = snone;
3484                     }
3485                   break;
3486                 case dsharpseen:
3487                   savetoken = token;
3488                   break;
3489                 }
3490               if (!yacc_rules || lp == newlb.buffer + 1)
3491                 {
3492                   tokoff = lp - 1 - newlb.buffer;
3493                   toklen = 1;
3494                   midtoken = TRUE;
3495                 }
3496               continue;
3497             } /* if (begtoken) */
3498         } /* if must look at token */
3499
3500
3501       /* Detect end of line, colon, comma, semicolon and various braces
3502          after having handled a token.*/
3503       switch (c)
3504         {
3505         case ':':
3506           if (inattribute)
3507             break;
3508           if (yacc_rules && token.offset == 0 && token.valid)
3509             {
3510               make_C_tag (FALSE); /* a yacc function */
3511               break;
3512             }
3513           if (definedef != dnone)
3514             break;
3515           switch (objdef)
3516             {
3517             case  otagseen:
3518               objdef = oignore;
3519               make_C_tag (TRUE); /* an Objective C class */
3520               break;
3521             case omethodtag:
3522             case omethodparm:
3523               objdef = omethodcolon;
3524               linebuffer_setlen (&token_name, token_name.len + 1);
3525               strcat (token_name.buffer, ":");
3526               break;
3527             }
3528           if (structdef == stagseen)
3529             {
3530               structdef = scolonseen;
3531               break;
3532             }
3533           /* Should be useless, but may work as a safety net. */
3534           if (cplpl && fvdef == flistseen)
3535             {
3536               make_C_tag (TRUE); /* a function */
3537               fvdef = fignore;
3538               break;
3539             }
3540           break;
3541         case ';':
3542           if (definedef != dnone || inattribute)
3543             break;
3544           switch (typdef)
3545             {
3546             case tend:
3547             case ttypeseen:
3548               make_C_tag (FALSE); /* a typedef */
3549               typdef = tnone;
3550               fvdef = fvnone;
3551               break;
3552             case tnone:
3553             case tinbody:
3554             case tignore:
3555               switch (fvdef)
3556                 {
3557                 case fignore:
3558                   if (typdef == tignore || cplpl)
3559                     fvdef = fvnone;
3560                   break;
3561                 case fvnameseen:
3562                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3563                       || (members && instruct))
3564                     make_C_tag (FALSE); /* a variable */
3565                   fvextern = FALSE;
3566                   fvdef = fvnone;
3567                   token.valid = FALSE;
3568                   break;
3569                 case flistseen:
3570                   if ((declarations
3571                        && (cplpl || !instruct)
3572                        && (typdef == tnone || (typdef != tignore && instruct)))
3573                       || (members
3574                           && plainc && instruct))
3575                     make_C_tag (TRUE);  /* a function */
3576                   /* FALLTHRU */
3577                 default:
3578                   fvextern = FALSE;
3579                   fvdef = fvnone;
3580                   if (declarations
3581                        && cplpl && structdef == stagseen)
3582                     make_C_tag (FALSE); /* forward declaration */
3583                   else
3584                     token.valid = FALSE;
3585                 } /* switch (fvdef) */
3586               /* FALLTHRU */
3587             default:
3588               if (!instruct)
3589                 typdef = tnone;
3590             }
3591           if (structdef == stagseen)
3592             structdef = snone;
3593           break;
3594         case ',':
3595           if (definedef != dnone || inattribute)
3596             break;
3597           switch (objdef)
3598             {
3599             case omethodtag:
3600             case omethodparm:
3601               make_C_tag (TRUE); /* an Objective C method */
3602               objdef = oinbody;
3603               break;
3604             }
3605           switch (fvdef)
3606             {
3607             case fdefunkey:
3608             case foperator:
3609             case fstartlist:
3610             case finlist:
3611             case fignore:
3612             case vignore:
3613               break;
3614             case fdefunname:
3615               fvdef = fignore;
3616               break;
3617             case fvnameseen:
3618               if (parlev == 0
3619                   && ((globals
3620                        && bracelev == 0
3621                        && templatelev == 0
3622                        && (!fvextern || declarations))
3623                       || (members && instruct)))
3624                   make_C_tag (FALSE); /* a variable */
3625               break;
3626             case flistseen:
3627               if ((declarations && typdef == tnone && !instruct)
3628                   || (members && typdef != tignore && instruct))
3629                 {
3630                   make_C_tag (TRUE); /* a function */
3631                   fvdef = fvnameseen;
3632                 }
3633               else if (!declarations)
3634                 fvdef = fvnone;
3635               token.valid = FALSE;
3636               break;
3637             default:
3638               fvdef = fvnone;
3639             }
3640           if (structdef == stagseen)
3641             structdef = snone;
3642           break;
3643         case ']':
3644           if (definedef != dnone || inattribute)
3645             break;
3646           if (structdef == stagseen)
3647             structdef = snone;
3648           switch (typdef)
3649             {
3650             case ttypeseen:
3651             case tend:
3652               typdef = tignore;
3653               make_C_tag (FALSE);       /* a typedef */
3654               break;
3655             case tnone:
3656             case tinbody:
3657               switch (fvdef)
3658                 {
3659                 case foperator:
3660                 case finlist:
3661                 case fignore:
3662                 case vignore:
3663                   break;
3664                 case fvnameseen:
3665                   if ((members && bracelev == 1)
3666                       || (globals && bracelev == 0
3667                           && (!fvextern || declarations)))
3668                     make_C_tag (FALSE); /* a variable */
3669                   /* FALLTHRU */
3670                 default:
3671                   fvdef = fvnone;
3672                 }
3673               break;
3674             }
3675           break;
3676         case '(':
3677           if (inattribute)
3678             {
3679               attrparlev++;
3680               break;
3681             }
3682           if (definedef != dnone)
3683             break;
3684           if (objdef == otagseen && parlev == 0)
3685             objdef = oparenseen;
3686           switch (fvdef)
3687             {
3688             case fvnameseen:
3689               if (typdef == ttypeseen
3690                   && *lp != '*'
3691                   && !instruct)
3692                 {
3693                   /* This handles constructs like:
3694                      typedef void OperatorFun (int fun); */
3695                   make_C_tag (FALSE);
3696                   typdef = tignore;
3697                   fvdef = fignore;
3698                   break;
3699                 }
3700               /* FALLTHRU */
3701             case foperator:
3702               fvdef = fstartlist;
3703               break;
3704             case flistseen:
3705               fvdef = finlist;
3706               break;
3707             }
3708           parlev++;
3709           break;
3710         case ')':
3711           if (inattribute)
3712             {
3713               if (--attrparlev == 0)
3714                 inattribute = FALSE;
3715               break;
3716             }
3717           if (definedef != dnone)
3718             break;
3719           if (objdef == ocatseen && parlev == 1)
3720             {
3721               make_C_tag (TRUE); /* an Objective C category */
3722               objdef = oignore;
3723             }
3724           if (--parlev == 0)
3725             {
3726               switch (fvdef)
3727                 {
3728                 case fstartlist:
3729                 case finlist:
3730                   fvdef = flistseen;
3731                   break;
3732                 }
3733               if (!instruct
3734                   && (typdef == tend
3735                       || typdef == ttypeseen))
3736                 {
3737                   typdef = tignore;
3738                   make_C_tag (FALSE); /* a typedef */
3739                 }
3740             }
3741           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3742             parlev = 0;
3743           break;
3744         case '{':
3745           if (definedef != dnone)
3746             break;
3747           if (typdef == ttypeseen)
3748             {
3749               /* Whenever typdef is set to tinbody (currently only
3750                  here), typdefbracelev should be set to bracelev. */
3751               typdef = tinbody;
3752               typdefbracelev = bracelev;
3753             }
3754           switch (fvdef)
3755             {
3756             case flistseen:
3757               make_C_tag (TRUE);    /* a function */
3758               /* FALLTHRU */
3759             case fignore:
3760               fvdef = fvnone;
3761               break;
3762             case fvnone:
3763               switch (objdef)
3764                 {
3765                 case otagseen:
3766                   make_C_tag (TRUE); /* an Objective C class */
3767                   objdef = oignore;
3768                   break;
3769                 case omethodtag:
3770                 case omethodparm:
3771                   make_C_tag (TRUE); /* an Objective C method */
3772                   objdef = oinbody;
3773                   break;
3774                 default:
3775                   /* Neutralize `extern "C" {' grot. */
3776                   if (bracelev == 0 && structdef == snone && nestlev == 0
3777                       && typdef == tnone)
3778                     bracelev = -1;
3779                 }
3780               break;
3781             }
3782           switch (structdef)
3783             {
3784             case skeyseen:         /* unnamed struct */
3785               pushclass_above (bracelev, NULL, 0);
3786               structdef = snone;
3787               break;
3788             case stagseen:         /* named struct or enum */
3789             case scolonseen:       /* a class */
3790               pushclass_above (bracelev,token.line+token.offset, token.length);
3791               structdef = snone;
3792               make_C_tag (FALSE);  /* a struct or enum */
3793               break;
3794             }
3795           bracelev += 1;
3796           break;
3797         case '*':
3798           if (definedef != dnone)
3799             break;
3800           if (fvdef == fstartlist)
3801             {
3802               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3803               token.valid = FALSE;
3804             }
3805           break;
3806         case '}':
3807           if (definedef != dnone)
3808             break;
3809           bracelev -= 1;
3810           if (!ignoreindent && lp == newlb.buffer + 1)
3811             {
3812               if (bracelev != 0)
3813                 token.valid = FALSE; /* unexpected value, token unreliable */
3814               bracelev = 0;     /* reset brace level if first column */
3815               parlev = 0;       /* also reset paren level, just in case... */
3816             }
3817           else if (bracelev < 0)
3818             {
3819               token.valid = FALSE; /* something gone amiss, token unreliable */
3820               bracelev = 0;
3821             }
3822           if (bracelev == 0 && fvdef == vignore)
3823             fvdef = fvnone;             /* end of function */
3824           popclass_above (bracelev);
3825           structdef = snone;
3826           /* Only if typdef == tinbody is typdefbracelev significant. */
3827           if (typdef == tinbody && bracelev <= typdefbracelev)
3828             {
3829               assert (bracelev == typdefbracelev);
3830               typdef = tend;
3831             }
3832           break;
3833         case '=':
3834           if (definedef != dnone)
3835             break;
3836           switch (fvdef)
3837             {
3838             case foperator:
3839             case finlist:
3840             case fignore:
3841             case vignore:
3842               break;
3843             case fvnameseen:
3844               if ((members && bracelev == 1)
3845                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3846                 make_C_tag (FALSE); /* a variable */
3847               /* FALLTHRU */
3848             default:
3849               fvdef = vignore;
3850             }
3851           break;
3852         case '<':
3853           if (cplpl
3854               && (structdef == stagseen || fvdef == fvnameseen))
3855             {
3856               templatelev++;
3857               break;
3858             }
3859           goto resetfvdef;
3860         case '>':
3861           if (templatelev > 0)
3862             {
3863               templatelev--;
3864               break;
3865             }
3866           goto resetfvdef;
3867         case '+':
3868         case '-':
3869           if (objdef == oinbody && bracelev == 0)
3870             {
3871               objdef = omethodsign;
3872               break;
3873             }
3874           /* FALLTHRU */
3875         resetfvdef:
3876         case '#': case '~': case '&': case '%': case '/':
3877         case '|': case '^': case '!': case '.': case '?':
3878           if (definedef != dnone)
3879             break;
3880           /* These surely cannot follow a function tag in C. */
3881           switch (fvdef)
3882             {
3883             case foperator:
3884             case finlist:
3885             case fignore:
3886             case vignore:
3887               break;
3888             default:
3889               fvdef = fvnone;
3890             }
3891           break;
3892         case '\0':
3893           if (objdef == otagseen)
3894             {
3895               make_C_tag (TRUE); /* an Objective C class */
3896               objdef = oignore;
3897             }
3898           /* If a macro spans multiple lines don't reset its state. */
3899           if (quotednl)
3900             CNL_SAVE_DEFINEDEF ();
3901           else
3902             CNL ();
3903           break;
3904         } /* switch (c) */
3905
3906     } /* while not eof */
3907
3908   free (lbs[0].lb.buffer);
3909   free (lbs[1].lb.buffer);
3910 }
3911
3912 /*
3913  * Process either a C++ file or a C file depending on the setting
3914  * of a global flag.
3915  */
3916 static void
3917 default_C_entries (FILE *inf)
3918 {
3919   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3920 }
3921
/* Tag INF as plain C: C_entries is called with no extension flags. */
static void
plain_C_entries (FILE *inf)
{
  C_entries (0, inf);
}
3928
3929 /* Always do C++. */
3930 static void
3931 Cplusplus_entries (FILE *inf)
3932 {
3933   C_entries (C_PLPL, inf);
3934 }
3935
3936 /* Always do Java. */
3937 static void
3938 Cjava_entries (FILE *inf)
3939 {
3940   C_entries (C_JAVA, inf);
3941 }
3942
3943 /* Always do C*. */
3944 static void
3945 Cstar_entries (FILE *inf)
3946 {
3947   C_entries (C_STAR, inf);
3948 }
3949
3950 /* Always do Yacc. */
3951 static void
3952 Yacc_entries (FILE *inf)
3953 {
3954   C_entries (YACC, inf);
3955 }
3956
3957 \f
/* Useful macros. */

/* Loop header that walks FILE_POINTER one line at a time.  Before each
   iteration, and only while feof (FILE_POINTER) is still false, the
   next line is read into LINE_BUFFER with readline and CHAR_POINTER is
   set to the start of LINE_BUFFER.buffer.  The macro must be followed
   by the loop body (a single statement or a braced block); `continue'
   and `break' inside that body apply to this loop.  LINE_BUFFER must
   be an lvalue, since its address is taken. */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (!feof (file_pointer)                                           \
         && (readline (&line_buffer, file_pointer),                     \
             char_pointer = line_buffer.buffer,                         \
             TRUE))
3967
/* True when CP points at the keyword KW and the character right after
   the keyword passes the notinname test.  On success CP is moved past
   the keyword and then past whatever skip_spaces skips.  KW must be a
   string literal: the `"" kw' concatenation inside assert is a syntax
   error otherwise.  CP is evaluated more than once and is assigned to,
   so it must be a side-effect-free lvalue. */
#define LOOKING_AT(cp, kw)                                              \
  ((assert ("" kw), TRUE)                                               \
   && strneq ((cp), kw, sizeof (kw) - 1)                                \
   && notinname ((cp)[sizeof (kw) - 1])                                 \
   && ((cp) = skip_spaces ((cp) + (sizeof (kw) - 1))) != NULL)
3973
/* Case-insensitive relative of LOOKING_AT: true when CP points at the
   keyword KW as compared by strncaseeq.  Unlike LOOKING_AT it does not
   apply the notinname test to the following character, and on success
   it only advances CP to just past the keyword; no spaces are skipped.
   KW must be a string literal (enforced by the `"" kw' trick); CP is
   evaluated more than once and is modified. */
#define LOOKING_AT_NOCASE(cp, kw)                                       \
  ((assert ("" kw), TRUE)                                               \
   && strncaseeq ((cp), kw, sizeof (kw) - 1)                            \
   && ((cp) += sizeof (kw) - 1) != NULL)
3979
3980 /*
3981  * Read a file, but do no processing.  This is used to do regexp
3982  * matching on files that have no language defined.
3983  */
3984 static void
3985 just_read_file (FILE *inf)
3986 {
3987   register char *dummy;
3988
3989   LOOP_ON_INPUT_LINES (inf, lb, dummy)
3990     continue;
3991 }
3992
3993 \f
3994 /* Fortran parsing */
3995
3996 static void F_takeprec (void);
3997 static void F_getit (FILE *);
3998
3999 static void
4000 F_takeprec (void)
4001 {
4002   dbp = skip_spaces (dbp);
4003   if (*dbp != '*')
4004     return;
4005   dbp++;
4006   dbp = skip_spaces (dbp);
4007   if (strneq (dbp, "(*)", 3))
4008     {
4009       dbp += 3;
4010       return;
4011     }
4012   if (!ISDIGIT (*dbp))
4013     {
4014       --dbp;                    /* force failure */
4015       return;
4016     }
4017   do
4018     dbp++;
4019   while (ISDIGIT (*dbp));
4020 }
4021
4022 static void
4023 F_getit (FILE *inf)
4024 {
4025   register char *cp;
4026
4027   dbp = skip_spaces (dbp);
4028   if (*dbp == '\0')
4029     {
4030       readline (&lb, inf);
4031       dbp = lb.buffer;
4032       if (dbp[5] != '&')
4033         return;
4034       dbp += 6;
4035       dbp = skip_spaces (dbp);
4036     }
4037   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4038     return;
4039   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4040     continue;
4041   make_tag (dbp, cp-dbp, TRUE,
4042             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4043 }
4044
4045
4046 static void
4047 Fortran_functions (FILE *inf)
4048 {
4049   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4050     {
4051       if (*dbp == '%')
4052         dbp++;                  /* Ratfor escape to fortran */
4053       dbp = skip_spaces (dbp);
4054       if (*dbp == '\0')
4055         continue;
4056
4057       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4058         dbp = skip_spaces (dbp);
4059
4060       switch (lowcase (*dbp))
4061         {
4062         case 'i':
4063           if (nocase_tail ("integer"))
4064             F_takeprec ();
4065           break;
4066         case 'r':
4067           if (nocase_tail ("real"))
4068             F_takeprec ();
4069           break;
4070         case 'l':
4071           if (nocase_tail ("logical"))
4072             F_takeprec ();
4073           break;
4074         case 'c':
4075           if (nocase_tail ("complex") || nocase_tail ("character"))
4076             F_takeprec ();
4077           break;
4078         case 'd':
4079           if (nocase_tail ("double"))
4080             {
4081               dbp = skip_spaces (dbp);
4082               if (*dbp == '\0')
4083                 continue;
4084               if (nocase_tail ("precision"))
4085                 break;
4086               continue;
4087             }
4088           break;
4089         }
4090       dbp = skip_spaces (dbp);
4091       if (*dbp == '\0')
4092         continue;
4093       switch (lowcase (*dbp))
4094         {
4095         case 'f':
4096           if (nocase_tail ("function"))
4097             F_getit (inf);
4098           continue;
4099         case 's':
4100           if (nocase_tail ("subroutine"))
4101             F_getit (inf);
4102           continue;
4103         case 'e':
4104           if (nocase_tail ("entry"))
4105             F_getit (inf);
4106           continue;
4107         case 'b':
4108           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4109             {
4110               dbp = skip_spaces (dbp);
4111               if (*dbp == '\0') /* assume un-named */
4112                 make_tag ("blockdata", 9, TRUE,
4113                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4114               else
4115                 F_getit (inf);  /* look for name */
4116             }
4117           continue;
4118         }
4119     }
4120 }
4121
4122 \f
4123 /*
4124  * Ada parsing
4125  * Original code by
4126  * Philippe Waroquiers (1998)
4127  */
4128
4129 /* Once we are positioned after an "interesting" keyword, let's get
4130    the real tag value necessary. */
4131 static void
4132 Ada_getit (FILE *inf, const char *name_qualifier)
4133 {
4134   register char *cp;
4135   char *name;
4136   char c;
4137
4138   while (!feof (inf))
4139     {
4140       dbp = skip_spaces (dbp);
4141       if (*dbp == '\0'
4142           || (dbp[0] == '-' && dbp[1] == '-'))
4143         {
4144           readline (&lb, inf);
4145           dbp = lb.buffer;
4146         }
4147       switch (lowcase(*dbp))
4148         {
4149         case 'b':
4150           if (nocase_tail ("body"))
4151             {
4152               /* Skipping body of   procedure body   or   package body or ....
4153                  resetting qualifier to body instead of spec. */
4154               name_qualifier = "/b";
4155               continue;
4156             }
4157           break;
4158         case 't':
4159           /* Skipping type of   task type   or   protected type ... */
4160           if (nocase_tail ("type"))
4161             continue;
4162           break;
4163         }
4164       if (*dbp == '"')
4165         {
4166           dbp += 1;
4167           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4168             continue;
4169         }
4170       else
4171         {
4172           dbp = skip_spaces (dbp);
4173           for (cp = dbp;
4174                (*cp != '\0'
4175                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4176                cp++)
4177             continue;
4178           if (cp == dbp)
4179             return;
4180         }
4181       c = *cp;
4182       *cp = '\0';
4183       name = concat (dbp, name_qualifier, "");
4184       *cp = c;
4185       make_tag (name, strlen (name), TRUE,
4186                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4187       free (name);
4188       if (c == '"')
4189         dbp = cp + 1;
4190       return;
4191     }
4192 }
4193
4194 static void
4195 Ada_funcs (FILE *inf)
4196 {
4197   bool inquote = FALSE;
4198   bool skip_till_semicolumn = FALSE;
4199
4200   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4201     {
4202       while (*dbp != '\0')
4203         {
4204           /* Skip a string i.e. "abcd". */
4205           if (inquote || (*dbp == '"'))
4206             {
4207               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4208               if (dbp != NULL)
4209                 {
4210                   inquote = FALSE;
4211                   dbp += 1;
4212                   continue;     /* advance char */
4213                 }
4214               else
4215                 {
4216                   inquote = TRUE;
4217                   break;        /* advance line */
4218                 }
4219             }
4220
4221           /* Skip comments. */
4222           if (dbp[0] == '-' && dbp[1] == '-')
4223             break;              /* advance line */
4224
4225           /* Skip character enclosed in single quote i.e. 'a'
4226              and skip single quote starting an attribute i.e. 'Image. */
4227           if (*dbp == '\'')
4228             {
4229               dbp++ ;
4230               if (*dbp != '\0')
4231                 dbp++;
4232               continue;
4233             }
4234
4235           if (skip_till_semicolumn)
4236             {
4237               if (*dbp == ';')
4238                 skip_till_semicolumn = FALSE;
4239               dbp++;
4240               continue;         /* advance char */
4241             }
4242
4243           /* Search for beginning of a token.  */
4244           if (!begtoken (*dbp))
4245             {
4246               dbp++;
4247               continue;         /* advance char */
4248             }
4249
4250           /* We are at the beginning of a token. */
4251           switch (lowcase(*dbp))
4252             {
4253             case 'f':
4254               if (!packages_only && nocase_tail ("function"))
4255                 Ada_getit (inf, "/f");
4256               else
4257                 break;          /* from switch */
4258               continue;         /* advance char */
4259             case 'p':
4260               if (!packages_only && nocase_tail ("procedure"))
4261                 Ada_getit (inf, "/p");
4262               else if (nocase_tail ("package"))
4263                 Ada_getit (inf, "/s");
4264               else if (nocase_tail ("protected")) /* protected type */
4265                 Ada_getit (inf, "/t");
4266               else
4267                 break;          /* from switch */
4268               continue;         /* advance char */
4269
4270             case 'u':
4271               if (typedefs && !packages_only && nocase_tail ("use"))
4272                 {
4273                   /* when tagging types, avoid tagging  use type Pack.Typename;
4274                      for this, we will skip everything till a ; */
4275                   skip_till_semicolumn = TRUE;
4276                   continue;     /* advance char */
4277                 }
4278
4279             case 't':
4280               if (!packages_only && nocase_tail ("task"))
4281                 Ada_getit (inf, "/k");
4282               else if (typedefs && !packages_only && nocase_tail ("type"))
4283                 {
4284                   Ada_getit (inf, "/t");
4285                   while (*dbp != '\0')
4286                     dbp += 1;
4287                 }
4288               else
4289                 break;          /* from switch */
4290               continue;         /* advance char */
4291             }
4292
4293           /* Look for the end of the token. */
4294           while (!endtoken (*dbp))
4295             dbp++;
4296
4297         } /* advance char */
4298     } /* advance line */
4299 }
4300
4301 \f
4302 /*
4303  * Unix and microcontroller assembly tag handling
4304  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4305  * Idea by Bob Weiner, Motorola Inc. (1994)
4306  */
4307 static void
4308 Asm_labels (FILE *inf)
4309 {
4310   register char *cp;
4311
4312   LOOP_ON_INPUT_LINES (inf, lb, cp)
4313     {
4314       /* If first char is alphabetic or one of [_.$], test for colon
4315          following identifier. */
4316       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4317         {
4318           /* Read past label. */
4319           cp++;
4320           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4321             cp++;
4322           if (*cp == ':' || iswhite (*cp))
4323             /* Found end of label, so copy it and add it to the table. */
4324             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4325                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4326         }
4327     }
4328 }
4329
4330 \f
4331 /*
4332  * Perl support
4333  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4334  * Perl variable names: /^(my|local).../
4335  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4336  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4337  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4338  */
4339 static void
4340 Perl_functions (FILE *inf)
4341 {
4342   char *package = savestr ("main"); /* current package name */
4343   register char *cp;
4344
4345   LOOP_ON_INPUT_LINES (inf, lb, cp)
4346     {
4347       cp = skip_spaces (cp);
4348
4349       if (LOOKING_AT (cp, "package"))
4350         {
4351           free (package);
4352           get_tag (cp, &package);
4353         }
4354       else if (LOOKING_AT (cp, "sub"))
4355         {
4356           char *pos;
4357           char *sp = cp;
4358
4359           while (!notinname (*cp))
4360             cp++;
4361           if (cp == sp)
4362             continue;           /* nothing found */
4363           if ((pos = etags_strchr (sp, ':')) != NULL
4364               && pos < cp && pos[1] == ':')
4365             /* The name is already qualified. */
4366             make_tag (sp, cp - sp, TRUE,
4367                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4368           else
4369             /* Qualify it. */
4370             {
4371               char savechar, *name;
4372
4373               savechar = *cp;
4374               *cp = '\0';
4375               name = concat (package, "::", sp);
4376               *cp = savechar;
4377               make_tag (name, strlen(name), TRUE,
4378                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4379               free (name);
4380             }
4381         }
4382        else if (globals)        /* only if we are tagging global vars */
4383         {
4384           /* Skip a qualifier, if any. */
4385           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4386           /* After "my" or "local", but before any following paren or space. */
4387           char *varstart = cp;
4388
4389           if (qual              /* should this be removed?  If yes, how? */
4390               && (*cp == '$' || *cp == '@' || *cp == '%'))
4391             {
4392               varstart += 1;
4393               do
4394                 cp++;
4395               while (ISALNUM (*cp) || *cp == '_');
4396             }
4397           else if (qual)
4398             {
4399               /* Should be examining a variable list at this point;
4400                  could insist on seeing an open parenthesis. */
4401               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4402                 cp++;
4403             }
4404           else
4405             continue;
4406
4407           make_tag (varstart, cp - varstart, FALSE,
4408                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4409         }
4410     }
4411   free (package);
4412 }
4413
4414
4415 /*
4416  * Python support
4417  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4418  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4419  * More ideas by seb bacon <seb@jamkit.com> (2002)
4420  */
4421 static void
4422 Python_functions (FILE *inf)
4423 {
4424   register char *cp;
4425
4426   LOOP_ON_INPUT_LINES (inf, lb, cp)
4427     {
4428       cp = skip_spaces (cp);
4429       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4430         {
4431           char *name = cp;
4432           while (!notinname (*cp) && *cp != ':')
4433             cp++;
4434           make_tag (name, cp - name, TRUE,
4435                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4436         }
4437     }
4438 }
4439
4440 \f
4441 /*
4442  * PHP support
4443  * Look for:
4444  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4445  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4446  *  - /^[ \t]*define\(\"[^\"]+/
4447  * Only with --members:
4448  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4449  * Idea by Diez B. Roggisch (2001)
4450  */
4451 static void
4452 PHP_functions (FILE *inf)
4453 {
4454   register char *cp, *name;
4455   bool search_identifier = FALSE;
4456
4457   LOOP_ON_INPUT_LINES (inf, lb, cp)
4458     {
4459       cp = skip_spaces (cp);
4460       name = cp;
4461       if (search_identifier
4462           && *cp != '\0')
4463         {
4464           while (!notinname (*cp))
4465             cp++;
4466           make_tag (name, cp - name, TRUE,
4467                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4468           search_identifier = FALSE;
4469         }
4470       else if (LOOKING_AT (cp, "function"))
4471         {
4472           if(*cp == '&')
4473             cp = skip_spaces (cp+1);
4474           if(*cp != '\0')
4475             {
4476               name = cp;
4477               while (!notinname (*cp))
4478                 cp++;
4479               make_tag (name, cp - name, TRUE,
4480                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4481             }
4482           else
4483             search_identifier = TRUE;
4484         }
4485       else if (LOOKING_AT (cp, "class"))
4486         {
4487           if (*cp != '\0')
4488             {
4489               name = cp;
4490               while (*cp != '\0' && !iswhite (*cp))
4491                 cp++;
4492               make_tag (name, cp - name, FALSE,
4493                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4494             }
4495           else
4496             search_identifier = TRUE;
4497         }
4498       else if (strneq (cp, "define", 6)
4499                && (cp = skip_spaces (cp+6))
4500                && *cp++ == '('
4501                && (*cp == '"' || *cp == '\''))
4502         {
4503           char quote = *cp++;
4504           name = cp;
4505           while (*cp != quote && *cp != '\0')
4506             cp++;
4507           make_tag (name, cp - name, FALSE,
4508                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4509         }
4510       else if (members
4511                && LOOKING_AT (cp, "var")
4512                && *cp == '$')
4513         {
4514           name = cp;
4515           while (!notinname(*cp))
4516             cp++;
4517           make_tag (name, cp - name, FALSE,
4518                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4519         }
4520     }
4521 }
4522
4523 \f
4524 /*
4525  * Cobol tag functions
4526  * We could look for anything that could be a paragraph name.
4527  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4528  * Idea by Corny de Souza (1993)
4529  */
4530 static void
4531 Cobol_paragraphs (FILE *inf)
4532 {
4533   register char *bp, *ep;
4534
4535   LOOP_ON_INPUT_LINES (inf, lb, bp)
4536     {
4537       if (lb.len < 9)
4538         continue;
4539       bp += 8;
4540
4541       /* If eoln, compiler option or comment ignore whole line. */
4542       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4543         continue;
4544
4545       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4546         continue;
4547       if (*ep++ == '.')
4548         make_tag (bp, ep - bp, TRUE,
4549                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4550     }
4551 }
4552
4553 \f
4554 /*
4555  * Makefile support
4556  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4557  */
4558 static void
4559 Makefile_targets (FILE *inf)
4560 {
4561   register char *bp;
4562
4563   LOOP_ON_INPUT_LINES (inf, lb, bp)
4564     {
4565       if (*bp == '\t' || *bp == '#')
4566         continue;
4567       while (*bp != '\0' && *bp != '=' && *bp != ':')
4568         bp++;
4569       if (*bp == ':' || (globals && *bp == '='))
4570         {
4571           /* We should detect if there is more than one tag, but we do not.
4572              We just skip initial and final spaces. */
4573           char * namestart = skip_spaces (lb.buffer);
4574           while (--bp > namestart)
4575             if (!notinname (*bp))
4576               break;
4577           make_tag (namestart, bp - namestart + 1, TRUE,
4578                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4579         }
4580     }
4581 }
4582
4583 \f
4584 /*
4585  * Pascal parsing
4586  * Original code by Mosur K. Mohan (1989)
4587  *
4588  *  Locates tags for procedures & functions.  Doesn't do any type- or
4589  *  var-definitions.  It does look for the keyword "extern" or
4590  *  "forward" immediately following the procedure statement; if found,
4591  *  the tag is skipped.
4592  */
4593 static void
4594 Pascal_functions (FILE *inf)
4595 {
4596   linebuffer tline;             /* mostly copied from C_entries */
4597   long save_lcno;
4598   int save_lineno, namelen, taglen;
4599   char c, *name;
4600
4601   bool                          /* each of these flags is TRUE if: */
4602     incomment,                  /* point is inside a comment */
4603     inquote,                    /* point is inside '..' string */
4604     get_tagname,                /* point is after PROCEDURE/FUNCTION
4605                                    keyword, so next item = potential tag */
4606     found_tag,                  /* point is after a potential tag */
4607     inparms,                    /* point is within parameter-list */
4608     verify_tag;                 /* point has passed the parm-list, so the
4609                                    next token will determine whether this
4610                                    is a FORWARD/EXTERN to be ignored, or
4611                                    whether it is a real tag */
4612
4613   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4614   name = NULL;                  /* keep compiler quiet */
4615   dbp = lb.buffer;
4616   *dbp = '\0';
4617   linebuffer_init (&tline);
4618
4619   incomment = inquote = FALSE;
4620   found_tag = FALSE;            /* have a proc name; check if extern */
4621   get_tagname = FALSE;          /* found "procedure" keyword         */
4622   inparms = FALSE;              /* found '(' after "proc"            */
4623   verify_tag = FALSE;           /* check if "extern" is ahead        */
4624
4625
4626   while (!feof (inf))           /* long main loop to get next char */
4627     {
4628       c = *dbp++;
4629       if (c == '\0')            /* if end of line */
4630         {
4631           readline (&lb, inf);
4632           dbp = lb.buffer;
4633           if (*dbp == '\0')
4634             continue;
4635           if (!((found_tag && verify_tag)
4636                 || get_tagname))
4637             c = *dbp++;         /* only if don't need *dbp pointing
4638                                    to the beginning of the name of
4639                                    the procedure or function */
4640         }
4641       if (incomment)
4642         {
4643           if (c == '}')         /* within { } comments */
4644             incomment = FALSE;
4645           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4646             {
4647               dbp++;
4648               incomment = FALSE;
4649             }
4650           continue;
4651         }
4652       else if (inquote)
4653         {
4654           if (c == '\'')
4655             inquote = FALSE;
4656           continue;
4657         }
4658       else
4659         switch (c)
4660           {
4661           case '\'':
4662             inquote = TRUE;     /* found first quote */
4663             continue;
4664           case '{':             /* found open { comment */
4665             incomment = TRUE;
4666             continue;
4667           case '(':
4668             if (*dbp == '*')    /* found open (* comment */
4669               {
4670                 incomment = TRUE;
4671                 dbp++;
4672               }
4673             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4674               inparms = TRUE;
4675             continue;
4676           case ')':             /* end of parms list */
4677             if (inparms)
4678               inparms = FALSE;
4679             continue;
4680           case ';':
4681             if (found_tag && !inparms) /* end of proc or fn stmt */
4682               {
4683                 verify_tag = TRUE;
4684                 break;
4685               }
4686             continue;
4687           }
4688       if (found_tag && verify_tag && (*dbp != ' '))
4689         {
4690           /* Check if this is an "extern" declaration. */
4691           if (*dbp == '\0')
4692             continue;
4693           if (lowcase (*dbp == 'e'))
4694             {
4695               if (nocase_tail ("extern")) /* superfluous, really! */
4696                 {
4697                   found_tag = FALSE;
4698                   verify_tag = FALSE;
4699                 }
4700             }
4701           else if (lowcase (*dbp) == 'f')
4702             {
4703               if (nocase_tail ("forward")) /* check for forward reference */
4704                 {
4705                   found_tag = FALSE;
4706                   verify_tag = FALSE;
4707                 }
4708             }
4709           if (found_tag && verify_tag) /* not external proc, so make tag */
4710             {
4711               found_tag = FALSE;
4712               verify_tag = FALSE;
4713               make_tag (name, namelen, TRUE,
4714                         tline.buffer, taglen, save_lineno, save_lcno);
4715               continue;
4716             }
4717         }
4718       if (get_tagname)          /* grab name of proc or fn */
4719         {
4720           char *cp;
4721
4722           if (*dbp == '\0')
4723             continue;
4724
4725           /* Find block name. */
4726           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4727             continue;
4728
4729           /* Save all values for later tagging. */
4730           linebuffer_setlen (&tline, lb.len);
4731           strcpy (tline.buffer, lb.buffer);
4732           save_lineno = lineno;
4733           save_lcno = linecharno;
4734           name = tline.buffer + (dbp - lb.buffer);
4735           namelen = cp - dbp;
4736           taglen = cp - lb.buffer + 1;
4737
4738           dbp = cp;             /* set dbp to e-o-token */
4739           get_tagname = FALSE;
4740           found_tag = TRUE;
4741           continue;
4742
4743           /* And proceed to check for "extern". */
4744         }
4745       else if (!incomment && !inquote && !found_tag)
4746         {
4747           /* Check for proc/fn keywords. */
4748           switch (lowcase (c))
4749             {
4750             case 'p':
4751               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4752                 get_tagname = TRUE;
4753               continue;
4754             case 'f':
4755               if (nocase_tail ("unction"))
4756                 get_tagname = TRUE;
4757               continue;
4758             }
4759         }
4760     } /* while not eof */
4761
4762   free (tline.buffer);
4763 }
4764
4765 \f
4766 /*
4767  * Lisp tag functions
4768  *  look for (def or (DEF, quote or QUOTE
4769  */
4770
4771 static void L_getit (void);
4772
4773 static void
4774 L_getit (void)
4775 {
4776   if (*dbp == '\'')             /* Skip prefix quote */
4777     dbp++;
4778   else if (*dbp == '(')
4779   {
4780     dbp++;
4781     /* Try to skip "(quote " */
4782     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4783       /* Ok, then skip "(" before name in (defstruct (foo)) */
4784       dbp = skip_spaces (dbp);
4785   }
4786   get_tag (dbp, NULL);
4787 }
4788
4789 static void
4790 Lisp_functions (FILE *inf)
4791 {
4792   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4793     {
4794       if (dbp[0] != '(')
4795         continue;
4796
4797       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4798         {
4799           dbp = skip_non_spaces (dbp);
4800           dbp = skip_spaces (dbp);
4801           L_getit ();
4802         }
4803       else
4804         {
4805           /* Check for (foo::defmumble name-defined ... */
4806           do
4807             dbp++;
4808           while (!notinname (*dbp) && *dbp != ':');
4809           if (*dbp == ':')
4810             {
4811               do
4812                 dbp++;
4813               while (*dbp == ':');
4814
4815               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4816                 {
4817                   dbp = skip_non_spaces (dbp);
4818                   dbp = skip_spaces (dbp);
4819                   L_getit ();
4820                 }
4821             }
4822         }
4823     }
4824 }
4825
4826 \f
4827 /*
4828  * Lua script language parsing
4829  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4830  *
4831  *  "function" and "local function" are tags if they start at column 1.
4832  */
4833 static void
4834 Lua_functions (FILE *inf)
4835 {
4836   register char *bp;
4837
4838   LOOP_ON_INPUT_LINES (inf, lb, bp)
4839     {
4840       if (bp[0] != 'f' && bp[0] != 'l')
4841         continue;
4842
4843       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4844
4845       if (LOOKING_AT (bp, "function"))
4846         get_tag (bp, NULL);
4847     }
4848 }
4849
4850 \f
4851 /*
4852  * Postscript tags
4853  * Just look for lines where the first character is '/'
4854  * Also look at "defineps" for PSWrap
4855  * Ideas by:
4856  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4857  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4858  */
4859 static void
4860 PS_functions (FILE *inf)
4861 {
4862   register char *bp, *ep;
4863
4864   LOOP_ON_INPUT_LINES (inf, lb, bp)
4865     {
4866       if (bp[0] == '/')
4867         {
4868           for (ep = bp+1;
4869                *ep != '\0' && *ep != ' ' && *ep != '{';
4870                ep++)
4871             continue;
4872           make_tag (bp, ep - bp, TRUE,
4873                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4874         }
4875       else if (LOOKING_AT (bp, "defineps"))
4876         get_tag (bp, NULL);
4877     }
4878 }
4879
4880 \f
4881 /*
4882  * Forth tags
4883  * Ignore anything after \ followed by space or in ( )
4884  * Look for words defined by :
4885  * Look for constant, code, create, defer, value, and variable
4886  * OBP extensions:  Look for buffer:, field,
4887  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4888  */
4889 static void
4890 Forth_words (FILE *inf)
4891 {
4892   register char *bp;
4893
4894   LOOP_ON_INPUT_LINES (inf, lb, bp)
4895     while ((bp = skip_spaces (bp))[0] != '\0')
4896       if (bp[0] == '\\' && iswhite(bp[1]))
4897         break;                  /* read next line */
4898       else if (bp[0] == '(' && iswhite(bp[1]))
4899         do                      /* skip to ) or eol */
4900           bp++;
4901         while (*bp != ')' && *bp != '\0');
4902       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4903                || LOOKING_AT_NOCASE (bp, "constant")
4904                || LOOKING_AT_NOCASE (bp, "code")
4905                || LOOKING_AT_NOCASE (bp, "create")
4906                || LOOKING_AT_NOCASE (bp, "defer")
4907                || LOOKING_AT_NOCASE (bp, "value")
4908                || LOOKING_AT_NOCASE (bp, "variable")
4909                || LOOKING_AT_NOCASE (bp, "buffer:")
4910                || LOOKING_AT_NOCASE (bp, "field"))
4911         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4912       else
4913         bp = skip_non_spaces (bp);
4914 }
4915
4916 \f
4917 /*
4918  * Scheme tag functions
4919  * look for (def... xyzzy
4920  *          (def... (xyzzy
4921  *          (def ... ((...(xyzzy ....
4922  *          (set! xyzzy
4923  * Original code by Ken Haase (1985?)
4924  */
4925 static void
4926 Scheme_functions (FILE *inf)
4927 {
4928   register char *bp;
4929
4930   LOOP_ON_INPUT_LINES (inf, lb, bp)
4931     {
4932       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4933         {
4934           bp = skip_non_spaces (bp+4);
4935           /* Skip over open parens and white space.  Don't continue past
4936              '\0'. */
4937           while (*bp && notinname (*bp))
4938             bp++;
4939           get_tag (bp, NULL);
4940         }
4941       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4942         get_tag (bp, NULL);
4943     }
4944 }
4945
4946 \f
4947 /* Find tags in TeX and LaTeX input files.  */
4948
4949 /* TEX_toktab is a table of TeX control sequences that define tags.
4950  * Each entry records one such control sequence.
4951  *
4952  * Original code from who knows whom.
4953  * Ideas by:
4954  *   Stefan Monnier (2002)
4955  */
4956
4957 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4958
4959 /* Default set of control sequences to put into TEX_toktab.
4960    The value of environment var TEXTAGS is prepended to this.  */
4961 static const char *TEX_defenv = "\
4962 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4963 :part:appendix:entry:index:def\
4964 :newcommand:renewcommand:newenvironment:renewenvironment";
4965
4966 static void TEX_mode (FILE *);
4967 static void TEX_decode_env (const char *, const char *);
4968
4969 static char TEX_esc = '\\';
4970 static char TEX_opgrp = '{';
4971 static char TEX_clgrp = '}';
4972
4973 /*
4974  * TeX/LaTeX scanning loop.
4975  */
4976 static void
4977 TeX_commands (FILE *inf)
4978 {
4979   char *cp;
4980   linebuffer *key;
4981
4982   /* Select either \ or ! as escape character.  */
4983   TEX_mode (inf);
4984
4985   /* Initialize token table once from environment. */
4986   if (TEX_toktab == NULL)
4987     TEX_decode_env ("TEXTAGS", TEX_defenv);
4988
4989   LOOP_ON_INPUT_LINES (inf, lb, cp)
4990     {
4991       /* Look at each TEX keyword in line. */
4992       for (;;)
4993         {
4994           /* Look for a TEX escape. */
4995           while (*cp++ != TEX_esc)
4996             if (cp[-1] == '\0' || cp[-1] == '%')
4997               goto tex_next_line;
4998
4999           for (key = TEX_toktab; key->buffer != NULL; key++)
5000             if (strneq (cp, key->buffer, key->len))
5001               {
5002                 register char *p;
5003                 int namelen, linelen;
5004                 bool opgrp = FALSE;
5005
5006                 cp = skip_spaces (cp + key->len);
5007                 if (*cp == TEX_opgrp)
5008                   {
5009                     opgrp = TRUE;
5010                     cp++;
5011                   }
5012                 for (p = cp;
5013                      (!iswhite (*p) && *p != '#' &&
5014                       *p != TEX_opgrp && *p != TEX_clgrp);
5015                      p++)
5016                   continue;
5017                 namelen = p - cp;
5018                 linelen = lb.len;
5019                 if (!opgrp || *p == TEX_clgrp)
5020                   {
5021                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5022                       p++;
5023                     linelen = p - lb.buffer + 1;
5024                   }
5025                 make_tag (cp, namelen, TRUE,
5026                           lb.buffer, linelen, lineno, linecharno);
5027                 goto tex_next_line; /* We only tag a line once */
5028               }
5029         }
5030     tex_next_line:
5031       ;
5032     }
5033 }
5034
5035 #define TEX_LESC '\\'
5036 #define TEX_SESC '!'
5037
5038 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5039    chars accordingly. */
5040 static void
5041 TEX_mode (FILE *inf)
5042 {
5043   int c;
5044
5045   while ((c = getc (inf)) != EOF)
5046     {
5047       /* Skip to next line if we hit the TeX comment char. */
5048       if (c == '%')
5049         while (c != '\n' && c != EOF)
5050           c = getc (inf);
5051       else if (c == TEX_LESC || c == TEX_SESC )
5052         break;
5053     }
5054
5055   if (c == TEX_LESC)
5056     {
5057       TEX_esc = TEX_LESC;
5058       TEX_opgrp = '{';
5059       TEX_clgrp = '}';
5060     }
5061   else
5062     {
5063       TEX_esc = TEX_SESC;
5064       TEX_opgrp = '<';
5065       TEX_clgrp = '>';
5066     }
5067   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5068      No attempt is made to correct the situation. */
5069   rewind (inf);
5070 }
5071
5072 /* Read environment and prepend it to the default string.
5073    Build token table. */
5074 static void
5075 TEX_decode_env (const char *evarname, const char *defenv)
5076 {
5077   register const char *env, *p;
5078   int i, len;
5079
5080   /* Append default string to environment. */
5081   env = getenv (evarname);
5082   if (!env)
5083     env = defenv;
5084   else
5085     env = concat (env, defenv, "");
5086
5087   /* Allocate a token table */
5088   for (len = 1, p = env; p;)
5089     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5090       len++;
5091   TEX_toktab = xnew (len, linebuffer);
5092
5093   /* Unpack environment string into token table. Be careful about */
5094   /* zero-length strings (leading ':', "::" and trailing ':') */
5095   for (i = 0; *env != '\0';)
5096     {
5097       p = etags_strchr (env, ':');
5098       if (!p)                   /* End of environment string. */
5099         p = env + strlen (env);
5100       if (p - env > 0)
5101         {                       /* Only non-zero strings. */
5102           TEX_toktab[i].buffer = savenstr (env, p - env);
5103           TEX_toktab[i].len = p - env;
5104           i++;
5105         }
5106       if (*p)
5107         env = p + 1;
5108       else
5109         {
5110           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5111           TEX_toktab[i].len = 0;
5112           break;
5113         }
5114     }
5115 }
5116
5117 \f
5118 /* Texinfo support.  Dave Love, Mar. 2000.  */
5119 static void
5120 Texinfo_nodes (FILE *inf)
5121 {
5122   char *cp, *start;
5123   LOOP_ON_INPUT_LINES (inf, lb, cp)
5124     if (LOOKING_AT (cp, "@node"))
5125       {
5126         start = cp;
5127         while (*cp != '\0' && *cp != ',')
5128           cp++;
5129         make_tag (start, cp - start, TRUE,
5130                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5131       }
5132 }
5133
5134 \f
5135 /*
5136  * HTML support.
5137  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5138  * Contents of <a name=xxx> are tags with name xxx.
5139  *
5140  * Francesco Potortì, 2002.
5141  */
5142 static void
5143 HTML_labels (FILE *inf)
5144 {
5145   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5146   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5147   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5148   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5149   char *end;
5150
5151
5152   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5153
5154   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5155     for (;;)                    /* loop on the same line */
5156       {
5157         if (skiptag)            /* skip HTML tag */
5158           {
5159             while (*dbp != '\0' && *dbp != '>')
5160               dbp++;
5161             if (*dbp == '>')
5162               {
5163                 dbp += 1;
5164                 skiptag = FALSE;
5165                 continue;       /* look on the same line */
5166               }
5167             break;              /* go to next line */
5168           }
5169
5170         else if (intag) /* look for "name=" or "id=" */
5171           {
5172             while (*dbp != '\0' && *dbp != '>'
5173                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5174               dbp++;
5175             if (*dbp == '\0')
5176               break;            /* go to next line */
5177             if (*dbp == '>')
5178               {
5179                 dbp += 1;
5180                 intag = FALSE;
5181                 continue;       /* look on the same line */
5182               }
5183             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5184                 || LOOKING_AT_NOCASE (dbp, "id="))
5185               {
5186                 bool quoted = (dbp[0] == '"');
5187
5188                 if (quoted)
5189                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5190                     continue;
5191                 else
5192                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5193                     continue;
5194                 linebuffer_setlen (&token_name, end - dbp);
5195                 strncpy (token_name.buffer, dbp, end - dbp);
5196                 token_name.buffer[end - dbp] = '\0';
5197
5198                 dbp = end;
5199                 intag = FALSE;  /* we found what we looked for */
5200                 skiptag = TRUE; /* skip to the end of the tag */
5201                 getnext = TRUE; /* then grab the text */
5202                 continue;       /* look on the same line */
5203               }
5204             dbp += 1;
5205           }
5206
5207         else if (getnext)       /* grab next tokens and tag them */
5208           {
5209             dbp = skip_spaces (dbp);
5210             if (*dbp == '\0')
5211               break;            /* go to next line */
5212             if (*dbp == '<')
5213               {
5214                 intag = TRUE;
5215                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5216                 continue;       /* look on the same line */
5217               }
5218
5219             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5220               continue;
5221             make_tag (token_name.buffer, token_name.len, TRUE,
5222                       dbp, end - dbp, lineno, linecharno);
5223             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5224             getnext = FALSE;
5225             break;              /* go to next line */
5226           }
5227
5228         else                    /* look for an interesting HTML tag */
5229           {
5230             while (*dbp != '\0' && *dbp != '<')
5231               dbp++;
5232             if (*dbp == '\0')
5233               break;            /* go to next line */
5234             intag = TRUE;
5235             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5236               {
5237                 inanchor = TRUE;
5238                 continue;       /* look on the same line */
5239               }
5240             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5241                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5242                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5243                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5244               {
5245                 intag = FALSE;
5246                 getnext = TRUE;
5247                 continue;       /* look on the same line */
5248               }
5249             dbp += 1;
5250           }
5251       }
5252 }
5253
5254 \f
5255 /*
5256  * Prolog support
5257  *
5258  * Assumes that the predicate or rule starts at column 0.
5259  * Only the first clause of a predicate or rule is added.
5260  * Original code by Sunichirou Sugou (1989)
5261  * Rewritten by Anders Lindgren (1996)
5262  */
5263 static int prolog_pr (char *, char *);
5264 static void prolog_skip_comment (linebuffer *, FILE *);
5265 static int prolog_atom (char *, int);
5266
5267 static void
5268 Prolog_functions (FILE *inf)
5269 {
5270   char *cp, *last;
5271   int len;
5272   int allocated;
5273
5274   allocated = 0;
5275   len = 0;
5276   last = NULL;
5277
5278   LOOP_ON_INPUT_LINES (inf, lb, cp)
5279     {
5280       if (cp[0] == '\0')        /* Empty line */
5281         continue;
5282       else if (iswhite (cp[0])) /* Not a predicate */
5283         continue;
5284       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5285         prolog_skip_comment (&lb, inf);
5286       else if ((len = prolog_pr (cp, last)) > 0)
5287         {
5288           /* Predicate or rule.  Store the function name so that we
5289              only generate a tag for the first clause.  */
5290           if (last == NULL)
5291             last = xnew(len + 1, char);
5292           else if (len + 1 > allocated)
5293             xrnew (last, len + 1, char);
5294           allocated = len + 1;
5295           strncpy (last, cp, len);
5296           last[len] = '\0';
5297         }
5298     }
5299   free (last);
5300 }
5301
5302
5303 static void
5304 prolog_skip_comment (linebuffer *plb, FILE *inf)
5305 {
5306   char *cp;
5307
5308   do
5309     {
5310       for (cp = plb->buffer; *cp != '\0'; cp++)
5311         if (cp[0] == '*' && cp[1] == '/')
5312           return;
5313       readline (plb, inf);
5314     }
5315   while (!feof(inf));
5316 }
5317
5318 /*
5319  * A predicate or rule definition is added if it matches:
5320  *     <beginning of line><Prolog Atom><whitespace>(
5321  * or  <beginning of line><Prolog Atom><whitespace>:-
5322  *
5323  * It is added to the tags database if it doesn't match the
5324  * name of the previous clause header.
5325  *
5326  * Return the size of the name of the predicate or rule, or 0 if no
5327  * header was found.
5328  */
5329 static int
5330 prolog_pr (char *s, char *last)
5331
5332                                 /* Name of last clause. */
5333 {
5334   int pos;
5335   int len;
5336
5337   pos = prolog_atom (s, 0);
5338   if (pos < 1)
5339     return 0;
5340
5341   len = pos;
5342   pos = skip_spaces (s + pos) - s;
5343
5344   if ((s[pos] == '.'
5345        || (s[pos] == '(' && (pos += 1))
5346        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5347       && (last == NULL          /* save only the first clause */
5348           || len != (int)strlen (last)
5349           || !strneq (s, last, len)))
5350         {
5351           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5352           return len;
5353         }
5354   else
5355     return 0;
5356 }
5357
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumerics and underscores that starts with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 * A quoted atom that is not closed before the end of the string is
 * an error.
 */
static int
prolog_atom (char *s, int pos)
{
  const char *start = s + pos;
  const char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: scan up to the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        if (p[1] != '\'')
          return (int) (p + 1 - start); /* include the closing quote */
        p += 2;                 /* a doubled quote stands for one quote */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;                 /* skip the backslash and what it quotes */
        break;

      default:
        p++;
        break;
      }
}
5414
5415 \f
5416 /*
5417  * Support for Erlang
5418  *
5419  * Generates tags for functions, defines, and records.
5420  * Assumes that Erlang functions start at column 0.
5421  * Original code by Anders Lindgren (1996)
5422  */
5423 static int erlang_func (char *, char *);
5424 static void erlang_attribute (char *);
5425 static int erlang_atom (char *);
5426
5427 static void
5428 Erlang_functions (FILE *inf)
5429 {
5430   char *cp, *last;
5431   int len;
5432   int allocated;
5433
5434   allocated = 0;
5435   len = 0;
5436   last = NULL;
5437
5438   LOOP_ON_INPUT_LINES (inf, lb, cp)
5439     {
5440       if (cp[0] == '\0')        /* Empty line */
5441         continue;
5442       else if (iswhite (cp[0])) /* Not function nor attribute */
5443         continue;
5444       else if (cp[0] == '%')    /* comment */
5445         continue;
5446       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5447         continue;
5448       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5449         {
5450           erlang_attribute (cp);
5451           if (last != NULL)
5452             {
5453               free (last);
5454               last = NULL;
5455             }
5456         }
5457       else if ((len = erlang_func (cp, last)) > 0)
5458         {
5459           /*
5460            * Function.  Store the function name so that we only
5461            * generates a tag for the first clause.
5462            */
5463           if (last == NULL)
5464             last = xnew (len + 1, char);
5465           else if (len + 1 > allocated)
5466             xrnew (last, len + 1, char);
5467           allocated = len + 1;
5468           strncpy (last, cp, len);
5469           last[len] = '\0';
5470         }
5471     }
5472   free (last);
5473 }
5474
5475
5476 /*
5477  * A function definition is added if it matches:
5478  *     <beginning of line><Erlang Atom><whitespace>(
5479  *
5480  * It is added to the tags database if it doesn't match the
5481  * name of the previous clause header.
5482  *
5483  * Return the size of the name of the function, or 0 if no function
5484  * was found.
5485  */
5486 static int
5487 erlang_func (char *s, char *last)
5488
5489                                 /* Name of last clause. */
5490 {
5491   int pos;
5492   int len;
5493
5494   pos = erlang_atom (s);
5495   if (pos < 1)
5496     return 0;
5497
5498   len = pos;
5499   pos = skip_spaces (s + pos) - s;
5500
5501   /* Save only the first clause. */
5502   if (s[pos++] == '('
5503       && (last == NULL
5504           || len != (int)strlen (last)
5505           || !strneq (s, last, len)))
5506         {
5507           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5508           return len;
5509         }
5510
5511   return 0;
5512 }
5513
5514
5515 /*
5516  * Handle attributes.  Currently, tags are generated for defines
5517  * and records.
5518  *
5519  * They are on the form:
5520  * -define(foo, bar).
5521  * -define(Foo(M, N), M+N).
5522  * -record(graph, {vtab = notable, cyclic = true}).
5523  */
5524 static void
5525 erlang_attribute (char *s)
5526 {
5527   char *cp = s;
5528
5529   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5530       && *cp++ == '(')
5531     {
5532       int len = erlang_atom (skip_spaces (cp));
5533       if (len > 0)
5534         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5535     }
5536   return;
5537 }
5538
5539
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed; the result is 0 when S does
 * not start with an atom or when a quoted atom is not closed before
 * the end of the string.
 *
 * An unquoted atom starts with a letter or underscore and continues
 * with alphanumerics and underscores.  A quoted atom is enclosed in
 * single quotes; inside it a backslash quotes the next character.
 */
static int
erlang_atom (char *s)
{
  int n = 0;

  if (s[0] == '\'')
    {
      n = 1;
      while (s[n] != '\'')
        {
          if (s[n] == '\0')     /* multiline quoted atoms are ignored */
            return 0;
          if (s[n] == '\\')
            {
              n++;              /* step onto the quoted character */
              if (s[n] == '\0')
                return 0;
            }
          n++;
        }
      return n + 1;             /* count the closing quote too */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted.  */
      n = 1;
      while (ISALNUM (s[n]) || s[n] == '_')
        n++;
    }

  return n;
}
5567
5568 \f
5569 static char *scan_separators (char *);
5570 static void add_regex (char *, language *);
5571 static char *substitute (char *, char *, struct re_registers *);
5572
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; the text is copied down over
 * itself, one position to the left, until the next unquoted
 * separator.  A backslash quotes the character after it: \a, \b, \d,
 * \e, \f, \n, \r, \t and \v become the corresponding control
 * characters, a quoted separator becomes a plain separator, and any
 * other quoted character is kept together with its backslash.
 *
 * Works in place.  The copied string is always null terminated.
 * Returns a pointer to the terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *dst = name;             /* where the next output char goes */
  char *src;                    /* the input char being examined */

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* Quoted character: look at what follows the backslash.  */
          src++;
          if (*src == '\0')
            break;              /* lone trailing backslash */
          switch (*src)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell)            */
            case 'b': *dst++ = '\b'; break;   /* BS (back space)       */
            case 'd': *dst++ = 0177; break;   /* DEL (delete)          */
            case 'e': *dst++ = 033; break;    /* ESC (escape)          */
            case 'f': *dst++ = '\f'; break;   /* FF (form feed)        */
            case 'n': *dst++ = '\n'; break;   /* NL (new line)         */
            case 'r': *dst++ = '\r'; break;   /* CR (carriage return)  */
            case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab)  */
            case 'v': *dst++ = '\v'; break;   /* VT (vertical tab)     */
            default:
              /* Something else is quoted: drop the quote only in
                 front of the separator, preserve it otherwise.  */
              if (*src != sep)
                *dst++ = '\\';
              *dst++ = *src;
              break;
            }
        }
      else if (*src == sep)
        break;                  /* unquoted separator: done */
      else
        *dst++ = *src;
    }

  if (*src != sep)
    src = NULL;                 /* signal unterminated regexp */

  /* Terminate copied string. */
  *dst = '\0';
  return src;
}
5631
5632 /* Look at the argument of --regex or --no-regex and do the right
5633    thing.  Same for each line of a regexp file. */
5634 static void
5635 analyse_regex (char *regex_arg)
5636 {
5637   if (regex_arg == NULL)
5638     {
5639       free_regexps ();          /* --no-regex: remove existing regexps */
5640       return;
5641     }
5642
5643   /* A real --regexp option or a line in a regexp file. */
5644   switch (regex_arg[0])
5645     {
5646       /* Comments in regexp file or null arg to --regex. */
5647     case '\0':
5648     case ' ':
5649     case '\t':
5650       break;
5651
5652       /* Read a regex file.  This is recursive and may result in a
5653          loop, which will stop when the file descriptors are exhausted. */
5654     case '@':
5655       {
5656         FILE *regexfp;
5657         linebuffer regexbuf;
5658         char *regexfile = regex_arg + 1;
5659
5660         /* regexfile is a file containing regexps, one per line. */
5661         regexfp = fopen (regexfile, "r");
5662         if (regexfp == NULL)
5663           {
5664             pfatal (regexfile);
5665             return;
5666           }
5667         linebuffer_init (&regexbuf);
5668         while (readline_internal (&regexbuf, regexfp) > 0)
5669           analyse_regex (regexbuf.buffer);
5670         free (regexbuf.buffer);
5671         fclose (regexfp);
5672       }
5673       break;
5674
5675       /* Regexp to be used for a specific language only. */
5676     case '{':
5677       {
5678         language *lang;
5679         char *lang_name = regex_arg + 1;
5680         char *cp;
5681
5682         for (cp = lang_name; *cp != '}'; cp++)
5683           if (*cp == '\0')
5684             {
5685               error ("unterminated language name in regex: %s", regex_arg);
5686               return;
5687             }
5688         *cp++ = '\0';
5689         lang = get_language_from_langname (lang_name);
5690         if (lang == NULL)
5691           return;
5692         add_regex (cp, lang);
5693       }
5694       break;
5695
5696       /* Regexp to be used for any language. */
5697     default:
5698       add_regex (regex_arg, NULL);
5699       break;
5700     }
5701 }
5702
5703 /* Separate the regexp pattern, compile it,
5704    and care for optional name and modifiers. */
5705 static void
5706 add_regex (char *regexp_pattern, language *lang)
5707 {
5708   static struct re_pattern_buffer zeropattern;
5709   char sep, *pat, *name, *modifiers;
5710   char empty[] = "";
5711   const char *err;
5712   struct re_pattern_buffer *patbuf;
5713   regexp *rp;
5714   bool
5715     force_explicit_name = TRUE, /* do not use implicit tag names */
5716     ignore_case = FALSE,        /* case is significant */
5717     multi_line = FALSE,         /* matches are done one line at a time */
5718     single_line = FALSE;        /* dot does not match newline */
5719
5720
5721   if (strlen(regexp_pattern) < 3)
5722     {
5723       error ("null regexp", (char *)NULL);
5724       return;
5725     }
5726   sep = regexp_pattern[0];
5727   name = scan_separators (regexp_pattern);
5728   if (name == NULL)
5729     {
5730       error ("%s: unterminated regexp", regexp_pattern);
5731       return;
5732     }
5733   if (name[1] == sep)
5734     {
5735       error ("null name for regexp \"%s\"", regexp_pattern);
5736       return;
5737     }
5738   modifiers = scan_separators (name);
5739   if (modifiers == NULL)        /* no terminating separator --> no name */
5740     {
5741       modifiers = name;
5742       name = empty;
5743     }
5744   else
5745     modifiers += 1;             /* skip separator */
5746
5747   /* Parse regex modifiers. */
5748   for (; modifiers[0] != '\0'; modifiers++)
5749     switch (modifiers[0])
5750       {
5751       case 'N':
5752         if (modifiers == name)
5753           error ("forcing explicit tag name but no name, ignoring", NULL);
5754         force_explicit_name = TRUE;
5755         break;
5756       case 'i':
5757         ignore_case = TRUE;
5758         break;
5759       case 's':
5760         single_line = TRUE;
5761         /* FALLTHRU */
5762       case 'm':
5763         multi_line = TRUE;
5764         need_filebuf = TRUE;
5765         break;
5766       default:
5767         {
5768           char wrongmod [2];
5769           wrongmod[0] = modifiers[0];
5770           wrongmod[1] = '\0';
5771           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5772         }
5773         break;
5774       }
5775
5776   patbuf = xnew (1, struct re_pattern_buffer);
5777   *patbuf = zeropattern;
5778   if (ignore_case)
5779     {
5780       static char lc_trans[CHARS];
5781       int i;
5782       for (i = 0; i < CHARS; i++)
5783         lc_trans[i] = lowcase (i);
5784       patbuf->translate = lc_trans;     /* translation table to fold case  */
5785     }
5786
5787   if (multi_line)
5788     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5789   else
5790     pat = regexp_pattern;
5791
5792   if (single_line)
5793     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5794   else
5795     re_set_syntax (RE_SYNTAX_EMACS);
5796
5797   err = re_compile_pattern (pat, strlen (pat), patbuf);
5798   if (multi_line)
5799     free (pat);
5800   if (err != NULL)
5801     {
5802       error ("%s while compiling pattern", err);
5803       return;
5804     }
5805
5806   rp = p_head;
5807   p_head = xnew (1, regexp);
5808   p_head->pattern = savestr (regexp_pattern);
5809   p_head->p_next = rp;
5810   p_head->lang = lang;
5811   p_head->pat = patbuf;
5812   p_head->name = savestr (name);
5813   p_head->error_signaled = FALSE;
5814   p_head->force_explicit_name = force_explicit_name;
5815   p_head->ignore_case = ignore_case;
5816   p_head->multi_line = multi_line;
5817 }
5818
5819 /*
5820  * Do the substitutions indicated by the regular expression and
5821  * arguments.
5822  */
5823 static char *
5824 substitute (char *in, char *out, struct re_registers *regs)
5825 {
5826   char *result, *t;
5827   int size, dig, diglen;
5828
5829   result = NULL;
5830   size = strlen (out);
5831
5832   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5833   if (out[size - 1] == '\\')
5834     fatal ("pattern error in \"%s\"", out);
5835   for (t = etags_strchr (out, '\\');
5836        t != NULL;
5837        t = etags_strchr (t + 2, '\\'))
5838     if (ISDIGIT (t[1]))
5839       {
5840         dig = t[1] - '0';
5841         diglen = regs->end[dig] - regs->start[dig];
5842         size += diglen - 2;
5843       }
5844     else
5845       size -= 1;
5846
5847   /* Allocate space and do the substitutions. */
5848   assert (size >= 0);
5849   result = xnew (size + 1, char);
5850
5851   for (t = result; *out != '\0'; out++)
5852     if (*out == '\\' && ISDIGIT (*++out))
5853       {
5854         dig = *out - '0';
5855         diglen = regs->end[dig] - regs->start[dig];
5856         strncpy (t, in + regs->start[dig], diglen);
5857         t += diglen;
5858       }
5859     else
5860       *t++ = *out;
5861   *t = '\0';
5862
5863   assert (t <= result + size);
5864   assert (t - result == (int)strlen (result));
5865
5866   return result;
5867 }
5868
5869 /* Deallocate all regexps. */
5870 static void
5871 free_regexps (void)
5872 {
5873   regexp *rp;
5874   while (p_head != NULL)
5875     {
5876       rp = p_head->p_next;
5877       free (p_head->pattern);
5878       free (p_head->name);
5879       free (p_head);
5880       p_head = rp;
5881     }
5882   return;
5883 }
5884
5885 /*
5886  * Reads the whole file as a single string from `filebuf' and looks for
5887  * multi-line regular expressions, creating tags on matches.
5888  * readline already dealt with normal regexps.
5889  *
5890  * Idea by Ben Wing <ben@666.com> (2002).
5891  */
5892 static void
5893 regex_tag_multiline (void)
5894 {
5895   char *buffer = filebuf.buffer;
5896   regexp *rp;
5897   char *name;
5898
5899   for (rp = p_head; rp != NULL; rp = rp->p_next)
5900     {
5901       int match = 0;
5902
5903       if (!rp->multi_line)
5904         continue;               /* skip normal regexps */
5905
5906       /* Generic initialisations before parsing file from memory. */
5907       lineno = 1;               /* reset global line number */
5908       charno = 0;               /* reset global char number */
5909       linecharno = 0;           /* reset global char number of line start */
5910
5911       /* Only use generic regexps or those for the current language. */
5912       if (rp->lang != NULL && rp->lang != curfdp->lang)
5913         continue;
5914
5915       while (match >= 0 && match < filebuf.len)
5916         {
5917           match = re_search (rp->pat, buffer, filebuf.len, charno,
5918                              filebuf.len - match, &rp->regs);
5919           switch (match)
5920             {
5921             case -2:
5922               /* Some error. */
5923               if (!rp->error_signaled)
5924                 {
5925                   error ("regexp stack overflow while matching \"%s\"",
5926                          rp->pattern);
5927                   rp->error_signaled = TRUE;
5928                 }
5929               break;
5930             case -1:
5931               /* No match. */
5932               break;
5933             default:
5934               if (match == rp->regs.end[0])
5935                 {
5936                   if (!rp->error_signaled)
5937                     {
5938                       error ("regexp matches the empty string: \"%s\"",
5939                              rp->pattern);
5940                       rp->error_signaled = TRUE;
5941                     }
5942                   match = -3;   /* exit from while loop */
5943                   break;
5944                 }
5945
5946               /* Match occurred.  Construct a tag. */
5947               while (charno < rp->regs.end[0])
5948                 if (buffer[charno++] == '\n')
5949                   lineno++, linecharno = charno;
5950               name = rp->name;
5951               if (name[0] == '\0')
5952                 name = NULL;
5953               else /* make a named tag */
5954                 name = substitute (buffer, rp->name, &rp->regs);
5955               if (rp->force_explicit_name)
5956                 /* Force explicit tag name, if a name is there. */
5957                 pfnote (name, TRUE, buffer + linecharno,
5958                         charno - linecharno + 1, lineno, linecharno);
5959               else
5960                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5961                           charno - linecharno + 1, lineno, linecharno);
5962               break;
5963             }
5964         }
5965     }
5966 }
5967
5968 \f
5969 static bool
5970 nocase_tail (const char *cp)
5971 {
5972   register int len = 0;
5973
5974   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5975     cp++, len++;
5976   if (*cp == '\0' && !intoken (dbp[len]))
5977     {
5978       dbp += len;
5979       return TRUE;
5980     }
5981   return FALSE;
5982 }
5983
5984 static void
5985 get_tag (register char *bp, char **namepp)
5986 {
5987   register char *cp = bp;
5988
5989   if (*bp != '\0')
5990     {
5991       /* Go till you get to white space or a syntactic break */
5992       for (cp = bp + 1; !notinname (*cp); cp++)
5993         continue;
5994       make_tag (bp, cp - bp, TRUE,
5995                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5996     }
5997
5998   if (namepp != NULL)
5999     *namepp = savenstr (bp, cp - bp);
6000 }
6001
6002 /*
6003  * Read a line of text from `stream' into `lbp', excluding the
6004  * newline or CR-NL, if any.  Return the number of characters read from
6005  * `stream', which is the length of the line including the newline.
6006  *
6007  * On DOS or Windows we do not count the CR character, if any before the
6008  * NL, in the returned length; this mirrors the behavior of Emacs on those
6009  * platforms (for text files, it translates CR-NL to NL as it reads in the
6010  * file).
6011  *
6012  * If multi-line regular expressions are requested, each line read is
6013  * appended to `filebuf'.
6014  */
6015 static long
6016 readline_internal (linebuffer *lbp, register FILE *stream)
6017 {
6018   char *buffer = lbp->buffer;
6019   register char *p = lbp->buffer;
6020   register char *pend;
6021   int chars_deleted;
6022
6023   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6024
6025   for (;;)
6026     {
6027       register int c = getc (stream);
6028       if (p == pend)
6029         {
6030           /* We're at the end of linebuffer: expand it. */
6031           lbp->size *= 2;
6032           xrnew (buffer, lbp->size, char);
6033           p += buffer - lbp->buffer;
6034           pend = buffer + lbp->size;
6035           lbp->buffer = buffer;
6036         }
6037       if (c == EOF)
6038         {
6039           *p = '\0';
6040           chars_deleted = 0;
6041           break;
6042         }
6043       if (c == '\n')
6044         {
6045           if (p > buffer && p[-1] == '\r')
6046             {
6047               p -= 1;
6048 #ifdef DOS_NT
6049              /* Assume CRLF->LF translation will be performed by Emacs
6050                 when loading this file, so CRs won't appear in the buffer.
6051                 It would be cleaner to compensate within Emacs;
6052                 however, Emacs does not know how many CRs were deleted
6053                 before any given point in the file.  */
6054               chars_deleted = 1;
6055 #else
6056               chars_deleted = 2;
6057 #endif
6058             }
6059           else
6060             {
6061               chars_deleted = 1;
6062             }
6063           *p = '\0';
6064           break;
6065         }
6066       *p++ = c;
6067     }
6068   lbp->len = p - buffer;
6069
6070   if (need_filebuf              /* we need filebuf for multi-line regexps */
6071       && chars_deleted > 0)     /* not at EOF */
6072     {
6073       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6074         {
6075           /* Expand filebuf. */
6076           filebuf.size *= 2;
6077           xrnew (filebuf.buffer, filebuf.size, char);
6078         }
6079       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6080       filebuf.len += lbp->len;
6081       filebuf.buffer[filebuf.len++] = '\n';
6082       filebuf.buffer[filebuf.len] = '\0';
6083     }
6084
6085   return lbp->len + chars_deleted;
6086 }
6087
6088 /*
6089  * Like readline_internal, above, but in addition try to match the
6090  * input line against relevant regular expressions and manage #line
6091  * directives.
6092  */
6093 static void
6094 readline (linebuffer *lbp, FILE *stream)
6095 {
6096   long result;
6097
6098   linecharno = charno;          /* update global char number of line start */
6099   result = readline_internal (lbp, stream); /* read line */
6100   lineno += 1;                  /* increment global line number */
6101   charno += result;             /* increment global char number */
6102
6103   /* Honour #line directives. */
6104   if (!no_line_directive)
6105     {
6106       static bool discard_until_line_directive;
6107
6108       /* Check whether this is a #line directive. */
6109       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6110         {
6111           unsigned int lno;
6112           int start = 0;
6113
6114           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6115               && start > 0)     /* double quote character found */
6116             {
6117               char *endp = lbp->buffer + start;
6118
6119               while ((endp = etags_strchr (endp, '"')) != NULL
6120                      && endp[-1] == '\\')
6121                 endp++;
6122               if (endp != NULL)
6123                 /* Ok, this is a real #line directive.  Let's deal with it. */
6124                 {
6125                   char *taggedabsname;  /* absolute name of original file */
6126                   char *taggedfname;    /* name of original file as given */
6127                   char *name;           /* temp var */
6128
6129                   discard_until_line_directive = FALSE; /* found it */
6130                   name = lbp->buffer + start;
6131                   *endp = '\0';
6132                   canonicalize_filename (name);
6133                   taggedabsname = absolute_filename (name, tagfiledir);
6134                   if (filename_is_absolute (name)
6135                       || filename_is_absolute (curfdp->infname))
6136                     taggedfname = savestr (taggedabsname);
6137                   else
6138                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6139
6140                   if (streq (curfdp->taggedfname, taggedfname))
6141                     /* The #line directive is only a line number change.  We
6142                        deal with this afterwards. */
6143                     free (taggedfname);
6144                   else
6145                     /* The tags following this #line directive should be
6146                        attributed to taggedfname.  In order to do this, set
6147                        curfdp accordingly. */
6148                     {
6149                       fdesc *fdp; /* file description pointer */
6150
6151                       /* Go look for a file description already set up for the
6152                          file indicated in the #line directive.  If there is
6153                          one, use it from now until the next #line
6154                          directive. */
6155                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6156                         if (streq (fdp->infname, curfdp->infname)
6157                             && streq (fdp->taggedfname, taggedfname))
6158                           /* If we remove the second test above (after the &&)
6159                              then all entries pertaining to the same file are
6160                              coalesced in the tags file.  If we use it, then
6161                              entries pertaining to the same file but generated
6162                              from different files (via #line directives) will
6163                              go into separate sections in the tags file.  These
6164                              alternatives look equivalent.  The first one
6165                              destroys some apparently useless information. */
6166                           {
6167                             curfdp = fdp;
6168                             free (taggedfname);
6169                             break;
6170                           }
6171                       /* Else, if we already tagged the real file, skip all
6172                          input lines until the next #line directive. */
6173                       if (fdp == NULL) /* not found */
6174                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6175                           if (streq (fdp->infabsname, taggedabsname))
6176                             {
6177                               discard_until_line_directive = TRUE;
6178                               free (taggedfname);
6179                               break;
6180                             }
6181                       /* Else create a new file description and use that from
6182                          now on, until the next #line directive. */
6183                       if (fdp == NULL) /* not found */
6184                         {
6185                           fdp = fdhead;
6186                           fdhead = xnew (1, fdesc);
6187                           *fdhead = *curfdp; /* copy curr. file description */
6188                           fdhead->next = fdp;
6189                           fdhead->infname = savestr (curfdp->infname);
6190                           fdhead->infabsname = savestr (curfdp->infabsname);
6191                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6192                           fdhead->taggedfname = taggedfname;
6193                           fdhead->usecharno = FALSE;
6194                           fdhead->prop = NULL;
6195                           fdhead->written = FALSE;
6196                           curfdp = fdhead;
6197                         }
6198                     }
6199                   free (taggedabsname);
6200                   lineno = lno - 1;
6201                   readline (lbp, stream);
6202                   return;
6203                 } /* if a real #line directive */
6204             } /* if #line is followed by a number */
6205         } /* if line begins with "#line " */
6206
6207       /* If we are here, no #line directive was found. */
6208       if (discard_until_line_directive)
6209         {
6210           if (result > 0)
6211             {
6212               /* Do a tail recursion on ourselves, thus discarding the contents
6213                  of the line buffer. */
6214               readline (lbp, stream);
6215               return;
6216             }
6217           /* End of file. */
6218           discard_until_line_directive = FALSE;
6219           return;
6220         }
6221     } /* if #line directives should be considered */
6222
6223   {
6224     int match;
6225     regexp *rp;
6226     char *name;
6227
6228     /* Match against relevant regexps. */
6229     if (lbp->len > 0)
6230       for (rp = p_head; rp != NULL; rp = rp->p_next)
6231         {
6232           /* Only use generic regexps or those for the current language.
6233              Also do not use multiline regexps, which is the job of
6234              regex_tag_multiline. */
6235           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6236               || rp->multi_line)
6237             continue;
6238
6239           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6240           switch (match)
6241             {
6242             case -2:
6243               /* Some error. */
6244               if (!rp->error_signaled)
6245                 {
6246                   error ("regexp stack overflow while matching \"%s\"",
6247                          rp->pattern);
6248                   rp->error_signaled = TRUE;
6249                 }
6250               break;
6251             case -1:
6252               /* No match. */
6253               break;
6254             case 0:
6255               /* Empty string matched. */
6256               if (!rp->error_signaled)
6257                 {
6258                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6259                   rp->error_signaled = TRUE;
6260                 }
6261               break;
6262             default:
6263               /* Match occurred.  Construct a tag. */
6264               name = rp->name;
6265               if (name[0] == '\0')
6266                 name = NULL;
6267               else /* make a named tag */
6268                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6269               if (rp->force_explicit_name)
6270                 /* Force explicit tag name, if a name is there. */
6271                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6272               else
6273                 make_tag (name, strlen (name), TRUE,
6274                           lbp->buffer, match, lineno, linecharno);
6275               break;
6276             }
6277         }
6278   }
6279 }
6280
6281 \f
/*
 * Duplicate the NUL-terminated string CP.
 *
 * The copy is produced by savenstr using the full length of CP, so it
 * lives in freshly allocated memory that the caller is expected to
 * release with free.
 */
static char *
savestr (const char *cp)
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6291
6292 /*
6293  * Return a pointer to a space of size LEN+1 allocated with xnew where
6294  * the string CP has been copied for at most the first LEN characters.
6295  */
6296 static char *
6297 savenstr (const char *cp, int len)
6298 {
6299   register char *dp;
6300
6301   dp = xnew (len + 1, char);
6302   strncpy (dp, cp, len);
6303   dp[len] = '\0';
6304   return dp;
6305 }
6306
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating NUL counts as part of
 * the string, so searching for '\0' returns a pointer to it.
 *
 * Identical to POSIX strrchr, included for portability.  As in POSIX,
 * C is converted to char before comparing, so a byte value passed as
 * a positive int (e.g. 0xE9) is found even where plain char is signed.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  const char ch = (char) c;
  const char *last = NULL;

  do
    {
      if (*sp == ch)
        last = sp;
    }
  while (*sp++ != '\0');

  return (char *) last;
}
6326
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating NUL counts as part of
 * the string, so searching for '\0' returns a pointer to it.
 *
 * Identical to POSIX strchr, included for portability.  As in POSIX,
 * C is converted to char before comparing, so a byte value passed as
 * a positive int (e.g. 0xE9) is found even where plain char is signed.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  const char ch = (char) c;

  do
    {
      if (*sp == ch)
        return (char *) sp;
    }
  while (*sp++ != '\0');

  return NULL;
}
6343
/*
 * Compare S1 and S2, treating upper and lower case letters as equal.
 *
 * Two characters are compared through lowcase only when both are
 * alphabetic according to ISALPHA; otherwise they are compared as is.
 * Returns zero when the strings match, else the difference between
 * the first pair of characters that differ.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (register const char *s1, register const char *s2)
{
  for (;; s1++, s2++)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      /* Stop at the first mismatch or at the end of S1; in the latter
         case DIFF is zero exactly when S2 ends here too. */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }
}
6362
/*
 * Compare at most the first N characters of S1 and S2, treating upper
 * and lower case letters as equal.
 *
 * Two characters are compared through lowcase only when both are
 * alphabetic according to ISALPHA; otherwise they are compared as is.
 * Returns zero when the first N characters match (or N is not
 * positive), else the difference between the first pair of characters
 * that differ.  Characters past the first N never influence the
 * result, even when S1 is exactly N characters long.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (register const char *s1, register const char *s2, register int n)
{
  for (; n > 0; s1++, s2++, n--)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      /* Stop at the first mismatch or at the end of S1; in the latter
         case DIFF is zero exactly when S2 ends here too. */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }

  /* The limit was reached without finding a difference. */
  return 0;
}
6385
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  The scan relies on iswhite rejecting the string
   terminator, so it does not run past the end of the string. */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6394
/* Return a pointer to the first character at or after CP that is
   either white space (per iswhite) or the string terminator. */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6403
/* Report a fatal problem and terminate the program.  S1 and S2 are
   handed unchanged to error (S1 is the printf control string, S2 its
   argument); the process then exits with status EXIT_FAILURE, so this
   function does not return.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6411
/* Report the failure of a system call and terminate the program.
   S1 is passed to perror, which prints it on stderr together with the
   message for the current errno; the process then exits with status
   EXIT_FAILURE, so this function does not return.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6418
6419 static void
6420 suggest_asking_for_help (void)
6421 {
6422   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6423            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6424   exit (EXIT_FAILURE);
6425 }
6426
6427 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6428 static void
6429 error (const char *s1, const char *s2)
6430 {
6431   fprintf (stderr, "%s: ", progname);
6432   fprintf (stderr, s1, s2);
6433   fprintf (stderr, "\n");
6434 }
6435
6436 /* Return a newly-allocated string whose contents
6437    concatenate those of s1, s2, s3.  */
6438 static char *
6439 concat (const char *s1, const char *s2, const char *s3)
6440 {
6441   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6442   char *result = xnew (len1 + len2 + len3 + 1, char);
6443
6444   strcpy (result, s1);
6445   strcpy (result + len1, s2);
6446   strcpy (result + len1 + len2, s3);
6447   result[len1 + len2 + len3] = '\0';
6448
6449   return result;
6450 }
6451
6452 \f
6453 /* Does the same work as the system V getcwd, but does not need to
6454    guess the buffer size in advance. */
6455 static char *
6456 etags_getcwd (void)
6457 {
6458 #ifdef HAVE_GETCWD
6459   int bufsize = 200;
6460   char *path = xnew (bufsize, char);
6461
6462   while (getcwd (path, bufsize) == NULL)
6463     {
6464       if (errno != ERANGE)
6465         pfatal ("getcwd");
6466       bufsize *= 2;
6467       free (path);
6468       path = xnew (bufsize, char);
6469     }
6470
6471   canonicalize_filename (path);
6472   return path;
6473
6474 #else /* not HAVE_GETCWD */
6475 #if MSDOS
6476
6477   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6478
6479   getwd (path);
6480
6481   for (p = path; *p != '\0'; p++)
6482     if (*p == '\\')
6483       *p = '/';
6484     else
6485       *p = lowcase (*p);
6486
6487   return strdup (path);
6488 #else /* not MSDOS */
6489   linebuffer path;
6490   FILE *pipe;
6491
6492   linebuffer_init (&path);
6493   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6494   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6495     pfatal ("pwd");
6496   pclose (pipe);
6497
6498   return path.buffer;
6499 #endif /* not MSDOS */
6500 #endif /* not HAVE_GETCWD */
6501 }
6502
6503 /* Return a newly allocated string containing the file name of FILE
6504    relative to the absolute directory DIR (which should end with a slash). */
6505 static char *
6506 relative_filename (char *file, char *dir)
6507 {
6508   char *fp, *dp, *afn, *res;
6509   int i;
6510
6511   /* Find the common root of file and dir (with a trailing slash). */
6512   afn = absolute_filename (file, cwd);
6513   fp = afn;
6514   dp = dir;
6515   while (*fp++ == *dp++)
6516     continue;
6517   fp--, dp--;                   /* back to the first differing char */
6518 #ifdef DOS_NT
6519   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6520     return afn;
6521 #endif
6522   do                            /* look at the equal chars until '/' */
6523     fp--, dp--;
6524   while (*fp != '/');
6525
6526   /* Build a sequence of "../" strings for the resulting relative file name. */
6527   i = 0;
6528   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6529     i += 1;
6530   res = xnew (3*i + strlen (fp + 1) + 1, char);
6531   res[0] = '\0';
6532   while (i-- > 0)
6533     strcat (res, "../");
6534
6535   /* Add the file name relative to the common root of file and dir. */
6536   strcat (res, fp + 1);
6537   free (afn);
6538
6539   return res;
6540 }
6541
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  If FILE is
   already absolute it is copied, otherwise it is appended to DIR.  The
   result is then simplified by removing "/." components and by
   cancelling each "/dirname/.." pair.  The caller owns the result. */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              char *scan = slashp;

              /* Look backwards for the start of the component that
                 this ".." cancels, without ever stepping before RES.
                 If there is none, the absolute name begins with "/.."
                 and only the "/.." itself is dropped. */
              cp = slashp;
              while (scan > res)
                if (filename_is_absolute (--scan))
                  {
                    cp = scan;
                    break;
                  }
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so memmove is required.
                 The count covers the tail after "/.." plus its NUL. */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/."; the count covers the tail plus its NUL. */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  /* Everything was cancelled (e.g. "/a/.."): that denotes the root. */
  if (res[0] == '\0')
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6613
/* Return a newly allocated string containing the absolute name of the
   directory where FILE resides, given DIR (which should end with a
   slash).  If FILE contains no slash, the result is a copy of DIR.
   FILE is modified temporarily during the call and restored before
   returning. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *cut, *res;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Truncate FILE just after its last slash so that only the
     directory part is resolved, then put the character back. */
  cut = last_slash + 1;
  saved = *cut;
  *cut = '\0';
  res = absolute_filename (file, dir);
  *cut = saved;

  return res;
}
6633
/* Tell whether FN is an absolute file name, i.e. whether it starts
   with a slash or, under DOS_NT, with a drive letter followed by ":/".
   FN must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6645
/* Canonicalize the file name FN in place: every run of separator
   characters becomes a single slash.  The separator is '/' normally;
   under DOS_NT it is the backslash, and a lower case drive letter is
   also converted to upper case.  The result is never longer than the
   input. */
static void
canonicalize_filename (register char *fn)
{
  char *src = fn;               /* next character to read */
  char *dst = fn;               /* next position to write */
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  sep = '\\';
#endif

  /* Collapse multiple separators into a single slash. */
  while (*src != '\0')
    {
      if (*src != sep)
        *dst++ = *src++;
      else
        {
          *dst++ = '/';
          do
            src++;
          while (*src == sep);
        }
    }
  *dst = '\0';
}
6674
6675 \f
6676 /* Initialize a linebuffer for use. */
6677 static void
6678 linebuffer_init (linebuffer *lbp)
6679 {
6680   lbp->size = (DEBUG) ? 3 : 200;
6681   lbp->buffer = xnew (lbp->size, char);
6682   lbp->buffer[0] = '\0';
6683   lbp->len = 0;
6684 }
6685
6686 /* Set the minimum size of a string contained in a linebuffer. */
6687 static void
6688 linebuffer_setlen (linebuffer *lbp, int toksize)
6689 {
6690   while (lbp->size <= toksize)
6691     {
6692       lbp->size *= 2;
6693       xrnew (lbp->buffer, lbp->size, char);
6694     }
6695   lbp->len = toksize;
6696 }
6697
6698 /* Like malloc but get fatal error if memory is exhausted. */
6699 static PTR
6700 xmalloc (unsigned int size)
6701 {
6702   PTR result = (PTR) malloc (size);
6703   if (result == NULL)
6704     fatal ("virtual memory exhausted", (char *)NULL);
6705   return result;
6706 }
6707
6708 static PTR
6709 xrealloc (char *ptr, unsigned int size)
6710 {
6711   PTR result = (PTR) realloc (ptr, size);
6712   if (result == NULL)
6713     fatal ("virtual memory exhausted", (char *)NULL);
6714   return result;
6715 }
6716
6717 /*
6718  * Local Variables:
6719  * indent-tabs-mode: t
6720  * tab-width: 8
6721  * fill-column: 79
6722  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6723  * c-file-style: "gnu"
6724  * End:
6725  */
6726
6727 /* etags.c ends here */