code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2    Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995,
   3                  1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4                  2005, 2006 Free Software Foundation, Inc. and Ken Arnold
   5
   6  This file is not considered part of GNU Emacs.
   7
   8  This program is free software; you can redistribute it and/or modify
   9  it under the terms of the GNU General Public License as published by
  10  the Free Software Foundation; either version 2 of the License, or
  11  (at your option) any later version.
  12
  13  This program is distributed in the hope that it will be useful,
  14  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  GNU General Public License for more details.
  17
  18  You should have received a copy of the GNU General Public License
  19  along with this program; if not, write to the Free Software Foundation,
  20  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
  21
  22 /*
  23  * Authors:
  24  *      Ctags originally by Ken Arnold.
  25  *      Fortran added by Jim Kleckner.
  26  *      Ed Pelegri-Llopart added C typedefs.
  27  *      Gnu Emacs TAGS format and modifications by RMS?
  28  * 1989 Sam Kendall added C++.
  29  * 1992 Joseph B. Wells improved C and C++ parsing.
  30  * 1993 Francesco Potortì reorganised C and C++.
  31  * 1994 Line-by-line regexp tags by Tom Tromey.
  32  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  33  * 2002 #line directives by Francesco Potortì.
  34  *
  35  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  36  */
  37
  38 /*
  39  * If you want to add support for a new language, start by looking at the LUA
  40  * language, which is the simplest.  Alternatively, consider shipping a
  41  * configuration file containing regexp definitions for etags.
  42  */
  43
  44 char pot_etags_version[] = "@(#) pot revision number is 17.20";
  45
  46 #define TRUE    1
  47 #define FALSE   0
  48
  49 #ifdef DEBUG
  50 #  undef DEBUG
  51 #  define DEBUG TRUE
  52 #else
  53 #  define DEBUG  FALSE
  54 #  define NDEBUG                /* disable assert */
  55 #endif
  56
  57 #ifdef HAVE_CONFIG_H
  58 # include <config.h>
  59   /* On some systems, Emacs defines static as nothing for the sake
  60      of unexec.  We don't want that here since we don't use unexec. */
  61 # undef static
  62 # ifndef PTR                    /* for XEmacs */
  63 #   define PTR void *
  64 # endif
  65 # ifndef __P                    /* for XEmacs */
  66 #   define __P(args) args
  67 # endif
  68 #else  /* no config.h */
  69 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  70 #   define __P(args) args       /* use prototypes */
  71 #   define PTR void *           /* for generic pointers */
  72 # else /* not standard C */
  73 #   define __P(args) ()         /* no prototypes */
  74 #   define const                /* remove const for old compilers' sake */
  75 #   define PTR long *           /* don't use void* */
  76 # endif
  77 #endif /* !HAVE_CONFIG_H */
  78
  79 #ifndef _GNU_SOURCE
  80 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  81 #endif
  82
  83 /* WIN32_NATIVE is for XEmacs.
  84    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  85 #ifdef WIN32_NATIVE
  86 # undef MSDOS
  87 # undef  WINDOWSNT
  88 # define WINDOWSNT
  89 #endif /* WIN32_NATIVE */
  90
  91 #ifdef MSDOS
  92 # undef MSDOS
  93 # define MSDOS TRUE
  94 # include <fcntl.h>
  95 # include <sys/param.h>
  96 # include <io.h>
  97 # ifndef HAVE_CONFIG_H
  98 #   define DOS_NT
  99 #   include <sys/config.h>
 100 # endif
 101 #else
 102 # define MSDOS FALSE
 103 #endif /* MSDOS */
 104
 105 #ifdef WINDOWSNT
 106 # include <stdlib.h>
 107 # include <fcntl.h>
 108 # include <string.h>
 109 # include <direct.h>
 110 # include <io.h>
 111 # define MAXPATHLEN _MAX_PATH
 112 # undef HAVE_NTGUI
 113 # undef  DOS_NT
 114 # define DOS_NT
 115 # ifndef HAVE_GETCWD
 116 #   define HAVE_GETCWD
 117 # endif /* undef HAVE_GETCWD */
 118 #else /* not WINDOWSNT */
 119 # ifdef STDC_HEADERS
 120 #  include <stdlib.h>
 121 #  include <string.h>
 122 # else /* no standard C headers */
 123     extern char *getenv ();
 124 #  ifdef VMS
 125 #   define EXIT_SUCCESS 1
 126 #   define EXIT_FAILURE 0
 127 #  else /* no VMS */
 128 #   define EXIT_SUCCESS 0
 129 #   define EXIT_FAILURE 1
 130 #  endif
 131 # endif
 132 #endif /* !WINDOWSNT */
 133
 134 #ifdef HAVE_UNISTD_H
 135 # include <unistd.h>
 136 #else
 137 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 138     extern char *getcwd (char *buf, size_t size);
 139 # endif
 140 #endif /* HAVE_UNISTD_H */
 141
 142 #include <stdio.h>
 143 #include <ctype.h>
 144 #include <errno.h>
 145 #ifndef errno
 146   extern int errno;
 147 #endif
 148 #include <sys/types.h>
 149 #include <sys/stat.h>
 150
 151 #include <assert.h>
 152 #ifdef NDEBUG
 153 # undef  assert                 /* some systems have a buggy assert.h */
 154 # define assert(x) ((void) 0)
 155 #endif
 156
 157 #if !defined (S_ISREG) && defined (S_IFREG)
 158 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 159 #endif
 160
 161 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 162 # define NO_LONG_OPTIONS TRUE
 163 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 164   extern char *optarg;
 165   extern int optind, opterr;
 166 #else
 167 # define NO_LONG_OPTIONS FALSE
 168 # include <getopt.h>
 169 #endif /* NO_LONG_OPTIONS */
 170
 171 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 172 # ifdef __CYGWIN__              /* compiling on Cygwin */
 173                              !!! NOTICE !!!
 174  the regex.h distributed with Cygwin is not compatible with etags, alas!
 175 If you want regular expression support, you should delete this notice and
 176               arrange to use the GNU regex.h and regex.c.
 177 # endif
 178 #endif
 179 #include <regex.h>
 180
 181 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 182  Leave it undefined to make the program "etags", which makes emacs-style
 183  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 184 #ifdef CTAGS
 185 # undef  CTAGS
 186 # define CTAGS TRUE
 187 #else
 188 # define CTAGS FALSE
 189 #endif
 190
 191 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 192 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 193 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 194 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 195
 196 #define CHARS 256               /* 2^sizeof(char) */
 197 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
 198 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 199 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 200 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 201 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 202 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 203
 204 #define ISALNUM(c)      isalnum (CHAR(c))
 205 #define ISALPHA(c)      isalpha (CHAR(c))
 206 #define ISDIGIT(c)      isdigit (CHAR(c))
 207 #define ISLOWER(c)      islower (CHAR(c))
 208
 209 #define lowcase(c)      tolower (CHAR(c))
 210 #define upcase(c)       toupper (CHAR(c))
 211
 212
 213 /*
 214  *      xnew, xrnew -- allocate, reallocate storage
 215  *
 216  * SYNOPSIS:    Type *xnew (int n, Type);
 217  *              void xrnew (OldPointer, int n, Type);
 218  */
 219 #if DEBUG
 220 # include "chkmalloc.h"
 221 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 222                                                   (n) * sizeof (Type)))
 223 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 224                                         (char *) (op), (n) * sizeof (Type)))
 225 #else
 226 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 227 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 228                                         (char *) (op), (n) * sizeof (Type)))
 229 #endif
 230
 231 #define bool int
 232
 233 typedef void Lang_function __P((FILE *));
 234
 235 typedef struct
 236 {
 237   char *suffix;                 /* file name suffix for this compressor */
 238   char *command;                /* takes one arg and decompresses to stdout */
 239 } compressor;
 240
 241 typedef struct
 242 {
 243   char *name;                   /* language name */
 244   char *help;                   /* detailed help for the language */
 245   Lang_function *function;      /* parse function */
 246   char **suffixes;              /* name suffixes of this language's files */
 247   char **filenames;             /* names of this language's files */
 248   char **interpreters;          /* interpreters for this language */
 249   bool metasource;              /* source used to generate other sources */
 250 } language;
 251
 252 typedef struct fdesc
 253 {
 254   struct fdesc *next;           /* for the linked list */
 255   char *infname;                /* uncompressed input file name */
 256   char *infabsname;             /* absolute uncompressed input file name */
 257   char *infabsdir;              /* absolute dir of input file */
 258   char *taggedfname;            /* file name to write in tagfile */
 259   language *lang;               /* language of file */
 260   char *prop;                   /* file properties to write in tagfile */
 261   bool usecharno;               /* etags tags shall contain char number */
 262   bool written;                 /* entry written in the tags file */
 263 } fdesc;
 264
 265 typedef struct node_st
 266 {                               /* sorting structure */
 267   struct node_st *left, *right; /* left and right sons */
 268   fdesc *fdp;                   /* description of file to whom tag belongs */
 269   char *name;                   /* tag name */
 270   char *regex;                  /* search regexp */
 271   bool valid;                   /* write this tag on the tag file */
 272   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 273   bool been_warned;             /* warning already given for duplicated tag */
 274   int lno;                      /* line number tag is on */
 275   long cno;                     /* character number line starts on */
 276 } node;
 277
 278 /*
 279  * A `linebuffer' is a structure which holds a line of text.
 280  * `readline_internal' reads a line from a stream into a linebuffer
 281  * and works regardless of the length of the line.
 282  * SIZE is the size of BUFFER, LEN is the length of the string in
 283  * BUFFER after readline reads it.
 284  */
 285 typedef struct
 286 {
 287   long size;
 288   int len;
 289   char *buffer;
 290 } linebuffer;
 291
 292 /* Used to support mixing of --lang and file names. */
 293 typedef struct
 294 {
 295   enum {
 296     at_language,                /* a language specification */
 297     at_regexp,                  /* a regular expression */
 298     at_filename,                /* a file name */
 299     at_stdin,                   /* read from stdin here */
 300     at_end                      /* stop parsing the list */
 301   } arg_type;                   /* argument type */
 302   language *lang;               /* language associated with the argument */
 303   char *what;                   /* the argument itself */
 304 } argument;
 305
 306 /* Structure defining a regular expression. */
 307 typedef struct regexp
 308 {
 309   struct regexp *p_next;        /* pointer to next in list */
 310   language *lang;               /* if set, use only for this language */
 311   char *pattern;                /* the regexp pattern */
 312   char *name;                   /* tag name */
 313   struct re_pattern_buffer *pat; /* the compiled pattern */
 314   struct re_registers regs;     /* re registers */
 315   bool error_signaled;          /* already signaled for this regexp */
 316   bool force_explicit_name;     /* do not allow implict tag name */
 317   bool ignore_case;             /* ignore case when matching */
 318   bool multi_line;              /* do a multi-line match on the whole file */
 319 } regexp;
 320
 321
 322 /* Many compilers barf on this:
 323         Lang_function Ada_funcs;
 324    so let's write it this way */
 325 static void Ada_funcs __P((FILE *));
 326 static void Asm_labels __P((FILE *));
 327 static void C_entries __P((int c_ext, FILE *));
 328 static void default_C_entries __P((FILE *));
 329 static void plain_C_entries __P((FILE *));
 330 static void Cjava_entries __P((FILE *));
 331 static void Cobol_paragraphs __P((FILE *));
 332 static void Cplusplus_entries __P((FILE *));
 333 static void Cstar_entries __P((FILE *));
 334 static void Erlang_functions __P((FILE *));
 335 static void Forth_words __P((FILE *));
 336 static void Fortran_functions __P((FILE *));
 337 static void HTML_labels __P((FILE *));
 338 static void Lisp_functions __P((FILE *));
 339 static void Lua_functions __P((FILE *));
 340 static void Makefile_targets __P((FILE *));
 341 static void Pascal_functions __P((FILE *));
 342 static void Perl_functions __P((FILE *));
 343 static void PHP_functions __P((FILE *));
 344 static void PS_functions __P((FILE *));
 345 static void Prolog_functions __P((FILE *));
 346 static void Python_functions __P((FILE *));
 347 static void Scheme_functions __P((FILE *));
 348 static void TeX_commands __P((FILE *));
 349 static void Texinfo_nodes __P((FILE *));
 350 static void Yacc_entries __P((FILE *));
 351 static void just_read_file __P((FILE *));
 352
 353 static void print_language_names __P((void));
 354 static void print_version __P((void));
 355 static void print_help __P((argument *));
 356 int main __P((int, char **));
 357
 358 static compressor *get_compressor_from_suffix __P((char *, char **));
 359 static language *get_language_from_langname __P((const char *));
 360 static language *get_language_from_interpreter __P((char *));
 361 static language *get_language_from_filename __P((char *, bool));
 362 static void readline __P((linebuffer *, FILE *));
 363 static long readline_internal __P((linebuffer *, FILE *));
 364 static bool nocase_tail __P((char *));
 365 static void get_tag __P((char *, char **));
 366
 367 static void analyse_regex __P((char *));
 368 static void free_regexps __P((void));
 369 static void regex_tag_multiline __P((void));
 370 static void error __P((const char *, const char *));
 371 static void suggest_asking_for_help __P((void));
 372 void fatal __P((char *, char *));
 373 static void pfatal __P((char *));
 374 static void add_node __P((node *, node **));
 375
 376 static void init __P((void));
 377 static void process_file_name __P((char *, language *));
 378 static void process_file __P((FILE *, char *, language *));
 379 static void find_entries __P((FILE *));
 380 static void free_tree __P((node *));
 381 static void free_fdesc __P((fdesc *));
 382 static void pfnote __P((char *, bool, char *, int, int, long));
 383 static void make_tag __P((char *, int, bool, char *, int, int, long));
 384 static void invalidate_nodes __P((fdesc *, node **));
 385 static void put_entries __P((node *));
 386
 387 static char *concat __P((char *, char *, char *));
 388 static char *skip_spaces __P((char *));
 389 static char *skip_non_spaces __P((char *));
 390 static char *savenstr __P((char *, int));
 391 static char *savestr __P((char *));
 392 static char *etags_strchr __P((const char *, int));
 393 static char *etags_strrchr __P((const char *, int));
 394 static int etags_strcasecmp __P((const char *, const char *));
 395 static int etags_strncasecmp __P((const char *, const char *, int));
 396 static char *etags_getcwd __P((void));
 397 static char *relative_filename __P((char *, char *));
 398 static char *absolute_filename __P((char *, char *));
 399 static char *absolute_dirname __P((char *, char *));
 400 static bool filename_is_absolute __P((char *f));
 401 static void canonicalize_filename __P((char *));
 402 static void linebuffer_init __P((linebuffer *));
 403 static void linebuffer_setlen __P((linebuffer *, int));
 404 static PTR xmalloc __P((unsigned int));
 405 static PTR xrealloc __P((char *, unsigned int));
 406
 407 \f
 408 static char searchar = '/';     /* use /.../ searches */
 409
 410 static char *tagfile;           /* output file */
 411 static char *progname;          /* name this program was invoked with */
 412 static char *cwd;               /* current working directory */
 413 static char *tagfiledir;        /* directory of tagfile */
 414 static FILE *tagf;              /* ioptr for tags file */
 415
 416 static fdesc *fdhead;           /* head of file description list */
 417 static fdesc *curfdp;           /* current file description */
 418 static int lineno;              /* line number of current line */
 419 static long charno;             /* current character number */
 420 static long linecharno;         /* charno of start of current line */
 421 static char *dbp;               /* pointer to start of current tag */
 422
 423 static const int invalidcharno = -1;
 424
 425 static node *nodehead;          /* the head of the binary tree of tags */
 426 static node *last_node;         /* the last node created */
 427
 428 static linebuffer lb;           /* the current line */
 429 static linebuffer filebuf;      /* a buffer containing the whole file */
 430 static linebuffer token_name;   /* a buffer containing a tag name */
 431
 432 /* boolean "functions" (see init)       */
 433 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 434 static char
 435   /* white chars */
 436   *white = " \f\t\n\r\v",
 437   /* not in a name */
 438   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 439   /* token ending chars */
 440   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 441   /* token starting chars */
 442   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 443   /* valid in-token chars */
 444   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 445
 446 static bool append_to_tagfile;  /* -a: append to tags */
 447 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 448 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 449 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 450                                 /* 0 struct/enum/union decls, and C++ */
 451                                 /* member functions. */
 452 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 453                                 /* constants and variables. */
 454                                 /* -D: opposite of -d.  Default under ctags. */
 455 static bool globals;            /* create tags for global variables */
 456 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 457 static bool members;            /* create tags for C member variables */
 458 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 459 static bool update;             /* -u: update tags */
 460 static bool vgrind_style;       /* -v: create vgrind style index output */
 461 static bool no_warnings;        /* -w: suppress warnings */
 462 static bool cxref_style;        /* -x: create cxref style output */
 463 static bool cplusplus;          /* .[hc] means C++, not C */
 464 static bool ignoreindent;       /* -I: ignore indentation in C */
 465 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 466
 467 /* STDIN is defined in LynxOS system headers */
 468 #ifdef STDIN
 469 # undef STDIN
 470 #endif
 471
 472 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 473 static bool parsing_stdin;      /* --parse-stdin used */
 474
 475 static regexp *p_head;          /* list of all regexps */
 476 static bool need_filebuf;       /* some regexes are multi-line */
 477
 478 static struct option longopts[] =
 479 {
 480   { "append",             no_argument,       NULL,               'a'   },
 481   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 482   { "c++",                no_argument,       NULL,               'C'   },
 483   { "declarations",       no_argument,       &declarations,      TRUE  },
 484   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 485   { "help",               no_argument,       NULL,               'h'   },
 486   { "help",               no_argument,       NULL,               'H'   },
 487   { "ignore-indentation", no_argument,       NULL,               'I'   },
 488   { "language",           required_argument, NULL,               'l'   },
 489   { "members",            no_argument,       &members,           TRUE  },
 490   { "no-members",         no_argument,       &members,           FALSE },
 491   { "output",             required_argument, NULL,               'o'   },
 492   { "regex",              required_argument, NULL,               'r'   },
 493   { "no-regex",           no_argument,       NULL,               'R'   },
 494   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 495   { "parse-stdin",        required_argument, NULL,               STDIN },
 496   { "version",            no_argument,       NULL,               'V'   },
 497
 498 #if CTAGS /* Ctags options */
 499   { "backward-search",    no_argument,       NULL,               'B'   },
 500   { "cxref",              no_argument,       NULL,               'x'   },
 501   { "defines",            no_argument,       NULL,               'd'   },
 502   { "globals",            no_argument,       &globals,           TRUE  },
 503   { "typedefs",           no_argument,       NULL,               't'   },
 504   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 505   { "update",             no_argument,       NULL,               'u'   },
 506   { "vgrind",             no_argument,       NULL,               'v'   },
 507   { "no-warn",            no_argument,       NULL,               'w'   },
 508
 509 #else /* Etags options */
 510   { "no-defines",         no_argument,       NULL,               'D'   },
 511   { "no-globals",         no_argument,       &globals,           FALSE },
 512   { "include",            required_argument, NULL,               'i'   },
 513 #endif
 514   { NULL }
 515 };
 516
 517 static compressor compressors[] =
 518 {
 519   { "z", "gzip -d -c"},
 520   { "Z", "gzip -d -c"},
 521   { "gz", "gzip -d -c"},
 522   { "GZ", "gzip -d -c"},
 523   { "bz2", "bzip2 -d -c" },
 524   { NULL }
 525 };
 526
 527 /*
 528  * Language stuff.
 529  */
 530
 531 /* Ada code */
 532 static char *Ada_suffixes [] =
 533   { "ads", "adb", "ada", NULL };
 534 static char Ada_help [] =
 535 "In Ada code, functions, procedures, packages, tasks and types are\n\
 536 tags.  Use the `--packages-only' option to create tags for\n\
 537 packages only.\n\
 538 Ada tag names have suffixes indicating the type of entity:\n\
 539         Entity type:    Qualifier:\n\
 540         ------------    ----------\n\
 541         function        /f\n\
 542         procedure       /p\n\
 543         package spec    /s\n\
 544         package body    /b\n\
 545         type            /t\n\
 546         task            /k\n\
 547 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 548 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 549 will just search for any tag `bidule'.";
 550
 551 /* Assembly code */
 552 static char *Asm_suffixes [] =
 553   { "a",        /* Unix assembler */
 554     "asm", /* Microcontroller assembly */
 555     "def", /* BSO/Tasking definition includes  */
 556     "inc", /* Microcontroller include files */
 557     "ins", /* Microcontroller include files */
 558     "s", "sa", /* Unix assembler */
 559     "S",   /* cpp-processed Unix assembler */
 560     "src", /* BSO/Tasking C compiler output */
 561     NULL
 562   };
 563 static char Asm_help [] =
 564 "In assembler code, labels appearing at the beginning of a line,\n\
 565 followed by a colon, are tags.";
 566
 567
 568 /* Note that .c and .h can be considered C++, if the --c++ flag was
 569    given, or if the `class' or `template' keyowrds are met inside the file.
 570    That is why default_C_entries is called for these. */
 571 static char *default_C_suffixes [] =
 572   { "c", "h", NULL };
 573 static char default_C_help [] =
 574 "In C code, any C function or typedef is a tag, and so are\n\
 575 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 576 definitions and `enum' constants are tags unless you specify\n\
 577 `--no-defines'.  Global variables are tags unless you specify\n\
 578 `--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
 579 can make the tags table file much smaller.\n\
 580 You can tag function declarations and external variables by\n\
 581 using `--declarations', and struct members by using `--members'.";
 582
 583 static char *Cplusplus_suffixes [] =
 584   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 585     "M",                        /* Objective C++ */
 586     "pdb",                      /* Postscript with C syntax */
 587     NULL };
 588 static char Cplusplus_help [] =
 589 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 590 --help --lang=c --lang=c++ for full help.)\n\
 591 In addition to C tags, member functions are also recognized, and\n\
 592 optionally member variables if you use the `--members' option.\n\
 593 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 594 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 595 `operator+'.";
 596
 597 static char *Cjava_suffixes [] =
 598   { "java", NULL };
 599 static char Cjava_help [] =
 600 "In Java code, all the tags constructs of C and C++ code are\n\
 601 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 602
 603
 604 static char *Cobol_suffixes [] =
 605   { "COB", "cob", NULL };
 606 static char Cobol_help [] =
 607 "In Cobol code, tags are paragraph names; that is, any word\n\
 608 starting in column 8 and followed by a period.";
 609
 610 static char *Cstar_suffixes [] =
 611   { "cs", "hs", NULL };
 612
 613 static char *Erlang_suffixes [] =
 614   { "erl", "hrl", NULL };
 615 static char Erlang_help [] =
 616 "In Erlang code, the tags are the functions, records and macros\n\
 617 defined in the file.";
 618
 619 char *Forth_suffixes [] =
 620   { "fth", "tok", NULL };
 621 static char Forth_help [] =
 622 "In Forth code, tags are words defined by `:',\n\
 623 constant, code, create, defer, value, variable, buffer:, field.";
 624
 625 static char *Fortran_suffixes [] =
 626   { "F", "f", "f90", "for", NULL };
 627 static char Fortran_help [] =
 628 "In Fortran code, functions, subroutines and block data are tags.";
 629
 630 static char *HTML_suffixes [] =
 631   { "htm", "html", "shtml", NULL };
 632 static char HTML_help [] =
 633 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 634 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 635 occurrences of `id='.";
 636
 637 static char *Lisp_suffixes [] =
 638   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 639 static char Lisp_help [] =
 640 "In Lisp code, any function defined with `defun', any variable\n\
 641 defined with `defvar' or `defconst', and in general the first\n\
 642 argument of any expression that starts with `(def' in column zero\n\
 643 is a tag.";
 644
 645 static char *Lua_suffixes [] =
 646   { "lua", "LUA", NULL };
 647 static char Lua_help [] =
 648 "In Lua scripts, all functions are tags.";
 649
 650 static char *Makefile_filenames [] =
 651   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 652 static char Makefile_help [] =
 653 "In makefiles, targets are tags; additionally, variables are tags\n\
 654 unless you specify `--no-globals'.";
 655
 656 static char *Objc_suffixes [] =
 657   { "lm",                       /* Objective lex file */
 658     "m",                        /* Objective C file */
 659      NULL };
 660 static char Objc_help [] =
 661 "In Objective C code, tags include Objective C definitions for classes,\n\
 662 class categories, methods and protocols.  Tags for variables and\n\
 663 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 664 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 665
 666 static char *Pascal_suffixes [] =
 667   { "p", "pas", NULL };
 668 static char Pascal_help [] =
 669 "In Pascal code, the tags are the functions and procedures defined\n\
 670 in the file.";
 671 /* " // this is for working around an Emacs highlighting bug... */
 672
 673 static char *Perl_suffixes [] =
 674   { "pl", "pm", NULL };
 675 static char *Perl_interpreters [] =
 676   { "perl", "@PERL@", NULL };
 677 static char Perl_help [] =
 678 "In Perl code, the tags are the packages, subroutines and variables\n\
 679 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 680 `--globals' if you want to tag global variables.  Tags for\n\
 681 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 682 defined in the default package is `main::SUB'.";
 683
 684 static char *PHP_suffixes [] =
 685   { "php", "php3", "php4", NULL };
 686 static char PHP_help [] =
 687 "In PHP code, tags are functions, classes and defines.  When using\n\
 688 the `--members' option, vars are tags too.";
 689
 690 static char *plain_C_suffixes [] =
 691   { "pc",                       /* Pro*C file */
 692      NULL };
 693
 694 static char *PS_suffixes [] =
 695   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 696 static char PS_help [] =
 697 "In PostScript code, the tags are the functions.";
 698
 699 static char *Prolog_suffixes [] =
 700   { "prolog", NULL };
 701 static char Prolog_help [] =
 702 "In Prolog code, tags are predicates and rules at the beginning of\n\
 703 line.";
 704
 705 static char *Python_suffixes [] =
 706   { "py", NULL };
 707 static char Python_help [] =
 708 "In Python code, `def' or `class' at the beginning of a line\n\
 709 generate a tag.";
 710
 711 /* Can't do the `SCM' or `scm' prefix with a version number. */
 712 static char *Scheme_suffixes [] =
 713   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 714 static char Scheme_help [] =
 715 "In Scheme code, tags include anything defined with `def' or with a\n\
 716 construct whose name starts with `def'.  They also include\n\
 717 variables set with `set!' at top level in the file.";
 718
 719 static char *TeX_suffixes [] =
 720   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 721 static char TeX_help [] =
 722 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 723 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 724 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 725 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 726 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 727 \n\
 728 Other commands can be specified by setting the environment variable\n\
 729 `TEXTAGS' to a colon-separated list like, for example,\n\
 730      TEXTAGS=\"mycommand:myothercommand\".";
 731
 732
 733 static char *Texinfo_suffixes [] =
 734   { "texi", "texinfo", "txi", NULL };
 735 static char Texinfo_help [] =
 736 "for texinfo files, lines starting with @node are tagged.";
 737
 738 static char *Yacc_suffixes [] =
 739   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 740 static char Yacc_help [] =
 741 "In Bison or Yacc input files, each rule defines as a tag the\n\
 742 nonterminal it constructs.  The portions of the file that contain\n\
 743 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 744 for full help).";
 745
 746 static char auto_help [] =
 747 "`auto' is not a real language, it indicates to use\n\
 748 a default language for files base on file name suffix and file contents.";
 749
 750 static char none_help [] =
 751 "`none' is not a real language, it indicates to only do\n\
 752 regexp processing on files.";
 753
 754 static char no_lang_help [] =
 755 "No detailed help available for this language.";
 756
 757
 758 /*
 759  * Table of languages.
 760  *
 761  * It is ok for a given function to be listed under more than one
 762  * name.  I just didn't.
 763  */
 764
 765 static language lang_names [] =
 766 {
 767   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 768   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 769   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 770   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 771   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 772   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 773   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 774   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 775   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 776   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 777   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 778   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 779   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 780   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 781   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 782   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 783   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 784   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 785   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 786   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 787   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 788   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 789   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 790   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 791   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 792   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 793   { "auto",      auto_help },                      /* default guessing scheme */
 794   { "none",      none_help,      just_read_file }, /* regexp matching only */
 795   { NULL }                /* end of list */
 796 };
 797
 798 \f
 799 static void
 800 print_language_names ()
 801 {
 802   language *lang;
 803   char **name, **ext;
 804
 805   puts ("\nThese are the currently supported languages, along with the\n\
 806 default file names and dot suffixes:");
 807   for (lang = lang_names; lang->name != NULL; lang++)
 808     {
 809       printf ("  %-*s", 10, lang->name);
 810       if (lang->filenames != NULL)
 811         for (name = lang->filenames; *name != NULL; name++)
 812           printf (" %s", *name);
 813       if (lang->suffixes != NULL)
 814         for (ext = lang->suffixes; *ext != NULL; ext++)
 815           printf (" .%s", *ext);
 816       puts ("");
 817     }
 818   puts ("where `auto' means use default language for files based on file\n\
 819 name suffix, and `none' means only do regexp processing on files.\n\
 820 If no language is specified and no matching suffix is found,\n\
 821 the first line of the file is read for a sharp-bang (#!) sequence\n\
 822 followed by the name of an interpreter.  If no such sequence is found,\n\
 823 Fortran is tried first; if no tags are found, C is tried next.\n\
 824 When parsing any C file, a \"class\" or \"template\" keyword\n\
 825 switches to C++.");
 826   puts ("Compressed files are supported using gzip and bzip2.\n\
 827 \n\
 828 For detailed help on a given language use, for example,\n\
 829 etags --help --lang=ada.");
 830 }
 831
 832 #ifndef EMACS_NAME
 833 # define EMACS_NAME "standalone"
 834 #endif
 835 #ifndef VERSION
 836 # define VERSION "version"
 837 #endif
 838 static void
 839 print_version ()
 840 {
 841   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 842   puts ("Copyright (C) 2006 Free Software Foundation, Inc. and Ken Arnold");
 843   puts ("This program is distributed under the same terms as Emacs");
 844
 845   exit (EXIT_SUCCESS);
 846 }
 847
 848 static void
 849 print_help (argbuffer)
 850      argument *argbuffer;
 851 {
 852   bool help_for_lang = FALSE;
 853
 854   for (; argbuffer->arg_type != at_end; argbuffer++)
 855     if (argbuffer->arg_type == at_language)
 856       {
 857         if (help_for_lang)
 858           puts ("");
 859         puts (argbuffer->lang->help);
 860         help_for_lang = TRUE;
 861       }
 862
 863   if (help_for_lang)
 864     exit (EXIT_SUCCESS);
 865
 866   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 867 \n\
 868 These are the options accepted by %s.\n", progname, progname);
 869   if (NO_LONG_OPTIONS)
 870     puts ("WARNING: long option names do not work with this executable,\n\
 871 as it is not linked with GNU getopt.");
 872   else
 873     puts ("You may use unambiguous abbreviations for the long option names.");
 874   puts ("  A - as file name means read names from stdin (one per line).\n\
 875 Absolute names are stored in the output file as they are.\n\
 876 Relative ones are stored relative to the output file's directory.\n");
 877
 878   puts ("-a, --append\n\
 879         Append tag entries to existing tags file.");
 880
 881   puts ("--packages-only\n\
 882         For Ada files, only generate tags for packages.");
 883
 884   if (CTAGS)
 885     puts ("-B, --backward-search\n\
 886         Write the search commands for the tag entries using '?', the\n\
 887         backward-search command instead of '/', the forward-search command.");
 888
 889   /* This option is mostly obsolete, because etags can now automatically
 890      detect C++.  Retained for backward compatibility and for debugging and
 891      experimentation.  In principle, we could want to tag as C++ even
 892      before any "class" or "template" keyword.
 893   puts ("-C, --c++\n\
 894         Treat files whose name suffix defaults to C language as C++ files.");
 895   */
 896
 897   puts ("--declarations\n\
 898         In C and derived languages, create tags for function declarations,");
 899   if (CTAGS)
 900     puts ("\tand create tags for extern variables if --globals is used.");
 901   else
 902     puts
 903       ("\tand create tags for extern variables unless --no-globals is used.");
 904
 905   if (CTAGS)
 906     puts ("-d, --defines\n\
 907         Create tag entries for C #define constants and enum constants, too.");
 908   else
 909     puts ("-D, --no-defines\n\
 910         Don't create tag entries for C #define constants and enum constants.\n\
 911         This makes the tags file smaller.");
 912
 913   if (!CTAGS)
 914     puts ("-i FILE, --include=FILE\n\
 915         Include a note in tag file indicating that, when searching for\n\
 916         a tag, one should also consult the tags file FILE after\n\
 917         checking the current file.");
 918
 919   puts ("-l LANG, --language=LANG\n\
 920         Force the following files to be considered as written in the\n\
 921         named language up to the next --language=LANG option.");
 922
 923   if (CTAGS)
 924     puts ("--globals\n\
 925         Create tag entries for global variables in some languages.");
 926   else
 927     puts ("--no-globals\n\
 928         Do not create tag entries for global variables in some\n\
 929         languages.  This makes the tags file smaller.");
 930   puts ("--members\n\
 931         Create tag entries for members of structures in some languages.");
 932
 933   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 934         Make a tag for each line matching a regular expression pattern\n\
 935         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 936         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 937         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 938         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 939   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 940         For example Tcl named tags can be created with:\n\
 941           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 942         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 943         `m' means to allow multi-line matches, `s' implies `m' and\n\
 944         causes dot to match any character, including newline.");
 945   puts ("-R, --no-regex\n\
 946         Don't create tags from regexps for the following files.");
 947   puts ("-I, --ignore-indentation\n\
 948         In C and C++ do not assume that a closing brace in the first\n\
 949         column is the final brace of a function or structure definition.");
 950   puts ("-o FILE, --output=FILE\n\
 951         Write the tags to FILE.");
 952   puts ("--parse-stdin=NAME\n\
 953         Read from standard input and record tags as belonging to file NAME.");
 954
 955   if (CTAGS)
 956     {
 957       puts ("-t, --typedefs\n\
 958         Generate tag entries for C and Ada typedefs.");
 959       puts ("-T, --typedefs-and-c++\n\
 960         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 961         and C++ member functions.");
 962     }
 963
 964   if (CTAGS)
 965     puts ("-u, --update\n\
 966         Update the tag entries for the given files, leaving tag\n\
 967         entries for other files in place.  Currently, this is\n\
 968         implemented by deleting the existing entries for the given\n\
 969         files and then rewriting the new entries at the end of the\n\
 970         tags file.  It is often faster to simply rebuild the entire\n\
 971         tag file than to use this.");
 972
 973   if (CTAGS)
 974     {
 975       puts ("-v, --vgrind\n\
 976         Print on the standard output an index of items intended for\n\
 977         human consumption, similar to the output of vgrind.  The index\n\
 978         is sorted, and gives the page number of each item.");
 979       puts ("-w, --no-warn\n\
 980         Suppress warning messages about entries defined in multiple\n\
 981         files.");
 982       puts ("-x, --cxref\n\
 983         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
 984         The output uses line numbers instead of page numbers, but\n\
 985         beyond that the differences are cosmetic; try both to see\n\
 986         which you like.");
 987     }
 988
 989   puts ("-V, --version\n\
 990         Print the version of the program.\n\
 991 -h, --help\n\
 992         Print this help message.\n\
 993         Followed by one or more `--language' options prints detailed\n\
 994         help about tag generation for the specified languages.");
 995
 996   print_language_names ();
 997
 998   puts ("");
 999   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1000
1001   exit (EXIT_SUCCESS);
1002 }
1003
1004 \f
1005 #ifdef VMS                      /* VMS specific functions */
1006
#define EOS     '\0'            /* string terminator */

/* Upper bound on the length of a file specification.
   This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN       255

/* A file specification: a length field followed by the characters,
   with room for MAX_FILE_SPEC_LEN of them plus one terminator.  */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1016
1017 /*
1018  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1019  returning in each successive call the next file name matching the input
1020  spec. The function expects that each in_spec passed
1021  to it will be processed to completion; in particular, up to and
1022  including the call following that in which the last matching name
1023  is returned, the function ignores the value of in_spec, and will
1024  only start processing a new spec with the following call.
1025  If an error occurs, on return out_spec contains the value
1026  of in_spec when the error occurred.
1027
1028  With each successive file name returned in out_spec, the
1029  function's return value is one. When there are no more matching
1030  names the function returns zero. If on the first call no file
1031  matches in_spec, or there is any other error, -1 is returned.
1032 */
1033
1034 #include        <rmsdef.h>
1035 #include        <descrip.h>
1036 #define         OUTSIZE MAX_FILE_SPEC_LEN
1037 static short
1038 fn_exp (out, in)
1039      vspec *out;
1040      char *in;
1041 {
1042   static long context = 0;
1043   static struct dsc$descriptor_s o;
1044   static struct dsc$descriptor_s i;
1045   static bool pass1 = TRUE;
1046   long status;
1047   short retval;
1048
1049   if (pass1)
1050     {
1051       pass1 = FALSE;
1052       o.dsc$a_pointer = (char *) out;
1053       o.dsc$w_length = (short)OUTSIZE;
1054       i.dsc$a_pointer = in;
1055       i.dsc$w_length = (short)strlen(in);
1056       i.dsc$b_dtype = DSC$K_DTYPE_T;
1057       i.dsc$b_class = DSC$K_CLASS_S;
1058       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1059       o.dsc$b_class = DSC$K_CLASS_VS;
1060     }
1061   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1062     {
1063       out->body[out->curlen] = EOS;
1064       return 1;
1065     }
1066   else if (status == RMS$_NMF)
1067     retval = 0;
1068   else
1069     {
1070       strcpy(out->body, in);
1071       retval = -1;
1072     }
1073   lib$find_file_end(&context);
1074   pass1 = TRUE;
1075   return retval;
1076 }
1077
1078 /*
1079   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1080   name of each file specified by the provided arg expanding wildcards.
1081 */
1082 static char *
1083 gfnames (arg, p_error)
1084      char *arg;
1085      bool *p_error;
1086 {
1087   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1088
1089   switch (fn_exp (&filename, arg))
1090     {
1091     case 1:
1092       *p_error = FALSE;
1093       return filename.body;
1094     case 0:
1095       *p_error = FALSE;
1096       return NULL;
1097     default:
1098       *p_error = TRUE;
1099       return filename.body;
1100     }
1101 }
1102
#ifndef OLD  /* Newer versions of VMS do provide `system'.  */
/*
 * Stand-in for the C library `system'.  It never runs CMD: it only
 * reports that the function is unavailable and returns -1, so that
 * callers checking the status see a failure instead of an
 * indeterminate value.
 *
 * NOTE(review): this stub is compiled when OLD is NOT defined, while
 * the comment above says that newer VMS versions already provide
 * `system'.  The condition may be inverted -- confirm before relying
 * on it.
 */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1110
1111 #define VERSION_DELIM   ';'
1112 char *massage_name (s)
1113      char *s;
1114 {
1115   char *start = s;
1116
1117   for ( ; *s; s++)
1118     if (*s == VERSION_DELIM)
1119       {
1120         *s = EOS;
1121         break;
1122       }
1123     else
1124       *s = lowcase (*s);
1125   return start;
1126 }
1127 #endif /* VMS */
1128
1129 \f
1130 int
1131 main (argc, argv)
1132      int argc;
1133      char *argv[];
1134 {
1135   int i;
1136   unsigned int nincluded_files;
1137   char **included_files;
1138   argument *argbuffer;
1139   int current_arg, file_count;
1140   linebuffer filename_lb;
1141   bool help_asked = FALSE;
1142 #ifdef VMS
1143   bool got_err;
1144 #endif
1145  char *optstring;
1146  int opt;
1147
1148
1149 #ifdef DOS_NT
1150   _fmode = O_BINARY;   /* all of files are treated as binary files */
1151 #endif /* DOS_NT */
1152
1153   progname = argv[0];
1154   nincluded_files = 0;
1155   included_files = xnew (argc, char *);
1156   current_arg = 0;
1157   file_count = 0;
1158
1159   /* Allocate enough no matter what happens.  Overkill, but each one
1160      is small. */
1161   argbuffer = xnew (argc, argument);
1162
1163   /*
1164    * If etags, always find typedefs and structure tags.  Why not?
1165    * Also default to find macro constants, enum constants and
1166    * global variables.
1167    */
1168   if (!CTAGS)
1169     {
1170       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1171       globals = TRUE;
1172     }
1173
1174   /* When the optstring begins with a '-' getopt_long does not rearrange the
1175      non-options arguments to be at the end, but leaves them alone. */
1176   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1177                       "ac:Cf:Il:o:r:RSVhH",
1178                       (CTAGS) ? "BxdtTuvw" : "Di:");
1179
1180   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1181     switch (opt)
1182       {
1183       case 0:
1184         /* If getopt returns 0, then it has already processed a
1185            long-named option.  We should do nothing.  */
1186         break;
1187
1188       case 1:
1189         /* This means that a file name has been seen.  Record it. */
1190         argbuffer[current_arg].arg_type = at_filename;
1191         argbuffer[current_arg].what     = optarg;
1192         ++current_arg;
1193         ++file_count;
1194         break;
1195
1196       case STDIN:
1197         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1198         argbuffer[current_arg].arg_type = at_stdin;
1199         argbuffer[current_arg].what     = optarg;
1200         ++current_arg;
1201         ++file_count;
1202         if (parsing_stdin)
1203           fatal ("cannot parse standard input more than once", (char *)NULL);
1204         parsing_stdin = TRUE;
1205         break;
1206
1207         /* Common options. */
1208       case 'a': append_to_tagfile = TRUE;       break;
1209       case 'C': cplusplus = TRUE;               break;
1210       case 'f':         /* for compatibility with old makefiles */
1211       case 'o':
1212         if (tagfile)
1213           {
1214             error ("-o option may only be given once.", (char *)NULL);
1215             suggest_asking_for_help ();
1216             /* NOTREACHED */
1217           }
1218         tagfile = optarg;
1219         break;
1220       case 'I':
1221       case 'S':         /* for backward compatibility */
1222         ignoreindent = TRUE;
1223         break;
1224       case 'l':
1225         {
1226           language *lang = get_language_from_langname (optarg);
1227           if (lang != NULL)
1228             {
1229               argbuffer[current_arg].lang = lang;
1230               argbuffer[current_arg].arg_type = at_language;
1231               ++current_arg;
1232             }
1233         }
1234         break;
1235       case 'c':
1236         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1237         optarg = concat (optarg, "i", ""); /* memory leak here */
1238         /* FALLTHRU */
1239       case 'r':
1240         argbuffer[current_arg].arg_type = at_regexp;
1241         argbuffer[current_arg].what = optarg;
1242         ++current_arg;
1243         break;
1244       case 'R':
1245         argbuffer[current_arg].arg_type = at_regexp;
1246         argbuffer[current_arg].what = NULL;
1247         ++current_arg;
1248         break;
1249       case 'V':
1250         print_version ();
1251         break;
1252       case 'h':
1253       case 'H':
1254         help_asked = TRUE;
1255         break;
1256
1257         /* Etags options */
1258       case 'D': constantypedefs = FALSE;                        break;
1259       case 'i': included_files[nincluded_files++] = optarg;     break;
1260
1261         /* Ctags options. */
1262       case 'B': searchar = '?';                                 break;
1263       case 'd': constantypedefs = TRUE;                         break;
1264       case 't': typedefs = TRUE;                                break;
1265       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1266       case 'u': update = TRUE;                                  break;
1267       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1268       case 'x': cxref_style = TRUE;                             break;
1269       case 'w': no_warnings = TRUE;                             break;
1270       default:
1271         suggest_asking_for_help ();
1272         /* NOTREACHED */
1273       }
1274
1275   /* No more options.  Store the rest of arguments. */
1276   for (; optind < argc; optind++)
1277     {
1278       argbuffer[current_arg].arg_type = at_filename;
1279       argbuffer[current_arg].what = argv[optind];
1280       ++current_arg;
1281       ++file_count;
1282     }
1283
1284   argbuffer[current_arg].arg_type = at_end;
1285
1286   if (help_asked)
1287     print_help (argbuffer);
1288     /* NOTREACHED */
1289
1290   if (nincluded_files == 0 && file_count == 0)
1291     {
1292       error ("no input files specified.", (char *)NULL);
1293       suggest_asking_for_help ();
1294       /* NOTREACHED */
1295     }
1296
1297   if (tagfile == NULL)
1298     tagfile = CTAGS ? "tags" : "TAGS";
1299   cwd = etags_getcwd ();        /* the current working directory */
1300   if (cwd[strlen (cwd) - 1] != '/')
1301     {
1302       char *oldcwd = cwd;
1303       cwd = concat (oldcwd, "/", "");
1304       free (oldcwd);
1305     }
1306   /* Relative file names are made relative to the current directory. */
1307   if (streq (tagfile, "-")
1308       || strneq (tagfile, "/dev/", 5))
1309     tagfiledir = cwd;
1310   else
1311     tagfiledir = absolute_dirname (tagfile, cwd);
1312
1313   init ();                      /* set up boolean "functions" */
1314
1315   linebuffer_init (&lb);
1316   linebuffer_init (&filename_lb);
1317   linebuffer_init (&filebuf);
1318   linebuffer_init (&token_name);
1319
1320   if (!CTAGS)
1321     {
1322       if (streq (tagfile, "-"))
1323         {
1324           tagf = stdout;
1325 #ifdef DOS_NT
1326           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1327              doesn't take effect until after `stdout' is already open). */
1328           if (!isatty (fileno (stdout)))
1329             setmode (fileno (stdout), O_BINARY);
1330 #endif /* DOS_NT */
1331         }
1332       else
1333         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1334       if (tagf == NULL)
1335         pfatal (tagfile);
1336     }
1337
1338   /*
1339    * Loop through files finding functions.
1340    */
1341   for (i = 0; i < current_arg; i++)
1342     {
1343       static language *lang;    /* non-NULL if language is forced */
1344       char *this_file;
1345
1346       switch (argbuffer[i].arg_type)
1347         {
1348         case at_language:
1349           lang = argbuffer[i].lang;
1350           break;
1351         case at_regexp:
1352           analyse_regex (argbuffer[i].what);
1353           break;
1354         case at_filename:
1355 #ifdef VMS
1356           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1357             {
1358               if (got_err)
1359                 {
1360                   error ("can't find file %s\n", this_file);
1361                   argc--, argv++;
1362                 }
1363               else
1364                 {
1365                   this_file = massage_name (this_file);
1366                 }
1367 #else
1368               this_file = argbuffer[i].what;
1369 #endif
1370               /* Input file named "-" means read file names from stdin
1371                  (one per line) and use them. */
1372               if (streq (this_file, "-"))
1373                 {
1374                   if (parsing_stdin)
1375                     fatal ("cannot parse standard input AND read file names from it",
1376                            (char *)NULL);
1377                   while (readline_internal (&filename_lb, stdin) > 0)
1378                     process_file_name (filename_lb.buffer, lang);
1379                 }
1380               else
1381                 process_file_name (this_file, lang);
1382 #ifdef VMS
1383             }
1384 #endif
1385           break;
1386         case at_stdin:
1387           this_file = argbuffer[i].what;
1388           process_file (stdin, this_file, lang);
1389           break;
1390         }
1391     }
1392
1393   free_regexps ();
1394   free (lb.buffer);
1395   free (filebuf.buffer);
1396   free (token_name.buffer);
1397
1398   if (!CTAGS || cxref_style)
1399     {
1400       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1401       put_entries (nodehead);
1402       free_tree (nodehead);
1403       nodehead = NULL;
1404       if (!CTAGS)
1405         {
1406           fdesc *fdp;
1407
1408           /* Output file entries that have no tags. */
1409           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1410             if (!fdp->written)
1411               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1412
1413           while (nincluded_files-- > 0)
1414             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1415
1416           if (fclose (tagf) == EOF)
1417             pfatal (tagfile);
1418         }
1419
1420       exit (EXIT_SUCCESS);
1421     }
1422
1423   if (update)
1424     {
1425       char cmd[BUFSIZ];
1426       for (i = 0; i < current_arg; ++i)
1427         {
1428           switch (argbuffer[i].arg_type)
1429             {
1430             case at_filename:
1431             case at_stdin:
1432               break;
1433             default:
1434               continue;         /* the for loop */
1435             }
1436           sprintf (cmd,
1437                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1438                    tagfile, argbuffer[i].what, tagfile);
1439           if (system (cmd) != EXIT_SUCCESS)
1440             fatal ("failed to execute shell command", (char *)NULL);
1441         }
1442       append_to_tagfile = TRUE;
1443     }
1444
1445   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1446   if (tagf == NULL)
1447     pfatal (tagfile);
1448   put_entries (nodehead);       /* write all the tags (CTAGS) */
1449   free_tree (nodehead);
1450   nodehead = NULL;
1451   if (fclose (tagf) == EOF)
1452     pfatal (tagfile);
1453
1454   if (CTAGS)
1455     if (append_to_tagfile || update)
1456       {
1457         char cmd[2*BUFSIZ+10];
1458         sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1459         exit (system (cmd));
1460       }
1461   return EXIT_SUCCESS;
1462 }
1463
1464
1465 /*
1466  * Return a compressor given the file name.  If EXTPTR is non-zero,
1467  * return a pointer into FILE where the compressor-specific
1468  * extension begins.  If no compressor is found, NULL is returned
1469  * and EXTPTR is not significant.
1470  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1471  */
1472 static compressor *
1473 get_compressor_from_suffix (file, extptr)
1474      char *file;
1475      char **extptr;
1476 {
1477   compressor *compr;
1478   char *slash, *suffix;
1479
1480   /* This relies on FN to be after canonicalize_filename,
1481      so we don't need to consider backslashes on DOS_NT.  */
1482   slash = etags_strrchr (file, '/');
1483   suffix = etags_strrchr (file, '.');
1484   if (suffix == NULL || suffix < slash)
1485     return NULL;
1486   if (extptr != NULL)
1487     *extptr = suffix;
1488   suffix += 1;
1489   /* Let those poor souls who live with DOS 8+3 file name limits get
1490      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1491      Only the first do loop is run if not MSDOS */
1492   do
1493     {
1494       for (compr = compressors; compr->suffix != NULL; compr++)
1495         if (streq (compr->suffix, suffix))
1496           return compr;
1497       if (!MSDOS)
1498         break;                  /* do it only once: not really a loop */
1499       if (extptr != NULL)
1500         *extptr = ++suffix;
1501     } while (*suffix != '\0');
1502   return NULL;
1503 }
1504
1505
1506
1507 /*
1508  * Return a language given the name.
1509  */
1510 static language *
1511 get_language_from_langname (name)
1512      const char *name;
1513 {
1514   language *lang;
1515
1516   if (name == NULL)
1517     error ("empty language name", (char *)NULL);
1518   else
1519     {
1520       for (lang = lang_names; lang->name != NULL; lang++)
1521         if (streq (name, lang->name))
1522           return lang;
1523       error ("unknown language \"%s\"", name);
1524     }
1525
1526   return NULL;
1527 }
1528
1529
1530 /*
1531  * Return a language given the interpreter name.
1532  */
1533 static language *
1534 get_language_from_interpreter (interpreter)
1535      char *interpreter;
1536 {
1537   language *lang;
1538   char **iname;
1539
1540   if (interpreter == NULL)
1541     return NULL;
1542   for (lang = lang_names; lang->name != NULL; lang++)
1543     if (lang->interpreters != NULL)
1544       for (iname = lang->interpreters; *iname != NULL; iname++)
1545         if (streq (*iname, interpreter))
1546             return lang;
1547
1548   return NULL;
1549 }
1550
1551
1552
1553 /*
1554  * Return a language given the file name.
1555  */
1556 static language *
1557 get_language_from_filename (file, case_sensitive)
1558      char *file;
1559      bool case_sensitive;
1560 {
1561   language *lang;
1562   char **name, **ext, *suffix;
1563
1564   /* Try whole file name first. */
1565   for (lang = lang_names; lang->name != NULL; lang++)
1566     if (lang->filenames != NULL)
1567       for (name = lang->filenames; *name != NULL; name++)
1568         if ((case_sensitive)
1569             ? streq (*name, file)
1570             : strcaseeq (*name, file))
1571           return lang;
1572
1573   /* If not found, try suffix after last dot. */
1574   suffix = etags_strrchr (file, '.');
1575   if (suffix == NULL)
1576     return NULL;
1577   suffix += 1;
1578   for (lang = lang_names; lang->name != NULL; lang++)
1579     if (lang->suffixes != NULL)
1580       for (ext = lang->suffixes; *ext != NULL; ext++)
1581         if ((case_sensitive)
1582             ? streq (*ext, suffix)
1583             : strcaseeq (*ext, suffix))
1584           return lang;
1585   return NULL;
1586 }
1587
1588 \f
1589 /*
1590  * This routine is called on each file argument.
1591  */
1592 static void
1593 process_file_name (file, lang)
1594      char *file;
1595      language *lang;
1596 {
1597   struct stat stat_buf;
1598   FILE *inf;
1599   fdesc *fdp;
1600   compressor *compr;
1601   char *compressed_name, *uncompressed_name;
1602   char *ext, *real_name;
1603   int retval;
1604
1605   canonicalize_filename (file);
1606   if (streq (file, tagfile) && !streq (tagfile, "-"))
1607     {
1608       error ("skipping inclusion of %s in self.", file);
1609       return;
1610     }
1611   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1612     {
1613       compressed_name = NULL;
1614       real_name = uncompressed_name = savestr (file);
1615     }
1616   else
1617     {
1618       real_name = compressed_name = savestr (file);
1619       uncompressed_name = savenstr (file, ext - file);
1620     }
1621
1622   /* If the canonicalized uncompressed name
1623      has already been dealt with, skip it silently. */
1624   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1625     {
1626       assert (fdp->infname != NULL);
1627       if (streq (uncompressed_name, fdp->infname))
1628         goto cleanup;
1629     }
1630
1631   if (stat (real_name, &stat_buf) != 0)
1632     {
1633       /* Reset real_name and try with a different name. */
1634       real_name = NULL;
1635       if (compressed_name != NULL) /* try with the given suffix */
1636         {
1637           if (stat (uncompressed_name, &stat_buf) == 0)
1638             real_name = uncompressed_name;
1639         }
1640       else                      /* try all possible suffixes */
1641         {
1642           for (compr = compressors; compr->suffix != NULL; compr++)
1643             {
1644               compressed_name = concat (file, ".", compr->suffix);
1645               if (stat (compressed_name, &stat_buf) != 0)
1646                 {
1647                   if (MSDOS)
1648                     {
1649                       char *suf = compressed_name + strlen (file);
1650                       size_t suflen = strlen (compr->suffix) + 1;
1651                       for ( ; suf[1]; suf++, suflen--)
1652                         {
1653                           memmove (suf, suf + 1, suflen);
1654                           if (stat (compressed_name, &stat_buf) == 0)
1655                             {
1656                               real_name = compressed_name;
1657                               break;
1658                             }
1659                         }
1660                       if (real_name != NULL)
1661                         break;
1662                     } /* MSDOS */
1663                   free (compressed_name);
1664                   compressed_name = NULL;
1665                 }
1666               else
1667                 {
1668                   real_name = compressed_name;
1669                   break;
1670                 }
1671             }
1672         }
1673       if (real_name == NULL)
1674         {
1675           perror (file);
1676           goto cleanup;
1677         }
1678     } /* try with a different name */
1679
1680   if (!S_ISREG (stat_buf.st_mode))
1681     {
1682       error ("skipping %s: it is not a regular file.", real_name);
1683       goto cleanup;
1684     }
1685   if (real_name == compressed_name)
1686     {
1687       char *cmd = concat (compr->command, " ", real_name);
1688       inf = (FILE *) popen (cmd, "r");
1689       free (cmd);
1690     }
1691   else
1692     inf = fopen (real_name, "r");
1693   if (inf == NULL)
1694     {
1695       perror (real_name);
1696       goto cleanup;
1697     }
1698
1699   process_file (inf, uncompressed_name, lang);
1700
1701   if (real_name == compressed_name)
1702     retval = pclose (inf);
1703   else
1704     retval = fclose (inf);
1705   if (retval < 0)
1706     pfatal (file);
1707
1708  cleanup:
1709   if (compressed_name) free (compressed_name);
1710   if (uncompressed_name) free (uncompressed_name);
1711   last_node = NULL;
1712   curfdp = NULL;
1713   return;
1714 }
1715
1716 static void
1717 process_file (fh, fn, lang)
1718      FILE *fh;
1719      char *fn;
1720      language *lang;
1721 {
1722   static const fdesc emptyfdesc;
1723   fdesc *fdp;
1724
1725   /* Create a new input file description entry. */
1726   fdp = xnew (1, fdesc);
1727   *fdp = emptyfdesc;
1728   fdp->next = fdhead;
1729   fdp->infname = savestr (fn);
1730   fdp->lang = lang;
1731   fdp->infabsname = absolute_filename (fn, cwd);
1732   fdp->infabsdir = absolute_dirname (fn, cwd);
1733   if (filename_is_absolute (fn))
1734     {
1735       /* An absolute file name.  Canonicalize it. */
1736       fdp->taggedfname = absolute_filename (fn, NULL);
1737     }
1738   else
1739     {
1740       /* A file name relative to cwd.  Make it relative
1741          to the directory of the tags file. */
1742       fdp->taggedfname = relative_filename (fn, tagfiledir);
1743     }
1744   fdp->usecharno = TRUE;        /* use char position when making tags */
1745   fdp->prop = NULL;
1746   fdp->written = FALSE;         /* not written on tags file yet */
1747
1748   fdhead = fdp;
1749   curfdp = fdhead;              /* the current file description */
1750
1751   find_entries (fh);
1752
1753   /* If not Ctags, and if this is not metasource and if it contained no #line
1754      directives, we can write the tags and free all nodes pointing to
1755      curfdp. */
1756   if (!CTAGS
1757       && curfdp->usecharno      /* no #line directives in this file */
1758       && !curfdp->lang->metasource)
1759     {
1760       node *np, *prev;
1761
1762       /* Look for the head of the sublist relative to this file.  See add_node
1763          for the structure of the node tree. */
1764       prev = NULL;
1765       for (np = nodehead; np != NULL; prev = np, np = np->left)
1766         if (np->fdp == curfdp)
1767           break;
1768
1769       /* If we generated tags for this file, write and delete them. */
1770       if (np != NULL)
1771         {
1772           /* This is the head of the last sublist, if any.  The following
1773              instructions depend on this being true. */
1774           assert (np->left == NULL);
1775
1776           assert (fdhead == curfdp);
1777           assert (last_node->fdp == curfdp);
1778           put_entries (np);     /* write tags for file curfdp->taggedfname */
1779           free_tree (np);       /* remove the written nodes */
1780           if (prev == NULL)
1781             nodehead = NULL;    /* no nodes left */
1782           else
1783             prev->left = NULL;  /* delete the pointer to the sublist */
1784         }
1785     }
1786 }
1787
1788 /*
1789  * This routine sets up the boolean pseudo-functions which work
1790  * by setting boolean flags dependent upon the corresponding character.
1791  * Every char which is NOT in that string is not a white char.  Therefore,
1792  * all of the array "_wht" is set to FALSE, and then the elements
1793  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1794  * of a char is TRUE if it is the string "white", else FALSE.
1795  */
1796 static void
1797 init ()
1798 {
1799   register char *sp;
1800   register int i;
1801
1802   for (i = 0; i < CHARS; i++)
1803     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1804   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1805   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1806   notinname('\0') = notinname('\n');
1807   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1808   begtoken('\0') = begtoken('\n');
1809   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1810   intoken('\0') = intoken('\n');
1811   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1812   endtoken('\0') = endtoken('\n');
1813 }
1814
1815 /*
1816  * This routine opens the specified file and calls the function
1817  * which finds the function and type definitions.
1818  */
1819 static void
1820 find_entries (inf)
1821      FILE *inf;
1822 {
1823   char *cp;
1824   language *lang = curfdp->lang;
1825   Lang_function *parser = NULL;
1826
1827   /* If user specified a language, use it. */
1828   if (lang != NULL && lang->function != NULL)
1829     {
1830       parser = lang->function;
1831     }
1832
1833   /* Else try to guess the language given the file name. */
1834   if (parser == NULL)
1835     {
1836       lang = get_language_from_filename (curfdp->infname, TRUE);
1837       if (lang != NULL && lang->function != NULL)
1838         {
1839           curfdp->lang = lang;
1840           parser = lang->function;
1841         }
1842     }
1843
1844   /* Else look for sharp-bang as the first two characters. */
1845   if (parser == NULL
1846       && readline_internal (&lb, inf) > 0
1847       && lb.len >= 2
1848       && lb.buffer[0] == '#'
1849       && lb.buffer[1] == '!')
1850     {
1851       char *lp;
1852
1853       /* Set lp to point at the first char after the last slash in the
1854          line or, if no slashes, at the first nonblank.  Then set cp to
1855          the first successive blank and terminate the string. */
1856       lp = etags_strrchr (lb.buffer+2, '/');
1857       if (lp != NULL)
1858         lp += 1;
1859       else
1860         lp = skip_spaces (lb.buffer + 2);
1861       cp = skip_non_spaces (lp);
1862       *cp = '\0';
1863
1864       if (strlen (lp) > 0)
1865         {
1866           lang = get_language_from_interpreter (lp);
1867           if (lang != NULL && lang->function != NULL)
1868             {
1869               curfdp->lang = lang;
1870               parser = lang->function;
1871             }
1872         }
1873     }
1874
1875   /* We rewind here, even if inf may be a pipe.  We fail if the
1876      length of the first line is longer than the pipe block size,
1877      which is unlikely. */
1878   rewind (inf);
1879
1880   /* Else try to guess the language given the case insensitive file name. */
1881   if (parser == NULL)
1882     {
1883       lang = get_language_from_filename (curfdp->infname, FALSE);
1884       if (lang != NULL && lang->function != NULL)
1885         {
1886           curfdp->lang = lang;
1887           parser = lang->function;
1888         }
1889     }
1890
1891   /* Else try Fortran or C. */
1892   if (parser == NULL)
1893     {
1894       node *old_last_node = last_node;
1895
1896       curfdp->lang = get_language_from_langname ("fortran");
1897       find_entries (inf);
1898
1899       if (old_last_node == last_node)
1900         /* No Fortran entries found.  Try C. */
1901         {
1902           /* We do not tag if rewind fails.
1903              Only the file name will be recorded in the tags file. */
1904           rewind (inf);
1905           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1906           find_entries (inf);
1907         }
1908       return;
1909     }
1910
1911   if (!no_line_directive
1912       && curfdp->lang != NULL && curfdp->lang->metasource)
1913     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1914        file, or anyway we parsed a file that is automatically generated from
1915        this one.  If this is the case, the bingo.c file contained #line
1916        directives that generated tags pointing to this file.  Let's delete
1917        them all before parsing this file, which is the real source. */
1918     {
1919       fdesc **fdpp = &fdhead;
1920       while (*fdpp != NULL)
1921         if (*fdpp != curfdp
1922             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1923           /* We found one of those!  We must delete both the file description
1924              and all tags referring to it. */
1925           {
1926             fdesc *badfdp = *fdpp;
1927
1928             /* Delete the tags referring to badfdp->taggedfname
1929                that were obtained from badfdp->infname. */
1930             invalidate_nodes (badfdp, &nodehead);
1931
1932             *fdpp = badfdp->next; /* remove the bad description from the list */
1933             free_fdesc (badfdp);
1934           }
1935         else
1936           fdpp = &(*fdpp)->next; /* advance the list pointer */
1937     }
1938
1939   assert (parser != NULL);
1940
1941   /* Generic initialisations before reading from file. */
1942   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1943
1944   /* Generic initialisations before parsing file with readline. */
1945   lineno = 0;                  /* reset global line number */
1946   charno = 0;                  /* reset global char number */
1947   linecharno = 0;              /* reset global char number of line start */
1948
1949   parser (inf);
1950
1951   regex_tag_multiline ();
1952 }
1953
1954 \f
1955 /*
1956  * Check whether an implicitly named tag should be created,
1957  * then call `pfnote'.
1958  * NAME is a string that is internally copied by this function.
1959  *
1960  * TAGS format specification
1961  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1962  * The following is explained in some more detail in etc/ETAGS.EBNF.
1963  *
1964  * make_tag creates tags with "implicit tag names" (unnamed tags)
1965  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1966  *  1. NAME does not contain any of the characters in NONAM;
1967  *  2. LINESTART contains name as either a rightmost, or rightmost but
1968  *     one character, substring;
1969  *  3. the character, if any, immediately before NAME in LINESTART must
1970  *     be a character in NONAM;
1971  *  4. the character, if any, immediately after NAME in LINESTART must
1972  *     also be a character in NONAM.
1973  *
1974  * The implementation uses the notinname() macro, which recognises the
1975  * characters stored in the string `nonam'.
1976  * etags.el needs to use the same characters that are in NONAM.
1977  */
1978 static void
1979 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1980      char *name;                /* tag name, or NULL if unnamed */
1981      int namelen;               /* tag length */
1982      bool is_func;              /* tag is a function */
1983      char *linestart;           /* start of the line where tag is */
1984      int linelen;               /* length of the line where tag is */
1985      int lno;                   /* line number */
1986      long cno;                  /* character number */
1987 {
1988   bool named = (name != NULL && namelen > 0);
1989
1990   if (!CTAGS && named)          /* maybe set named to false */
1991     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1992        such that etags.el can guess a name from it. */
1993     {
1994       int i;
1995       register char *cp = name;
1996
1997       for (i = 0; i < namelen; i++)
1998         if (notinname (*cp++))
1999           break;
2000       if (i == namelen)                         /* rule #1 */
2001         {
2002           cp = linestart + linelen - namelen;
2003           if (notinname (linestart[linelen-1]))
2004             cp -= 1;                            /* rule #4 */
2005           if (cp >= linestart                   /* rule #2 */
2006               && (cp == linestart
2007                   || notinname (cp[-1]))        /* rule #3 */
2008               && strneq (name, cp, namelen))    /* rule #2 */
2009             named = FALSE;      /* use implicit tag name */
2010         }
2011     }
2012
2013   if (named)
2014     name = savenstr (name, namelen);
2015   else
2016     name = NULL;
2017   pfnote (name, is_func, linestart, linelen, lno, cno);
2018 }
2019
2020 /* Record a tag. */
2021 static void
2022 pfnote (name, is_func, linestart, linelen, lno, cno)
2023      char *name;                /* tag name, or NULL if unnamed */
2024      bool is_func;              /* tag is a function */
2025      char *linestart;           /* start of the line where tag is */
2026      int linelen;               /* length of the line where tag is */
2027      int lno;                   /* line number */
2028      long cno;                  /* character number */
2029 {
2030   register node *np;
2031
2032   assert (name == NULL || name[0] != '\0');
2033   if (CTAGS && name == NULL)
2034     return;
2035
2036   np = xnew (1, node);
2037
2038   /* If ctags mode, change name "main" to M<thisfilename>. */
2039   if (CTAGS && !cxref_style && streq (name, "main"))
2040     {
2041       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2042       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2043       fp = etags_strrchr (np->name, '.');
2044       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2045         fp[0] = '\0';
2046     }
2047   else
2048     np->name = name;
2049   np->valid = TRUE;
2050   np->been_warned = FALSE;
2051   np->fdp = curfdp;
2052   np->is_func = is_func;
2053   np->lno = lno;
2054   if (np->fdp->usecharno)
2055     /* Our char numbers are 0-base, because of C language tradition?
2056        ctags compatibility?  old versions compatibility?   I don't know.
2057        Anyway, since emacs's are 1-base we expect etags.el to take care
2058        of the difference.  If we wanted to have 1-based numbers, we would
2059        uncomment the +1 below. */
2060     np->cno = cno /* + 1 */ ;
2061   else
2062     np->cno = invalidcharno;
2063   np->left = np->right = NULL;
2064   if (CTAGS && !cxref_style)
2065     {
2066       if (strlen (linestart) < 50)
2067         np->regex = concat (linestart, "$", "");
2068       else
2069         np->regex = savenstr (linestart, 50);
2070     }
2071   else
2072     np->regex = savenstr (linestart, linelen);
2073
2074   add_node (np, &nodehead);
2075 }
2076
2077 /*
2078  * free_tree ()
2079  *      recurse on left children, iterate on right children.
2080  */
2081 static void
2082 free_tree (np)
2083      register node *np;
2084 {
2085   while (np)
2086     {
2087       register node *node_right = np->right;
2088       free_tree (np->left);
2089       if (np->name != NULL)
2090         free (np->name);
2091       free (np->regex);
2092       free (np);
2093       np = node_right;
2094     }
2095 }
2096
2097 /*
2098  * free_fdesc ()
2099  *      delete a file description
2100  */
2101 static void
2102 free_fdesc (fdp)
2103      register fdesc *fdp;
2104 {
2105   if (fdp->infname != NULL) free (fdp->infname);
2106   if (fdp->infabsname != NULL) free (fdp->infabsname);
2107   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2108   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2109   if (fdp->prop != NULL) free (fdp->prop);
2110   free (fdp);
2111 }
2112
2113 /*
2114  * add_node ()
2115  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2116  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2117  *      balancing.
2118  *
2119  *      add_node is the only function allowed to add nodes, so it can
2120  *      maintain state.
2121  */
2122 static void
2123 add_node (np, cur_node_p)
2124      node *np, **cur_node_p;
2125 {
2126   register int dif;
2127   register node *cur_node = *cur_node_p;
2128
2129   if (cur_node == NULL)
2130     {
2131       *cur_node_p = np;
2132       last_node = np;
2133       return;
2134     }
2135
2136   if (!CTAGS)
2137     /* Etags Mode */
2138     {
2139       /* For each file name, tags are in a linked sublist on the right
2140          pointer.  The first tags of different files are a linked list
2141          on the left pointer.  last_node points to the end of the last
2142          used sublist. */
2143       if (last_node != NULL && last_node->fdp == np->fdp)
2144         {
2145           /* Let's use the same sublist as the last added node. */
2146           assert (last_node->right == NULL);
2147           last_node->right = np;
2148           last_node = np;
2149         }
2150       else if (cur_node->fdp == np->fdp)
2151         {
2152           /* Scanning the list we found the head of a sublist which is
2153              good for us.  Let's scan this sublist. */
2154           add_node (np, &cur_node->right);
2155         }
2156       else
2157         /* The head of this sublist is not good for us.  Let's try the
2158            next one. */
2159         add_node (np, &cur_node->left);
2160     } /* if ETAGS mode */
2161
2162   else
2163     {
2164       /* Ctags Mode */
2165       dif = strcmp (np->name, cur_node->name);
2166
2167       /*
2168        * If this tag name matches an existing one, then
2169        * do not add the node, but maybe print a warning.
2170        */
2171       if (!dif)
2172         {
2173           if (np->fdp == cur_node->fdp)
2174             {
2175               if (!no_warnings)
2176                 {
2177                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2178                            np->fdp->infname, lineno, np->name);
2179                   fprintf (stderr, "Second entry ignored\n");
2180                 }
2181             }
2182           else if (!cur_node->been_warned && !no_warnings)
2183             {
2184               fprintf
2185                 (stderr,
2186                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2187                  np->fdp->infname, cur_node->fdp->infname, np->name);
2188               cur_node->been_warned = TRUE;
2189             }
2190           return;
2191         }
2192
2193       /* Actually add the node */
2194       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2195     } /* if CTAGS mode */
2196 }
2197
2198 /*
2199  * invalidate_nodes ()
2200  *      Scan the node tree and invalidate all nodes pointing to the
2201  *      given file description (CTAGS case) or free them (ETAGS case).
2202  */
2203 static void
2204 invalidate_nodes (badfdp, npp)
2205      fdesc *badfdp;
2206      node **npp;
2207 {
2208   node *np = *npp;
2209
2210   if (np == NULL)
2211     return;
2212
2213   if (CTAGS)
2214     {
2215       if (np->left != NULL)
2216         invalidate_nodes (badfdp, &np->left);
2217       if (np->fdp == badfdp)
2218         np->valid = FALSE;
2219       if (np->right != NULL)
2220         invalidate_nodes (badfdp, &np->right);
2221     }
2222   else
2223     {
2224       assert (np->fdp != NULL);
2225       if (np->fdp == badfdp)
2226         {
2227           *npp = np->left;      /* detach the sublist from the list */
2228           np->left = NULL;      /* isolate it */
2229           free_tree (np);       /* free it */
2230           invalidate_nodes (badfdp, npp);
2231         }
2232       else
2233         invalidate_nodes (badfdp, &np->left);
2234     }
2235 }
2236
2237 \f
2238 static int total_size_of_entries __P((node *));
2239 static int number_len __P((long));
2240
/* Number of decimal digits needed to print NUM, which is expected to be
   non-negative.  The result is 1 for any NUM smaller than 10. */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to start with, plus one for each division by ten that
     the number can still take. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2251
2252 /*
2253  * Return total number of characters that put_entries will output for
2254  * the nodes in the linked list at the right of the specified node.
2255  * This count is irrelevant with etags.el since emacs 19.34 at least,
2256  * but is still supplied for backward compatibility.
2257  */
2258 static int
2259 total_size_of_entries (np)
2260      register node *np;
2261 {
2262   register int total = 0;
2263
2264   for (; np != NULL; np = np->right)
2265     if (np->valid)
2266       {
2267         total += strlen (np->regex) + 1;                /* pat\177 */
2268         if (np->name != NULL)
2269           total += strlen (np->name) + 1;               /* name\001 */
2270         total += number_len ((long) np->lno) + 1;       /* lno, */
2271         if (np->cno != invalidcharno)                   /* cno */
2272           total += number_len (np->cno);
2273         total += 1;                                     /* newline */
2274       }
2275
2276   return total;
2277 }
2278
2279 static void
2280 put_entries (np)
2281      register node *np;
2282 {
2283   register char *sp;
2284   static fdesc *fdp = NULL;
2285
2286   if (np == NULL)
2287     return;
2288
2289   /* Output subentries that precede this one */
2290   if (CTAGS)
2291     put_entries (np->left);
2292
2293   /* Output this entry */
2294   if (np->valid)
2295     {
2296       if (!CTAGS)
2297         {
2298           /* Etags mode */
2299           if (fdp != np->fdp)
2300             {
2301               fdp = np->fdp;
2302               fprintf (tagf, "\f\n%s,%d\n",
2303                        fdp->taggedfname, total_size_of_entries (np));
2304               fdp->written = TRUE;
2305             }
2306           fputs (np->regex, tagf);
2307           fputc ('\177', tagf);
2308           if (np->name != NULL)
2309             {
2310               fputs (np->name, tagf);
2311               fputc ('\001', tagf);
2312             }
2313           fprintf (tagf, "%d,", np->lno);
2314           if (np->cno != invalidcharno)
2315             fprintf (tagf, "%ld", np->cno);
2316           fputs ("\n", tagf);
2317         }
2318       else
2319         {
2320           /* Ctags mode */
2321           if (np->name == NULL)
2322             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2323
2324           if (cxref_style)
2325             {
2326               if (vgrind_style)
2327                 fprintf (stdout, "%s %s %d\n",
2328                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2329               else
2330                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2331                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2332             }
2333           else
2334             {
2335               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2336
2337               if (np->is_func)
2338                 {               /* function or #define macro with args */
2339                   putc (searchar, tagf);
2340                   putc ('^', tagf);
2341
2342                   for (sp = np->regex; *sp; sp++)
2343                     {
2344                       if (*sp == '\\' || *sp == searchar)
2345                         putc ('\\', tagf);
2346                       putc (*sp, tagf);
2347                     }
2348                   putc (searchar, tagf);
2349                 }
2350               else
2351                 {               /* anything else; text pattern inadequate */
2352                   fprintf (tagf, "%d", np->lno);
2353                 }
2354               putc ('\n', tagf);
2355             }
2356         }
2357     } /* if this node contains a valid tag */
2358
2359   /* Output subentries that follow this one */
2360   put_entries (np->right);
2361   if (!CTAGS)
2362     put_entries (np->left);
2363 }
2364
2365 \f
2366 /* C extensions. */
2367 #define C_EXT   0x00fff         /* C extensions */
2368 #define C_PLAIN 0x00000         /* C */
2369 #define C_PLPL  0x00001         /* C++ */
2370 #define C_STAR  0x00003         /* C* */
2371 #define C_JAVA  0x00005         /* JAVA */
2372 #define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
2373 #define YACC    0x10000         /* yacc file */
2374
/*
 * The C symbol tables.
 *
 * sym_type classifies the words of the keyword table below; the
 * keywords quoted next to each value are those that the table maps
 * to it.
 */
enum sym_type
{
  /* No keyword in the table maps to this value; presumably it stands
     for "not a keyword" -- confirm against C_symtype. */
  st_none,
  /* @interface and @protocol; @implementation; @end. */
  st_C_objprot, st_C_objimpl, st_C_objend,
  /* DEFUN, SYSCALL, ENTRY, PSEUDO. */
  st_C_gnumacro,
  /* if, for, while, switch, return, import, package, friend;
     __attribute__. */
  st_C_ignore, st_C_attribute,
  /* extends, implements. */
  st_C_javastruct,
  /* operator. */
  st_C_operator,
  /* class; template. */
  st_C_class, st_C_template,
  /* struct, union, namespace, domain, interface; extern; enum;
     define and undef; typedef. */
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2389
2390 static unsigned int hash __P((const char *, unsigned int));
2391 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2392 static enum sym_type C_symtype __P((char *, int, int));
2393
2394 /* Feed stuff between (but not including) %[ and %] lines to:
2395      gperf -m 5
2396 %[
2397 %compare-strncmp
2398 %enum
2399 %struct-type
2400 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2401 %%
2402 if,             0,                      st_C_ignore
2403 for,            0,                      st_C_ignore
2404 while,          0,                      st_C_ignore
2405 switch,         0,                      st_C_ignore
2406 return,         0,                      st_C_ignore
2407 __attribute__,  0,                      st_C_attribute
2408 @interface,     0,                      st_C_objprot
2409 @protocol,      0,                      st_C_objprot
2410 @implementation,0,                      st_C_objimpl
2411 @end,           0,                      st_C_objend
2412 import,         (C_JAVA & !C_PLPL),     st_C_ignore
2413 package,        (C_JAVA & !C_PLPL),     st_C_ignore
2414 friend,         C_PLPL,                 st_C_ignore
2415 extends,        (C_JAVA & !C_PLPL),     st_C_javastruct
2416 implements,     (C_JAVA & !C_PLPL),     st_C_javastruct
2417 interface,      (C_JAVA & !C_PLPL),     st_C_struct
2418 class,          0,                      st_C_class
2419 namespace,      C_PLPL,                 st_C_struct
2420 domain,         C_STAR,                 st_C_struct
2421 union,          0,                      st_C_struct
2422 struct,         0,                      st_C_struct
2423 extern,         0,                      st_C_extern
2424 enum,           0,                      st_C_enum
2425 typedef,        0,                      st_C_typedef
2426 define,         0,                      st_C_define
2427 undef,          0,                      st_C_define
2428 operator,       C_PLPL,                 st_C_operator
2429 template,       0,                      st_C_template
2430 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2431 DEFUN,          0,                      st_C_gnumacro
2432 SYSCALL,        0,                      st_C_gnumacro
2433 ENTRY,          0,                      st_C_gnumacro
2434 PSEUDO,         0,                      st_C_gnumacro
2435 # These are defined inside C functions, so currently they are not met.
2436 # EXFUN used in glibc, DEFVAR_* in emacs.
2437 #EXFUN,         0,                      st_C_gnumacro
2438 #DEFVAR_,       0,                      st_C_gnumacro
2439 %]
2440 and replace lines between %< and %> with its output, then:
2441  - remove the #if characterset check
2442  - make in_word_set static and not inline. */
2443 /*%<*/
2444 /* C code produced by gperf version 3.0.1 */
2445 /* Command-line: gperf -m 5  */
2446 /* Computed positions: -k'2-3' */
2447
2448 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2449 /* maximum key range = 33, duplicates = 0 */
2450
/* Hash function for the keyword table, as generated by gperf.
   The value is LEN plus the association value of the second character
   of STR and, unless LEN is exactly 2, of the third one too.  The
   caller must make sure that those characters can be read. */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  /* Association value of every possible byte; 35 marks the bytes that
     get no smaller value. */
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Every length other than 2 also hashes on the third character. */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];

  return hval;
}
2505
2506 static struct C_stab_entry *
2507 in_word_set (str, len)
2508      register const char *str;
2509      register unsigned int len;
2510 {
2511   enum
2512     {
2513       TOTAL_KEYWORDS = 32,
2514       MIN_WORD_LENGTH = 2,
2515       MAX_WORD_LENGTH = 15,
2516       MIN_HASH_VALUE = 2,
2517       MAX_HASH_VALUE = 34
2518     };
2519
2520   static struct C_stab_entry wordlist[] =
2521     {
2522       {""}, {""},
2523       {"if",            0,                      st_C_ignore},
2524       {""},
2525       {"@end",          0,                      st_C_objend},
2526       {"union",         0,                      st_C_struct},
2527       {"define",                0,                      st_C_define},
2528       {"import",                (C_JAVA & !C_PLPL),     st_C_ignore},
2529       {"template",      0,                      st_C_template},
2530       {"operator",      C_PLPL,                 st_C_operator},
2531       {"@interface",    0,                      st_C_objprot},
2532       {"implements",    (C_JAVA & !C_PLPL),     st_C_javastruct},
2533       {"friend",                C_PLPL,                 st_C_ignore},
2534       {"typedef",       0,                      st_C_typedef},
2535       {"return",                0,                      st_C_ignore},
2536       {"@implementation",0,                     st_C_objimpl},
2537       {"@protocol",     0,                      st_C_objprot},
2538       {"interface",     (C_JAVA & !C_PLPL),     st_C_struct},
2539       {"extern",                0,                      st_C_extern},
2540       {"extends",       (C_JAVA & !C_PLPL),     st_C_javastruct},
2541       {"struct",                0,                      st_C_struct},
2542       {"domain",                C_STAR,                 st_C_struct},
2543       {"switch",                0,                      st_C_ignore},
2544       {"enum",          0,                      st_C_enum},
2545       {"for",           0,                      st_C_ignore},
2546       {"namespace",     C_PLPL,                 st_C_struct},
2547       {"class",         0,                      st_C_class},
2548       {"while",         0,                      st_C_ignore},
2549       {"undef",         0,                      st_C_define},
2550       {"package",       (C_JAVA & !C_PLPL),     st_C_ignore},
2551       {"__attribute__", 0,                      st_C_attribute},
2552       {"SYSCALL",       0,                      st_C_gnumacro},
2553       {"ENTRY",         0,                      st_C_gnumacro},
2554       {"PSEUDO",                0,                      st_C_gnumacro},
2555       {"DEFUN",         0,                      st_C_gnumacro}
2556     };
2557
2558   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2559     {
2560       register int key = hash (str, len);
2561
2562       if (key <= MAX_HASH_VALUE && key >= 0)
2563         {
2564           register const char *s = wordlist[key].name;
2565
2566           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2567             return &wordlist[key];
2568         }
2569     }
2570   return 0;
2571 }
2572 /*%>*/
2573
2574 static enum sym_type
2575 C_symtype (str, len, c_ext)
2576      char *str;
2577      int len;
2578      int c_ext;
2579 {
2580   register struct C_stab_entry *se = in_word_set (str, len);
2581
2582   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2583     return st_none;
2584   return se->type;
2585 }
2586
2587 \f
2588 /*
2589  * Ignoring __attribute__ ((list)).  consider_token sets this flag when it meets the __attribute__ keyword.
2590  */
2591 static bool inattribute;        /* looking at an __attribute__ construct */
2592
2593 /*
2594  * C functions and variables are recognized using a simple
2595  * finite automaton.  fvdef is its state variable.
2596  */
2597 static enum
2598 {
2599   fvnone,                       /* nothing seen */
2600   fdefunkey,                    /* GNU macro keyword seen (DEFUN, ENTRY, SYSCALL or PSEUDO) */
2601   fdefunname,                   /* name following a GNU macro keyword seen */
2602   foperator,                    /* func: operator keyword seen (cplpl) */
2603   fvnameseen,                   /* function or variable name seen */
2604   fstartlist,                   /* func: just after open parenthesis */
2605   finlist,                      /* func: in parameter list */
2606   flistseen,                    /* func: after parameter list */
2607   fignore,                      /* func: before open brace */
2608   vignore                       /* var-like: ignore until ';' */
2609 } fvdef;
2610
2611 static bool fvextern;           /* func or var: extern keyword seen */
2612
2613 /*
2614  * typedefs are recognized using a simple finite automaton.
2615  * typdef is its state variable.
2616  */
2617 static enum
2618 {
2619   tnone,                        /* nothing seen */
2620   tkeyseen,                     /* typedef keyword seen (entered only when `typedefs' is set) */
2621   ttypeseen,                    /* defined type seen */
2622   tinbody,                      /* inside typedef body */
2623   tend,                         /* just before typedef tag */
2624   tignore                       /* junk after typedef tag */
2625 } typdef;
2626
2627 /*
2628  * struct-like structures (enum, struct and union) are recognized
2629  * using another simple finite automaton.  `structdef' is its state
2630  * variable.
2631  */
2632 static enum
2633 {
2634   snone,                        /* nothing seen yet,
2635                                    or in struct body if bracelev > 0 */
2636   skeyseen,                     /* struct-like keyword seen */
2637   stagseen,                     /* struct-like tag seen */
2638   scolonseen                    /* colon, or Java extends/implements keyword, seen after struct-like tag */
2639 } structdef;
2640
2641 /*
2642  * When objdef is different from onone, objtag is the name of the class.  consider_token allocates it with savenstr and never frees it.
2643  */
2644 static char *objtag = "<uninited>";
2645
2646 /*
2647  * Yet another little state machine to deal with preprocessor lines.  The CNL macro resets it to dnone when a new line is read.
2648  */
2649 static enum
2650 {
2651   dnone,                        /* nothing seen */
2652   dsharpseen,                   /* '#' seen as first char on line */
2653   ddefineseen,                  /* '#' and 'define' (or 'undef') seen */
2654   dignorerest                   /* ignore rest of line */
2655 } definedef;
2656
2657 /*
2658  * State machine for Objective C protocols and implementations.
2659  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2660  */
2661 static enum
2662 {
2663   onone,                        /* nothing seen */
2664   oprotocol,                    /* @interface or @protocol seen */
2665   oimplementation,              /* @implementation seen */
2666   otagseen,                     /* class name seen */
2667   oparenseen,                   /* parenthesis before category seen */
2668   ocatseen,                     /* category name seen */
2669   oinbody,                      /* in @implementation body */
2670   omethodsign,                  /* in @implementation body, after +/- */
2671   omethodtag,                   /* after method name */
2672   omethodcolon,                 /* after method colon */
2673   omethodparm,                  /* after method parameter */
2674   oignore                       /* wait for @end */
2675 } objdef;
2676
2677
2678 /*
2679  * Use this structure to keep info about the token read, and how it
2680  * should be tagged.  Used by the make_C_tag function to build a tag.
2681  */
2682 static struct tok
2683 {
2684   char *line;                   /* string containing the token */
2685   int offset;                   /* where the token starts in LINE */
2686   int length;                   /* token length */
2687   /*
2688     The previous members can be used to pass strings around for generic
2689     purposes.  The following ones specifically refer to creating tags.  In this
2690     case the token contained here is the pattern that will be used to create a
2691     tag.
2692   */
2693   bool valid;                   /* when FALSE, do not create a tag; the token
2694                                    should be invalidated whenever a state
2695                                    machine is reset prematurely */
2696   bool named;                   /* create a named tag */
2697   int lineno;                   /* source line number of tag */
2698   long linepos;                 /* source char number of tag */
2699 } token;                        /* latest token read */
2700
2701 /*
2702  * Variables and functions for dealing with nested structures.
2703  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2704  */
2705 static void pushclass_above __P((int, char *, int));
2706 static void popclass_above __P((int));
2707 static void write_classname __P((linebuffer *, char *qualifier));
2708
2709 static struct {
2710   char **cname;                 /* nested class names; an entry may be NULL */
2711   int *bracelev;                /* nested class brace level */
2712   int nl;                       /* class nesting level (elements used) */
2713   int size;                     /* allocated length of both arrays */
2714 } cstack;                       /* stack for nested declaration tags */
2715 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2716 #define nestlev         (cstack.nl)
2717 /* After struct keyword or in struct body, not inside a nested function.  Expands to a use of a variable named bracelev that must be in scope at the point of use. */
2718 #define instruct        (structdef == snone && nestlev > 0                      \
2719                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2720
2721 static void
2722 pushclass_above (bracelev, str, len)
2723      int bracelev;
2724      char *str;
2725      int len;
2726 {
2727   int nl;
2728
2729   popclass_above (bracelev);
2730   nl = cstack.nl;
2731   if (nl >= cstack.size)
2732     {
2733       int size = cstack.size *= 2;
2734       xrnew (cstack.cname, size, char *);
2735       xrnew (cstack.bracelev, size, int);
2736     }
2737   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2738   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2739   cstack.bracelev[nl] = bracelev;
2740   cstack.nl = nl + 1;
2741 }
2742
2743 static void
2744 popclass_above (bracelev)
2745      int bracelev;
2746 {
2747   int nl;
2748
2749   for (nl = cstack.nl - 1;
2750        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2751        nl--)
2752     {
2753       if (cstack.cname[nl] != NULL)
2754         free (cstack.cname[nl]);
2755       cstack.nl = nl;
2756     }
2757 }
2758
2759 static void
2760 write_classname (cn, qualifier)
2761      linebuffer *cn;
2762      char *qualifier;
2763 {
2764   int i, len;
2765   int qlen = strlen (qualifier);
2766
2767   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2768     {
2769       len = 0;
2770       cn->len = 0;
2771       cn->buffer[0] = '\0';
2772     }
2773   else
2774     {
2775       len = strlen (cstack.cname[0]);
2776       linebuffer_setlen (cn, len);
2777       strcpy (cn->buffer, cstack.cname[0]);
2778     }
2779   for (i = 1; i < cstack.nl; i++)
2780     {
2781       char *s;
2782       int slen;
2783
2784       s = cstack.cname[i];
2785       if (s == NULL)
2786         continue;
2787       slen = strlen (s);
2788       len += slen + qlen;
2789       linebuffer_setlen (cn, len);
2790       strncat (cn->buffer, qualifier, qlen);
2791       strncat (cn->buffer, s, slen);
2792     }
2793 }
2794
2795 \f
2796 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2797 static void make_C_tag __P((bool));
2798
2799 /*
2800  * consider_token ()
2801  *      checks to see if the current token is at the start of a
2802  *      function or variable, or corresponds to a typedef, or
2803  *      is a struct/union/enum tag, or #define, or an enum constant.
2804  *
2805  *      *IS_FUNC_OR_VAR gets TRUE if the token is a function or variable name,
2806  *      a #define macro with args, or an Objective C class or category name.  C_EXTP points to the C extensions mask, which is updated when C++ is detected.
2807  *
2808  * Globals
2809  *      fvdef                   IN OUT
2810  *      structdef               IN OUT
2811  *      definedef               IN OUT
2812  *      typdef                  IN OUT
2813  *      objdef                  IN OUT
2814  */
2815
2816 static bool
2817 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2818      register char *str;        /* IN: token pointer */
2819      register int len;          /* IN: token length */
2820      register int c;            /* IN: first char after the token */
2821      int *c_extp;               /* IN, OUT: C extensions mask */
2822      int bracelev;              /* IN: brace level */
2823      int parlev;                /* IN: parenthesis level */
2824      bool *is_func_or_var;      /* OUT: function or variable found */
2825 {
2826   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2827      structtype is the type of the preceding struct-like keyword, and
2828      structbracelev is the brace level where it has been seen. */
2829   static enum sym_type structtype;
2830   static int structbracelev;
2831   static enum sym_type toktype;  /* keyword class of the current token */
2832
2833
2834   toktype = C_symtype (str, len, *c_extp);
2835
2836   /*
2837    * Skip __attribute__
2838    */
2839   if (toktype == st_C_attribute)
2840     {
2841       inattribute = TRUE;
2842       return FALSE;
2843      }
2844
2845    /*
2846     * Advance the definedef state machine.
2847     */
2848    switch (definedef)
2849      {
2850      case dnone:
2851        /* We're not on a preprocessor line. */
2852        if (toktype == st_C_gnumacro)
2853          {
2854            fvdef = fdefunkey;
2855            return FALSE;
2856          }
2857        break;
2858      case dsharpseen:
2859        if (toktype == st_C_define)
2860          {
2861            definedef = ddefineseen;
2862          }
2863        else
2864          {
2865            definedef = dignorerest;
2866          }
2867        return FALSE;              /* the directive name itself is never tagged */
2868      case ddefineseen:
2869        /*
2870         * Make a tag for any macro, unless it is a constant
2871         * and constantypedefs is FALSE.
2872         */
2873        definedef = dignorerest;
2874        *is_func_or_var = (c == '(');      /* macro with args? */
2875        if (!*is_func_or_var && !constantypedefs)
2876          return FALSE;
2877        else
2878          return TRUE;
2879      case dignorerest:
2880        return FALSE;
2881      default:
2882        error ("internal error: definedef value.", (char *)NULL);
2883      }
2884
2885    /*
2886     * Now typedefs
2887     */
2888    switch (typdef)
2889      {
2890      case tnone:
2891        if (toktype == st_C_typedef)
2892          {
2893            if (typedefs)
2894              typdef = tkeyseen;
2895            fvextern = FALSE;
2896            fvdef = fvnone;
2897            return FALSE;
2898          }
2899        break;
2900      case tkeyseen:
2901        switch (toktype)
2902          {
2903          case st_none:
2904          case st_C_class:
2905          case st_C_struct:
2906          case st_C_enum:
2907            typdef = ttypeseen;
2908          }
2909        break;
2910      case ttypeseen:
2911        if (structdef == snone && fvdef == fvnone)
2912          {
2913            fvdef = fvnameseen;
2914            return TRUE;
2915          }
2916        break;
2917      case tend:
2918        switch (toktype)
2919          {
2920          case st_C_class:
2921          case st_C_struct:
2922          case st_C_enum:
2923            return FALSE;
2924          }
2925        return TRUE;
2926      }
2927
2928    /*
2929     * This structdef business is NOT invoked when we are ctags and the
2930     * file is plain C.  This is because a struct tag may have the same
2931     * name as another tag, and this loses with ctags.
2932     */
2933    switch (toktype)
2934      {
2935      case st_C_javastruct:
2936        if (structdef == stagseen)
2937          structdef = scolonseen;
2938        return FALSE;
2939      case st_C_template:
2940      case st_C_class:
2941        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2942            && bracelev == 0
2943            && definedef == dnone && structdef == snone
2944            && typdef == tnone && fvdef == fvnone)
2945          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2946        if (toktype == st_C_template)
2947          break;
2948        /* FALLTHRU */
2949      case st_C_struct:
2950      case st_C_enum:
2951        if (parlev == 0
2952            && fvdef != vignore
2953            && (typdef == tkeyseen
2954                || (typedefs_or_cplusplus && structdef == snone)))
2955          {
2956            structdef = skeyseen;
2957            structtype = toktype;
2958            structbracelev = bracelev;
2959            if (fvdef == fvnameseen)
2960              fvdef = fvnone;
2961          }
2962        return FALSE;
2963      }
2964
2965    if (structdef == skeyseen)    /* first token after a struct-like keyword: the tag */
2966      {
2967        structdef = stagseen;
2968        return TRUE;
2969      }
2970
2971    if (typdef != tnone)
2972      definedef = dnone;
2973
2974    /* Detect Objective C constructs. */
2975    switch (objdef)
2976      {
2977      case onone:
2978        switch (toktype)
2979          {
2980          case st_C_objprot:
2981            objdef = oprotocol;
2982            return FALSE;
2983          case st_C_objimpl:
2984            objdef = oimplementation;
2985            return FALSE;
2986          }
2987        break;
2988      case oimplementation:
2989        /* Save the class tag for functions or variables defined inside. */
2990        objtag = savenstr (str, len);
2991        objdef = oinbody;
2992        return FALSE;
2993      case oprotocol:
2994        /* Save the class tag for categories. */
2995        objtag = savenstr (str, len);
2996        objdef = otagseen;
2997        *is_func_or_var = TRUE;
2998        return TRUE;
2999      case oparenseen:
3000        objdef = ocatseen;
3001        *is_func_or_var = TRUE;
3002        return TRUE;
3003      case oinbody:
3004        break;
3005      case omethodsign:
3006        if (parlev == 0)
3007          {
3008            fvdef = fvnone;
3009            objdef = omethodtag;
3010            linebuffer_setlen (&token_name, len);  /* start the method name in token_name */
3011            strncpy (token_name.buffer, str, len);
3012            token_name.buffer[len] = '\0';
3013            return TRUE;
3014          }
3015        return FALSE;
3016      case omethodcolon:
3017        if (parlev == 0)
3018          objdef = omethodparm;
3019        return FALSE;
3020      case omethodparm:
3021        if (parlev == 0)
3022          {
3023            fvdef = fvnone;
3024            objdef = omethodtag;
3025            linebuffer_setlen (&token_name, token_name.len + len);  /* append this token to the method name */
3026            strncat (token_name.buffer, str, len);
3027            return TRUE;
3028          }
3029        return FALSE;
3030      case oignore:
3031        if (toktype == st_C_objend)
3032          {
3033            /* Memory leakage here: the string pointed by objtag is
3034               never released, because many tests would be needed to
3035               avoid breaking on incorrect input code.  The amount of
3036               memory leaked here is the sum of the lengths of the
3037               class tags.
3038            free (objtag); */
3039            objdef = onone;
3040          }
3041        return FALSE;
3042      }
3043
3044    /* A function, variable or enum constant? */
3045    switch (toktype)
3046      {
3047      case st_C_extern:
3048        fvextern = TRUE;
3049        switch  (fvdef)
3050          {
3051          case finlist:
3052          case flistseen:
3053          case fignore:
3054          case vignore:
3055            break;
3056          default:
3057            fvdef = fvnone;
3058          }
3059        return FALSE;
3060      case st_C_ignore:
3061        fvextern = FALSE;
3062        fvdef = vignore;
3063        return FALSE;
3064      case st_C_operator:
3065        fvdef = foperator;
3066        *is_func_or_var = TRUE;
3067        return TRUE;
3068      case st_none:
3069        if (constantypedefs
3070            && structdef == snone
3071            && structtype == st_C_enum && bracelev > structbracelev)
3072          return TRUE;           /* enum constant */
3073        switch (fvdef)
3074          {
3075          case fdefunkey:
3076            if (bracelev > 0)
3077              break;
3078            fvdef = fdefunname;  /* GNU macro */
3079            *is_func_or_var = TRUE;
3080            return TRUE;
3081          case fvnone:
3082            switch (typdef)
3083              {
3084              case ttypeseen:
3085                return FALSE;
3086              case tnone:
3087                if ((strneq (str, "asm", 3) && endtoken (str[3]))
3088                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3089                  {
3090                    fvdef = vignore;
3091                    return FALSE;
3092                  }
3093                break;
3094              }
3095           /* FALLTHRU */
3096           case fvnameseen:
3097           if (len >= 10 && strneq (str+len-10, "::operator", 10))  /* 10 is the length of "::operator" */
3098             {
3099               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3100                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3101               fvdef = foperator;
3102               *is_func_or_var = TRUE;
3103               return TRUE;
3104             }
3105           if (bracelev > 0 && !instruct)
3106             break;
3107           fvdef = fvnameseen;   /* function or variable */
3108           *is_func_or_var = TRUE;
3109           return TRUE;
3110         }
3111       break;
3112     }
3113
3114   return FALSE;
3115 }
3116
3117 \f
3118 /*
3119  * C_entries often keeps pointers to tokens or lines which are older than
3120  * the line currently read.  By keeping two line buffers, and switching
3121  * them at end of line, it is possible to use those pointers.
3122  */
3123 static struct
3124 {
3125   long linepos;                 /* value of charno when the line was read */
3126   linebuffer lb;                /* the line itself */
3127 } lbs[2];
3128 /* The macros below expand to uses of the curndx, newndx and c_ext variables of C_entries. */
3129 #define current_lb_is_new (newndx == curndx)
3130 #define switch_line_buffers() (curndx = 1 - curndx)
3131
3132 #define curlb (lbs[curndx].lb)
3133 #define newlb (lbs[newndx].lb)
3134 #define curlinepos (lbs[curndx].linepos)
3135 #define newlinepos (lbs[newndx].linepos)
3136
3137 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3138 #define cplpl (c_ext & C_PLPL)
3139 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3140 /* Read the next input line into curlb, remember charno as its position, point lp at its start and make it the new line; definedef is left untouched.  Uses the charno, inf, lp, quotednl, curndx and newndx variables in scope at the point of use. */
3141 #define CNL_SAVE_DEFINEDEF()                                            \
3142 do {                                                                    \
3143   curlinepos = charno;                                                  \
3144   readline (&curlb, inf);                                               \
3145   lp = curlb.buffer;                                                    \
3146   quotednl = FALSE;                                                     \
3147   newndx = curndx;                                                      \
3148 } while (0)
3149 /* Same as CNL_SAVE_DEFINEDEF, then restore the token saved in savetoken if it is valid, and reset definedef to dnone. */
3150 #define CNL()                                                           \
3151 do {                                                                    \
3152   CNL_SAVE_DEFINEDEF();                                                 \
3153   if (savetoken.valid)                                                  \
3154     {                                                                   \
3155       token = savetoken;                                                \
3156       savetoken.valid = FALSE;                                          \
3157     }                                                                   \
3158   definedef = dnone;                                                    \
3159 } while (0)
3160
3161
3162 static void
3163 make_C_tag (isfun)
3164      bool isfun;
3165 {
3166   /* This function should never be called when token.valid is FALSE, but
3167      we must protect against invalid input or internal errors. */
3168   if (!DEBUG && !token.valid)
3169     return;
3170
3171   if (token.valid)
3172     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3173               token.offset+token.length+1, token.lineno, token.linepos);
3174   else                          /* this case is optimised away if !DEBUG */
3175     make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3176               token_name.len + 17, isfun, token.line,
3177               token.offset+token.length+1, token.lineno, token.linepos);
3178
3179   token.valid = FALSE;
3180 }
3181
3182
3183 /*
3184  * C_entries ()
3185  *      This routine finds functions, variables, typedefs,
3186  *      #define's, enum constants and struct/union/enum definitions in
3187  *      C syntax and adds them to the list.
3188  */
3189 static void
3190 C_entries (c_ext, inf)
3191      int c_ext;                 /* extension of C */
3192      FILE *inf;                 /* input file */
3193 {
3194   register char c;              /* latest char read; '\0' for end of line */
3195   register char *lp;            /* pointer one beyond the character `c' */
3196   int curndx, newndx;           /* indices for current and new lb */
3197   register int tokoff;          /* offset in line of start of current token */
3198   register int toklen;          /* length of current token */
3199   char *qualifier;              /* string used to qualify names */
3200   int qlen;                     /* length of qualifier */
3201   int bracelev;                 /* current brace level */
3202   int bracketlev;               /* current bracket level */
3203   int parlev;                   /* current parenthesis level */
3204   int attrparlev;               /* __attribute__ parenthesis level */
3205   int templatelev;              /* current template level */
3206   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3207   bool incomm, inquote, inchar, quotednl, midtoken;
3208   bool yacc_rules;              /* in the rules part of a yacc file */
3209   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3210
3211
3212   linebuffer_init (&lbs[0].lb);
3213   linebuffer_init (&lbs[1].lb);
3214   if (cstack.size == 0)
3215     {
3216       cstack.size = (DEBUG) ? 1 : 4;
3217       cstack.nl = 0;
3218       cstack.cname = xnew (cstack.size, char *);
3219       cstack.bracelev = xnew (cstack.size, int);
3220     }
3221
3222   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3223   curndx = newndx = 0;
3224   lp = curlb.buffer;
3225   *lp = 0;
3226
3227   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3228   structdef = snone; definedef = dnone; objdef = onone;
3229   yacc_rules = FALSE;
3230   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3231   token.valid = savetoken.valid = FALSE;
3232   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3233   if (cjava)
3234     { qualifier = "."; qlen = 1; }
3235   else
3236     { qualifier = "::"; qlen = 2; }
3237
3238
3239   while (!feof (inf))
3240     {
3241       c = *lp++;
3242       if (c == '\\')
3243         {
3244           /* If we are at the end of the line, the next character is a
3245              '\0'; do not skip it, because it is what tells us
3246              to read the next line.  */
3247           if (*lp == '\0')
3248             {
3249               quotednl = TRUE;
3250               continue;
3251             }
3252           lp++;
3253           c = ' ';
3254         }
3255       else if (incomm)
3256         {
3257           switch (c)
3258             {
3259             case '*':
3260               if (*lp == '/')
3261                 {
3262                   c = *lp++;
3263                   incomm = FALSE;
3264                 }
3265               break;
3266             case '\0':
3267               /* Newlines inside comments do not end macro definitions in
3268                  traditional cpp. */
3269               CNL_SAVE_DEFINEDEF ();
3270               break;
3271             }
3272           continue;
3273         }
3274       else if (inquote)
3275         {
3276           switch (c)
3277             {
3278             case '"':
3279               inquote = FALSE;
3280               break;
3281             case '\0':
3282               /* Newlines inside strings do not end macro definitions
3283                  in traditional cpp, even though compilers don't
3284                  usually accept them. */
3285               CNL_SAVE_DEFINEDEF ();
3286               break;
3287             }
3288           continue;
3289         }
3290       else if (inchar)
3291         {
3292           switch (c)
3293             {
3294             case '\0':
3295               /* Hmmm, something went wrong. */
3296               CNL ();
3297               /* FALLTHRU */
3298             case '\'':
3299               inchar = FALSE;
3300               break;
3301             }
3302           continue;
3303         }
3304       else if (bracketlev > 0)
3305         {
3306           switch (c)
3307             {
3308             case ']':
3309               if (--bracketlev > 0)
3310                 continue;
3311               break;
3312             case '\0':
3313               CNL_SAVE_DEFINEDEF ();
3314               break;
3315             }
3316           continue;
3317         }
3318       else switch (c)
3319         {
3320         case '"':
3321           inquote = TRUE;
3322           if (inattribute)
3323             break;
3324           switch (fvdef)
3325             {
3326             case fdefunkey:
3327             case fstartlist:
3328             case finlist:
3329             case fignore:
3330             case vignore:
3331               break;
3332             default:
3333               fvextern = FALSE;
3334               fvdef = fvnone;
3335             }
3336           continue;
3337         case '\'':
3338           inchar = TRUE;
3339           if (inattribute)
3340             break;
3341           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3342             {
3343               fvextern = FALSE;
3344               fvdef = fvnone;
3345             }
3346           continue;
3347         case '/':
3348           if (*lp == '*')
3349             {
3350               lp++;
3351               incomm = TRUE;
3352               continue;
3353             }
3354           else if (/* cplpl && */ *lp == '/')
3355             {
3356               c = '\0';
3357               break;
3358             }
3359           else
3360             break;
3361         case '%':
3362           if ((c_ext & YACC) && *lp == '%')
3363             {
3364               /* Entering or exiting rules section in yacc file. */
3365               lp++;
3366               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3367               typdef = tnone; structdef = snone;
3368               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3369               bracelev = 0;
3370               yacc_rules = !yacc_rules;
3371               continue;
3372             }
3373           else
3374             break;
3375         case '#':
3376           if (definedef == dnone)
3377             {
3378               char *cp;
3379               bool cpptoken = TRUE;
3380
3381               /* Look back on this line.  If all blanks, or nonblanks
3382                  followed by an end of comment, this is a preprocessor
3383                  token. */
3384               for (cp = newlb.buffer; cp < lp-1; cp++)
3385                 if (!iswhite (*cp))
3386                   {
3387                     if (*cp == '*' && *(cp+1) == '/')
3388                       {
3389                         cp++;
3390                         cpptoken = TRUE;
3391                       }
3392                     else
3393                       cpptoken = FALSE;
3394                   }
3395               if (cpptoken)
3396                 definedef = dsharpseen;
3397             } /* if (definedef == dnone) */
3398           continue;
3399         case '[':
3400           bracketlev++;
3401             continue;
3402         } /* switch (c) */
3403
3404
3405       /* Consider token only if some involved conditions are satisfied. */
3406       if (typdef != tignore
3407           && definedef != dignorerest
3408           && fvdef != finlist
3409           && templatelev == 0
3410           && (definedef != dnone
3411               || structdef != scolonseen)
3412           && !inattribute)
3413         {
3414           if (midtoken)
3415             {
3416               if (endtoken (c))
3417                 {
3418                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3419                     /* This handles :: in the middle,
3420                        but not at the beginning of an identifier.
3421                        Also, space-separated :: is not recognised. */
3422                     {
3423                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3424                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3425                       lp += 2;
3426                       toklen += 2;
3427                       c = lp[-1];
3428                       goto still_in_token;
3429                     }
3430                   else
3431                     {
3432                       bool funorvar = FALSE;
3433
3434                       if (yacc_rules
3435                           || consider_token (newlb.buffer + tokoff, toklen, c,
3436                                              &c_ext, bracelev, parlev,
3437                                              &funorvar))
3438                         {
3439                           if (fvdef == foperator)
3440                             {
3441                               char *oldlp = lp;
3442                               lp = skip_spaces (lp-1);
3443                               if (*lp != '\0')
3444                                 lp += 1;
3445                               while (*lp != '\0'
3446                                      && !iswhite (*lp) && *lp != '(')
3447                                 lp += 1;
3448                               c = *lp++;
3449                               toklen += lp - oldlp;
3450                             }
3451                           token.named = FALSE;
3452                           if (!plainc
3453                               && nestlev > 0 && definedef == dnone)
3454                             /* in struct body */
3455                             {
3456                               write_classname (&token_name, qualifier);
3457                               linebuffer_setlen (&token_name,
3458                                                  token_name.len+qlen+toklen);
3459                               strcat (token_name.buffer, qualifier);
3460                               strncat (token_name.buffer,
3461                                        newlb.buffer + tokoff, toklen);
3462                               token.named = TRUE;
3463                             }
3464                           else if (objdef == ocatseen)
3465                             /* Objective C category */
3466                             {
3467                               int len = strlen (objtag) + 2 + toklen;
3468                               linebuffer_setlen (&token_name, len);
3469                               strcpy (token_name.buffer, objtag);
3470                               strcat (token_name.buffer, "(");
3471                               strncat (token_name.buffer,
3472                                        newlb.buffer + tokoff, toklen);
3473                               strcat (token_name.buffer, ")");
3474                               token.named = TRUE;
3475                             }
3476                           else if (objdef == omethodtag
3477                                    || objdef == omethodparm)
3478                             /* Objective C method */
3479                             {
3480                               token.named = TRUE;
3481                             }
3482                           else if (fvdef == fdefunname)
3483                             /* GNU DEFUN and similar macros */
3484                             {
3485                               bool defun = (newlb.buffer[tokoff] == 'F');
3486                               int off = tokoff;
3487                               int len = toklen;
3488
3489                               /* Rewrite the tag so that emacs lisp DEFUNs
3490                                  can be found by their elisp name */
3491                               if (defun)
3492                                 {
3493                                   off += 1;
3494                                   len -= 1;
3495                                 }
3496                               len = toklen;
3497                               linebuffer_setlen (&token_name, len);
3498                               strncpy (token_name.buffer,
3499                                        newlb.buffer + off, len);
3500                               token_name.buffer[len] = '\0';
3501                               if (defun)
3502                                 while (--len >= 0)
3503                                   if (token_name.buffer[len] == '_')
3504                                     token_name.buffer[len] = '-';
3505                               token.named = defun;
3506                             }
3507                           else
3508                             {
3509                               linebuffer_setlen (&token_name, toklen);
3510                               strncpy (token_name.buffer,
3511                                        newlb.buffer + tokoff, toklen);
3512                               token_name.buffer[toklen] = '\0';
3513                               /* Name macros and members. */
3514                               token.named = (structdef == stagseen
3515                                              || typdef == ttypeseen
3516                                              || typdef == tend
3517                                              || (funorvar
3518                                                  && definedef == dignorerest)
3519                                              || (funorvar
3520                                                  && definedef == dnone
3521                                                  && structdef == snone
3522                                                  && bracelev > 0));
3523                             }
3524                           token.lineno = lineno;
3525                           token.offset = tokoff;
3526                           token.length = toklen;
3527                           token.line = newlb.buffer;
3528                           token.linepos = newlinepos;
3529                           token.valid = TRUE;
3530
3531                           if (definedef == dnone
3532                               && (fvdef == fvnameseen
3533                                   || fvdef == foperator
3534                                   || structdef == stagseen
3535                                   || typdef == tend
3536                                   || typdef == ttypeseen
3537                                   || objdef != onone))
3538                             {
3539                               if (current_lb_is_new)
3540                                 switch_line_buffers ();
3541                             }
3542                           else if (definedef != dnone
3543                                    || fvdef == fdefunname
3544                                    || instruct)
3545                             make_C_tag (funorvar);
3546                         }
3547                       else /* not yacc and consider_token failed */
3548                         {
3549                           if (inattribute && fvdef == fignore)
3550                             {
3551                               /* We have just met __attribute__ after a
3552                                  function parameter list: do not tag the
3553                                  function again. */
3554                               fvdef = fvnone;
3555                             }
3556                         }
3557                       midtoken = FALSE;
3558                     }
3559                 } /* if (endtoken (c)) */
3560               else if (intoken (c))
3561                 still_in_token:
3562                 {
3563                   toklen++;
3564                   continue;
3565                 }
3566             } /* if (midtoken) */
3567           else if (begtoken (c))
3568             {
3569               switch (definedef)
3570                 {
3571                 case dnone:
3572                   switch (fvdef)
3573                     {
3574                     case fstartlist:
3575                       /* This prevents tagging fb in
3576                          void (__attribute__((noreturn)) *fb) (void);
3577                          Fixing this is not easy and not very important. */
3578                       fvdef = finlist;
3579                       continue;
3580                     case flistseen:
3581                       if (plainc || declarations)
3582                         {
3583                           make_C_tag (TRUE); /* a function */
3584                           fvdef = fignore;
3585                         }
3586                       break;
3587                     }
3588                   if (structdef == stagseen && !cjava)
3589                     {
3590                       popclass_above (bracelev);
3591                       structdef = snone;
3592                     }
3593                   break;
3594                 case dsharpseen:
3595                   savetoken = token;
3596                   break;
3597                 }
3598               if (!yacc_rules || lp == newlb.buffer + 1)
3599                 {
3600                   tokoff = lp - 1 - newlb.buffer;
3601                   toklen = 1;
3602                   midtoken = TRUE;
3603                 }
3604               continue;
3605             } /* if (begtoken) */
3606         } /* if must look at token */
3607
3608
3609       /* Detect end of line, colon, comma, semicolon and various braces
3610          after having handled a token.*/
3611       switch (c)
3612         {
3613         case ':':
3614           if (inattribute)
3615             break;
3616           if (yacc_rules && token.offset == 0 && token.valid)
3617             {
3618               make_C_tag (FALSE); /* a yacc function */
3619               break;
3620             }
3621           if (definedef != dnone)
3622             break;
3623           switch (objdef)
3624             {
3625             case  otagseen:
3626               objdef = oignore;
3627               make_C_tag (TRUE); /* an Objective C class */
3628               break;
3629             case omethodtag:
3630             case omethodparm:
3631               objdef = omethodcolon;
3632               linebuffer_setlen (&token_name, token_name.len + 1);
3633               strcat (token_name.buffer, ":");
3634               break;
3635             }
3636           if (structdef == stagseen)
3637             {
3638               structdef = scolonseen;
3639               break;
3640             }
3641           /* Should be useless, but may be work as a safety net. */
3642           if (cplpl && fvdef == flistseen)
3643             {
3644               make_C_tag (TRUE); /* a function */
3645               fvdef = fignore;
3646               break;
3647             }
3648           break;
3649         case ';':
3650           if (definedef != dnone || inattribute)
3651             break;
3652           switch (typdef)
3653             {
3654             case tend:
3655             case ttypeseen:
3656               make_C_tag (FALSE); /* a typedef */
3657               typdef = tnone;
3658               fvdef = fvnone;
3659               break;
3660             case tnone:
3661             case tinbody:
3662             case tignore:
3663               switch (fvdef)
3664                 {
3665                 case fignore:
3666                   if (typdef == tignore || cplpl)
3667                     fvdef = fvnone;
3668                   break;
3669                 case fvnameseen:
3670                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3671                       || (members && instruct))
3672                     make_C_tag (FALSE); /* a variable */
3673                   fvextern = FALSE;
3674                   fvdef = fvnone;
3675                   token.valid = FALSE;
3676                   break;
3677                 case flistseen:
3678                   if ((declarations
3679                        && (cplpl || !instruct)
3680                        && (typdef == tnone || (typdef != tignore && instruct)))
3681                       || (members
3682                           && plainc && instruct))
3683                     make_C_tag (TRUE);  /* a function */
3684                   /* FALLTHRU */
3685                 default:
3686                   fvextern = FALSE;
3687                   fvdef = fvnone;
3688                   if (declarations
3689                        && cplpl && structdef == stagseen)
3690                     make_C_tag (FALSE); /* forward declaration */
3691                   else
3692                     token.valid = FALSE;
3693                 } /* switch (fvdef) */
3694               /* FALLTHRU */
3695             default:
3696               if (!instruct)
3697                 typdef = tnone;
3698             }
3699           if (structdef == stagseen)
3700             structdef = snone;
3701           break;
3702         case ',':
3703           if (definedef != dnone || inattribute)
3704             break;
3705           switch (objdef)
3706             {
3707             case omethodtag:
3708             case omethodparm:
3709               make_C_tag (TRUE); /* an Objective C method */
3710               objdef = oinbody;
3711               break;
3712             }
3713           switch (fvdef)
3714             {
3715             case fdefunkey:
3716             case foperator:
3717             case fstartlist:
3718             case finlist:
3719             case fignore:
3720             case vignore:
3721               break;
3722             case fdefunname:
3723               fvdef = fignore;
3724               break;
3725             case fvnameseen:
3726               if (parlev == 0
3727                   && ((globals
3728                        && bracelev == 0
3729                        && templatelev == 0
3730                        && (!fvextern || declarations))
3731                       || (members && instruct)))
3732                   make_C_tag (FALSE); /* a variable */
3733               break;
3734             case flistseen:
3735               if ((declarations && typdef == tnone && !instruct)
3736                   || (members && typdef != tignore && instruct))
3737                 {
3738                   make_C_tag (TRUE); /* a function */
3739                   fvdef = fvnameseen;
3740                 }
3741               else if (!declarations)
3742                 fvdef = fvnone;
3743               token.valid = FALSE;
3744               break;
3745             default:
3746               fvdef = fvnone;
3747             }
3748           if (structdef == stagseen)
3749             structdef = snone;
3750           break;
3751         case ']':
3752           if (definedef != dnone || inattribute)
3753             break;
3754           if (structdef == stagseen)
3755             structdef = snone;
3756           switch (typdef)
3757             {
3758             case ttypeseen:
3759             case tend:
3760               typdef = tignore;
3761               make_C_tag (FALSE);       /* a typedef */
3762               break;
3763             case tnone:
3764             case tinbody:
3765               switch (fvdef)
3766                 {
3767                 case foperator:
3768                 case finlist:
3769                 case fignore:
3770                 case vignore:
3771                   break;
3772                 case fvnameseen:
3773                   if ((members && bracelev == 1)
3774                       || (globals && bracelev == 0
3775                           && (!fvextern || declarations)))
3776                     make_C_tag (FALSE); /* a variable */
3777                   /* FALLTHRU */
3778                 default:
3779                   fvdef = fvnone;
3780                 }
3781               break;
3782             }
3783           break;
3784         case '(':
3785           if (inattribute)
3786             {
3787               attrparlev++;
3788               break;
3789             }
3790           if (definedef != dnone)
3791             break;
3792           if (objdef == otagseen && parlev == 0)
3793             objdef = oparenseen;
3794           switch (fvdef)
3795             {
3796             case fvnameseen:
3797               if (typdef == ttypeseen
3798                   && *lp != '*'
3799                   && !instruct)
3800                 {
3801                   /* This handles constructs like:
3802                      typedef void OperatorFun (int fun); */
3803                   make_C_tag (FALSE);
3804                   typdef = tignore;
3805                   fvdef = fignore;
3806                   break;
3807                 }
3808               /* FALLTHRU */
3809             case foperator:
3810               fvdef = fstartlist;
3811               break;
3812             case flistseen:
3813               fvdef = finlist;
3814               break;
3815             }
3816           parlev++;
3817           break;
3818         case ')':
3819           if (inattribute)
3820             {
3821               if (--attrparlev == 0)
3822                 inattribute = FALSE;
3823               break;
3824             }
3825           if (definedef != dnone)
3826             break;
3827           if (objdef == ocatseen && parlev == 1)
3828             {
3829               make_C_tag (TRUE); /* an Objective C category */
3830               objdef = oignore;
3831             }
3832           if (--parlev == 0)
3833             {
3834               switch (fvdef)
3835                 {
3836                 case fstartlist:
3837                 case finlist:
3838                   fvdef = flistseen;
3839                   break;
3840                 }
3841               if (!instruct
3842                   && (typdef == tend
3843                       || typdef == ttypeseen))
3844                 {
3845                   typdef = tignore;
3846                   make_C_tag (FALSE); /* a typedef */
3847                 }
3848             }
3849           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3850             parlev = 0;
3851           break;
3852         case '{':
3853           if (definedef != dnone)
3854             break;
3855           if (typdef == ttypeseen)
3856             {
3857               /* Whenever typdef is set to tinbody (currently only
3858                  here), typdefbracelev should be set to bracelev. */
3859               typdef = tinbody;
3860               typdefbracelev = bracelev;
3861             }
3862           switch (fvdef)
3863             {
3864             case flistseen:
3865               make_C_tag (TRUE);    /* a function */
3866               /* FALLTHRU */
3867             case fignore:
3868               fvdef = fvnone;
3869               break;
3870             case fvnone:
3871               switch (objdef)
3872                 {
3873                 case otagseen:
3874                   make_C_tag (TRUE); /* an Objective C class */
3875                   objdef = oignore;
3876                   break;
3877                 case omethodtag:
3878                 case omethodparm:
3879                   make_C_tag (TRUE); /* an Objective C method */
3880                   objdef = oinbody;
3881                   break;
3882                 default:
3883                   /* Neutralize `extern "C" {' grot. */
3884                   if (bracelev == 0 && structdef == snone && nestlev == 0
3885                       && typdef == tnone)
3886                     bracelev = -1;
3887                 }
3888               break;
3889             }
3890           switch (structdef)
3891             {
3892             case skeyseen:         /* unnamed struct */
3893               pushclass_above (bracelev, NULL, 0);
3894               structdef = snone;
3895               break;
3896             case stagseen:         /* named struct or enum */
3897             case scolonseen:       /* a class */
3898               pushclass_above (bracelev,token.line+token.offset, token.length);
3899               structdef = snone;
3900               make_C_tag (FALSE);  /* a struct or enum */
3901               break;
3902             }
3903           bracelev++;
3904           break;
3905         case '*':
3906           if (definedef != dnone)
3907             break;
3908           if (fvdef == fstartlist)
3909             {
3910               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3911               token.valid = FALSE;
3912             }
3913           break;
3914         case '}':
3915           if (definedef != dnone)
3916             break;
3917           if (!ignoreindent && lp == newlb.buffer + 1)
3918             {
3919               if (bracelev != 0)
3920                 token.valid = FALSE;
3921               bracelev = 0;     /* reset brace level if first column */
3922               parlev = 0;       /* also reset paren level, just in case... */
3923             }
3924           else if (bracelev > 0)
3925             bracelev--;
3926           else
3927             token.valid = FALSE; /* something gone amiss, token unreliable */
3928           popclass_above (bracelev);
3929           structdef = snone;
3930           /* Only if typdef == tinbody is typdefbracelev significant. */
3931           if (typdef == tinbody && bracelev <= typdefbracelev)
3932             {
3933               assert (bracelev == typdefbracelev);
3934               typdef = tend;
3935             }
3936           break;
3937         case '=':
3938           if (definedef != dnone)
3939             break;
3940           switch (fvdef)
3941             {
3942             case foperator:
3943             case finlist:
3944             case fignore:
3945             case vignore:
3946               break;
3947             case fvnameseen:
3948               if ((members && bracelev == 1)
3949                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3950                 make_C_tag (FALSE); /* a variable */
3951               /* FALLTHRU */
3952             default:
3953               fvdef = vignore;
3954             }
3955           break;
3956         case '<':
3957           if (cplpl
3958               && (structdef == stagseen || fvdef == fvnameseen))
3959             {
3960               templatelev++;
3961               break;
3962             }
3963           goto resetfvdef;
3964         case '>':
3965           if (templatelev > 0)
3966             {
3967               templatelev--;
3968               break;
3969             }
3970           goto resetfvdef;
3971         case '+':
3972         case '-':
3973           if (objdef == oinbody && bracelev == 0)
3974             {
3975               objdef = omethodsign;
3976               break;
3977             }
3978           /* FALLTHRU */
3979         resetfvdef:
3980         case '#': case '~': case '&': case '%': case '/':
3981         case '|': case '^': case '!': case '.': case '?':
3982           if (definedef != dnone)
3983             break;
3984           /* These surely cannot follow a function tag in C. */
3985           switch (fvdef)
3986             {
3987             case foperator:
3988             case finlist:
3989             case fignore:
3990             case vignore:
3991               break;
3992             default:
3993               fvdef = fvnone;
3994             }
3995           break;
3996         case '\0':
3997           if (objdef == otagseen)
3998             {
3999               make_C_tag (TRUE); /* an Objective C class */
4000               objdef = oignore;
4001             }
4002           /* If a macro spans multiple lines don't reset its state. */
4003           if (quotednl)
4004             CNL_SAVE_DEFINEDEF ();
4005           else
4006             CNL ();
4007           break;
4008         } /* switch (c) */
4009
4010     } /* while not eof */
4011
4012   free (lbs[0].lb.buffer);
4013   free (lbs[1].lb.buffer);
4014 }
4015
4016 /*
4017  * Process either a C++ file or a C file depending on the setting
4018  * of a global flag.
4019  */
4020 static void
4021 default_C_entries (inf)
4022      FILE *inf;
4023 {
4024   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4025 }
4026
/* Parse INF strictly as plain C: no dialect flag is handed to C_entries. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int dialect = 0;              /* empty flag word: plain C */

  C_entries (dialect, inf);
}
4034
4035 /* Always do C++. */
4036 static void
4037 Cplusplus_entries (inf)
4038      FILE *inf;
4039 {
4040   C_entries (C_PLPL, inf);
4041 }
4042
4043 /* Always do Java. */
4044 static void
4045 Cjava_entries (inf)
4046      FILE *inf;
4047 {
4048   C_entries (C_JAVA, inf);
4049 }
4050
4051 /* Always do C*. */
4052 static void
4053 Cstar_entries (inf)
4054      FILE *inf;
4055 {
4056   C_entries (C_STAR, inf);
4057 }
4058
4059 /* Always do Yacc. */
4060 static void
4061 Yacc_entries (inf)
4062      FILE *inf;
4063 {
4064   C_entries (YACC, inf);
4065 }
4066
4067 \f
4068 /* Useful macros. */
     /* LOOP_ON_INPUT_LINES expands to the head of a `for' statement; the
        statement written right after the macro call is the loop body.
        There is no initialization and no increment clause.  Before each
        iteration the test checks !feof (file_pointer) and, if that holds,
        calls readline (&line_buffer, file_pointer) and then stores
        line_buffer.buffer into char_pointer; the trailing TRUE makes the
        comma expression always succeed.  LINE_BUFFER is used both as
        `&line_buffer' and `line_buffer.buffer', so it must be a plain
        lvalue, not an arbitrary expression.  */
4069 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
4070   for (;                        /* loop initialization */               \
4071        !feof (file_pointer)     /* loop test */                         \
4072        &&                       /* instructions at start of loop */     \
4073           (readline (&line_buffer, file_pointer),                       \
4074            char_pointer = line_buffer.buffer,                           \
4075            TRUE);                                                       \
4076       )
4077
     /* LOOKING_AT (cp, kw) succeeds when the text at CP starts with the
        literal string KW and the character right after it satisfies
        notinname.  On success CP is reassigned to the result of
        skip_spaces applied to the position just past KW.  On failure the
        && chain stops before that assignment, so CP is left untouched.
        CP is expanded several times and assigned to, so it must be a
        side-effect-free lvalue.  */
4078 #define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
4079   ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
4080    && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
4081    && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip spaces */
4083
4084 /* Similar to LOOKING_AT, with three differences: the comparison is done
        with strncaseeq (presumably case-insensitive, as the macro name
        says), the character after the keyword is not checked with
        notinname, and on success CP is only advanced past the keyword:
        spaces that follow are NOT skipped.  */
4085 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4086   ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
4087    && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
4088    && ((cp) += sizeof(kw)-1))                   /* skip the keyword */
4089
4090 /*
4091  * Read a file, but do no processing.  This is used to do regexp
4092  * matching on files that have no language defined.
4093  */
4094 static void
4095 just_read_file (inf)
4096      FILE *inf;
4097 {
4098   register char *dummy;
4099
4100   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4101     continue;
4102 }
4103
4104 \f
4105 /* Fortran parsing */
4106
4107 static void F_takeprec __P((void));
4108 static void F_getit __P((FILE *));
4109
4110 static void
4111 F_takeprec ()
4112 {
4113   dbp = skip_spaces (dbp);
4114   if (*dbp != '*')
4115     return;
4116   dbp++;
4117   dbp = skip_spaces (dbp);
4118   if (strneq (dbp, "(*)", 3))
4119     {
4120       dbp += 3;
4121       return;
4122     }
4123   if (!ISDIGIT (*dbp))
4124     {
4125       --dbp;                    /* force failure */
4126       return;
4127     }
4128   do
4129     dbp++;
4130   while (ISDIGIT (*dbp));
4131 }
4132
4133 static void
4134 F_getit (inf)
4135      FILE *inf;
4136 {
4137   register char *cp;
4138
4139   dbp = skip_spaces (dbp);
4140   if (*dbp == '\0')
4141     {
4142       readline (&lb, inf);
4143       dbp = lb.buffer;
4144       if (dbp[5] != '&')
4145         return;
4146       dbp += 6;
4147       dbp = skip_spaces (dbp);
4148     }
4149   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4150     return;
4151   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4152     continue;
4153   make_tag (dbp, cp-dbp, TRUE,
4154             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4155 }
4156
4157
4158 static void
4159 Fortran_functions (inf)
4160      FILE *inf;
4161 {
4162   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4163     {
4164       if (*dbp == '%')
4165         dbp++;                  /* Ratfor escape to fortran */
4166       dbp = skip_spaces (dbp);
4167       if (*dbp == '\0')
4168         continue;
4169       switch (lowcase (*dbp))
4170         {
4171         case 'i':
4172           if (nocase_tail ("integer"))
4173             F_takeprec ();
4174           break;
4175         case 'r':
4176           if (nocase_tail ("real"))
4177             F_takeprec ();
4178           break;
4179         case 'l':
4180           if (nocase_tail ("logical"))
4181             F_takeprec ();
4182           break;
4183         case 'c':
4184           if (nocase_tail ("complex") || nocase_tail ("character"))
4185             F_takeprec ();
4186           break;
4187         case 'd':
4188           if (nocase_tail ("double"))
4189             {
4190               dbp = skip_spaces (dbp);
4191               if (*dbp == '\0')
4192                 continue;
4193               if (nocase_tail ("precision"))
4194                 break;
4195               continue;
4196             }
4197           break;
4198         }
4199       dbp = skip_spaces (dbp);
4200       if (*dbp == '\0')
4201         continue;
4202       switch (lowcase (*dbp))
4203         {
4204         case 'f':
4205           if (nocase_tail ("function"))
4206             F_getit (inf);
4207           continue;
4208         case 's':
4209           if (nocase_tail ("subroutine"))
4210             F_getit (inf);
4211           continue;
4212         case 'e':
4213           if (nocase_tail ("entry"))
4214             F_getit (inf);
4215           continue;
4216         case 'b':
4217           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4218             {
4219               dbp = skip_spaces (dbp);
4220               if (*dbp == '\0') /* assume un-named */
4221                 make_tag ("blockdata", 9, TRUE,
4222                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4223               else
4224                 F_getit (inf);  /* look for name */
4225             }
4226           continue;
4227         }
4228     }
4229 }
4230
4231 \f
4232 /*
4233  * Ada parsing
4234  * Original code by
4235  * Philippe Waroquiers (1998)
4236  */
4237
4238 static void Ada_getit __P((FILE *, char *));
4239
4240 /* Once we are positioned after an "interesting" keyword, let's get
4241    the real tag value necessary. */
4242 static void
4243 Ada_getit (inf, name_qualifier)
4244      FILE *inf;
4245      char *name_qualifier;
4246 {
4247   register char *cp;
4248   char *name;
4249   char c;
4250
4251   while (!feof (inf))
4252     {
4253       dbp = skip_spaces (dbp);
4254       if (*dbp == '\0'
4255           || (dbp[0] == '-' && dbp[1] == '-'))
4256         {
4257           readline (&lb, inf);
4258           dbp = lb.buffer;
4259         }
4260       switch (lowcase(*dbp))
4261         {
4262         case 'b':
4263           if (nocase_tail ("body"))
4264             {
4265               /* Skipping body of   procedure body   or   package body or ....
4266                  resetting qualifier to body instead of spec. */
4267               name_qualifier = "/b";
4268               continue;
4269             }
4270           break;
4271         case 't':
4272           /* Skipping type of   task type   or   protected type ... */
4273           if (nocase_tail ("type"))
4274             continue;
4275           break;
4276         }
4277       if (*dbp == '"')
4278         {
4279           dbp += 1;
4280           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4281             continue;
4282         }
4283       else
4284         {
4285           dbp = skip_spaces (dbp);
4286           for (cp = dbp;
4287                (*cp != '\0'
4288                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4289                cp++)
4290             continue;
4291           if (cp == dbp)
4292             return;
4293         }
4294       c = *cp;
4295       *cp = '\0';
4296       name = concat (dbp, name_qualifier, "");
4297       *cp = c;
4298       make_tag (name, strlen (name), TRUE,
4299                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4300       free (name);
4301       if (c == '"')
4302         dbp = cp + 1;
4303       return;
4304     }
4305 }
4306
4307 static void
4308 Ada_funcs (inf)
4309      FILE *inf;
4310 {
4311   bool inquote = FALSE;
4312   bool skip_till_semicolumn = FALSE;
4313
4314   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4315     {
4316       while (*dbp != '\0')
4317         {
4318           /* Skip a string i.e. "abcd". */
4319           if (inquote || (*dbp == '"'))
4320             {
4321               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4322               if (dbp != NULL)
4323                 {
4324                   inquote = FALSE;
4325                   dbp += 1;
4326                   continue;     /* advance char */
4327                 }
4328               else
4329                 {
4330                   inquote = TRUE;
4331                   break;        /* advance line */
4332                 }
4333             }
4334
4335           /* Skip comments. */
4336           if (dbp[0] == '-' && dbp[1] == '-')
4337             break;              /* advance line */
4338
4339           /* Skip character enclosed in single quote i.e. 'a'
4340              and skip single quote starting an attribute i.e. 'Image. */
4341           if (*dbp == '\'')
4342             {
4343               dbp++ ;
4344               if (*dbp != '\0')
4345                 dbp++;
4346               continue;
4347             }
4348
4349           if (skip_till_semicolumn)
4350             {
4351               if (*dbp == ';')
4352                 skip_till_semicolumn = FALSE;
4353               dbp++;
4354               continue;         /* advance char */
4355             }
4356
4357           /* Search for beginning of a token.  */
4358           if (!begtoken (*dbp))
4359             {
4360               dbp++;
4361               continue;         /* advance char */
4362             }
4363
4364           /* We are at the beginning of a token. */
4365           switch (lowcase(*dbp))
4366             {
4367             case 'f':
4368               if (!packages_only && nocase_tail ("function"))
4369                 Ada_getit (inf, "/f");
4370               else
4371                 break;          /* from switch */
4372               continue;         /* advance char */
4373             case 'p':
4374               if (!packages_only && nocase_tail ("procedure"))
4375                 Ada_getit (inf, "/p");
4376               else if (nocase_tail ("package"))
4377                 Ada_getit (inf, "/s");
4378               else if (nocase_tail ("protected")) /* protected type */
4379                 Ada_getit (inf, "/t");
4380               else
4381                 break;          /* from switch */
4382               continue;         /* advance char */
4383
4384             case 'u':
4385               if (typedefs && !packages_only && nocase_tail ("use"))
4386                 {
4387                   /* when tagging types, avoid tagging  use type Pack.Typename;
4388                      for this, we will skip everything till a ; */
4389                   skip_till_semicolumn = TRUE;
4390                   continue;     /* advance char */
4391                 }
4392
4393             case 't':
4394               if (!packages_only && nocase_tail ("task"))
4395                 Ada_getit (inf, "/k");
4396               else if (typedefs && !packages_only && nocase_tail ("type"))
4397                 {
4398                   Ada_getit (inf, "/t");
4399                   while (*dbp != '\0')
4400                     dbp += 1;
4401                 }
4402               else
4403                 break;          /* from switch */
4404               continue;         /* advance char */
4405             }
4406
4407           /* Look for the end of the token. */
4408           while (!endtoken (*dbp))
4409             dbp++;
4410
4411         } /* advance char */
4412     } /* advance line */
4413 }
4414
4415 \f
4416 /*
4417  * Unix and microcontroller assembly tag handling
4418  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4419  * Idea by Bob Weiner, Motorola Inc. (1994)
4420  */
4421 static void
4422 Asm_labels (inf)
4423      FILE *inf;
4424 {
4425   register char *cp;
4426
4427   LOOP_ON_INPUT_LINES (inf, lb, cp)
4428     {
4429       /* If first char is alphabetic or one of [_.$], test for colon
4430          following identifier. */
4431       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4432         {
4433           /* Read past label. */
4434           cp++;
4435           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4436             cp++;
4437           if (*cp == ':' || iswhite (*cp))
4438             /* Found end of label, so copy it and add it to the table. */
4439             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4440                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4441         }
4442     }
4443 }
4444
4445 \f
4446 /*
4447  * Perl support
4448  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4449  * Perl variable names: /^(my|local).../
4450  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4451  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4452  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4453  */
4454 static void
4455 Perl_functions (inf)
4456      FILE *inf;
4457 {
4458   char *package = savestr ("main"); /* current package name */
4459   register char *cp;
4460
4461   LOOP_ON_INPUT_LINES (inf, lb, cp)
4462     {
4463       skip_spaces(cp);
4464
4465       if (LOOKING_AT (cp, "package"))
4466         {
4467           free (package);
4468           get_tag (cp, &package);
4469         }
4470       else if (LOOKING_AT (cp, "sub"))
4471         {
4472           char *pos;
4473           char *sp = cp;
4474
4475           while (!notinname (*cp))
4476             cp++;
4477           if (cp == sp)
4478             continue;           /* nothing found */
4479           if ((pos = etags_strchr (sp, ':')) != NULL
4480               && pos < cp && pos[1] == ':')
4481             /* The name is already qualified. */
4482             make_tag (sp, cp - sp, TRUE,
4483                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4484           else
4485             /* Qualify it. */
4486             {
4487               char savechar, *name;
4488
4489               savechar = *cp;
4490               *cp = '\0';
4491               name = concat (package, "::", sp);
4492               *cp = savechar;
4493               make_tag (name, strlen(name), TRUE,
4494                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4495               free (name);
4496             }
4497         }
4498        else if (globals)        /* only if we are tagging global vars */
4499         {
4500           /* Skip a qualifier, if any. */
4501           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4502           /* After "my" or "local", but before any following paren or space. */
4503           char *varstart = cp;
4504
4505           if (qual              /* should this be removed?  If yes, how? */
4506               && (*cp == '$' || *cp == '@' || *cp == '%'))
4507             {
4508               varstart += 1;
4509               do
4510                 cp++;
4511               while (ISALNUM (*cp) || *cp == '_');
4512             }
4513           else if (qual)
4514             {
4515               /* Should be examining a variable list at this point;
4516                  could insist on seeing an open parenthesis. */
4517               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4518                 cp++;
4519             }
4520           else
4521             continue;
4522
4523           make_tag (varstart, cp - varstart, FALSE,
4524                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4525         }
4526     }
4527   free (package);
4528 }
4529
4530
4531 /*
4532  * Python support
4533  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4534  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4535  * More ideas by seb bacon <seb@jamkit.com> (2002)
4536  */
4537 static void
4538 Python_functions (inf)
4539      FILE *inf;
4540 {
4541   register char *cp;
4542
4543   LOOP_ON_INPUT_LINES (inf, lb, cp)
4544     {
4545       cp = skip_spaces (cp);
4546       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4547         {
4548           char *name = cp;
4549           while (!notinname (*cp) && *cp != ':')
4550             cp++;
4551           make_tag (name, cp - name, TRUE,
4552                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4553         }
4554     }
4555 }
4556
4557 \f
4558 /*
4559  * PHP support
4560  * Look for:
4561  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4562  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4563  *  - /^[ \t]*define\(\"[^\"]+/
4564  * Only with --members:
4565  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4566  * Idea by Diez B. Roggisch (2001)
4567  */
4568 static void
4569 PHP_functions (inf)
4570      FILE *inf;
4571 {
4572   register char *cp, *name;
4573   bool search_identifier = FALSE;
4574
4575   LOOP_ON_INPUT_LINES (inf, lb, cp)
4576     {
4577       cp = skip_spaces (cp);
4578       name = cp;
4579       if (search_identifier
4580           && *cp != '\0')
4581         {
4582           while (!notinname (*cp))
4583             cp++;
4584           make_tag (name, cp - name, TRUE,
4585                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4586           search_identifier = FALSE;
4587         }
4588       else if (LOOKING_AT (cp, "function"))
4589         {
4590           if(*cp == '&')
4591             cp = skip_spaces (cp+1);
4592           if(*cp != '\0')
4593             {
4594               name = cp;
4595               while (!notinname (*cp))
4596                 cp++;
4597               make_tag (name, cp - name, TRUE,
4598                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4599             }
4600           else
4601             search_identifier = TRUE;
4602         }
4603       else if (LOOKING_AT (cp, "class"))
4604         {
4605           if (*cp != '\0')
4606             {
4607               name = cp;
4608               while (*cp != '\0' && !iswhite (*cp))
4609                 cp++;
4610               make_tag (name, cp - name, FALSE,
4611                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4612             }
4613           else
4614             search_identifier = TRUE;
4615         }
4616       else if (strneq (cp, "define", 6)
4617                && (cp = skip_spaces (cp+6))
4618                && *cp++ == '('
4619                && (*cp == '"' || *cp == '\''))
4620         {
4621           char quote = *cp++;
4622           name = cp;
4623           while (*cp != quote && *cp != '\0')
4624             cp++;
4625           make_tag (name, cp - name, FALSE,
4626                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4627         }
4628       else if (members
4629                && LOOKING_AT (cp, "var")
4630                && *cp == '$')
4631         {
4632           name = cp;
4633           while (!notinname(*cp))
4634             cp++;
4635           make_tag (name, cp - name, FALSE,
4636                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4637         }
4638     }
4639 }
4640
4641 \f
4642 /*
4643  * Cobol tag functions
4644  * We could look for anything that could be a paragraph name.
4645  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4646  * Idea by Corny de Souza (1993)
4647  */
4648 static void
4649 Cobol_paragraphs (inf)
4650      FILE *inf;
4651 {
4652   register char *bp, *ep;
4653
4654   LOOP_ON_INPUT_LINES (inf, lb, bp)
4655     {
4656       if (lb.len < 9)
4657         continue;
4658       bp += 8;
4659
4660       /* If eoln, compiler option or comment ignore whole line. */
4661       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4662         continue;
4663
4664       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4665         continue;
4666       if (*ep++ == '.')
4667         make_tag (bp, ep - bp, TRUE,
4668                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4669     }
4670 }
4671
4672 \f
4673 /*
4674  * Makefile support
4675  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4676  */
4677 static void
4678 Makefile_targets (inf)
4679      FILE *inf;
4680 {
4681   register char *bp;
4682
4683   LOOP_ON_INPUT_LINES (inf, lb, bp)
4684     {
4685       if (*bp == '\t' || *bp == '#')
4686         continue;
4687       while (*bp != '\0' && *bp != '=' && *bp != ':')
4688         bp++;
4689       if (*bp == ':' || (globals && *bp == '='))
4690         make_tag (lb.buffer, bp - lb.buffer, TRUE,
4691                   lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4692     }
4693 }
4694
4695 \f
4696 /*
4697  * Pascal parsing
4698  * Original code by Mosur K. Mohan (1989)
4699  *
4700  *  Locates tags for procedures & functions.  Doesn't do any type- or
4701  *  var-definitions.  It does look for the keyword "extern" or
4702  *  "forward" immediately following the procedure statement; if found,
4703  *  the tag is skipped.
4704  */
4705 static void
4706 Pascal_functions (inf)
4707      FILE *inf;
4708 {
4709   linebuffer tline;             /* mostly copied from C_entries */
4710   long save_lcno;
4711   int save_lineno, namelen, taglen;
4712   char c, *name;
4713
4714   bool                          /* each of these flags is TRUE iff: */
4715     incomment,                  /* point is inside a comment */
4716     inquote,                    /* point is inside '..' string */
4717     get_tagname,                /* point is after PROCEDURE/FUNCTION
4718                                    keyword, so next item = potential tag */
4719     found_tag,                  /* point is after a potential tag */
4720     inparms,                    /* point is within parameter-list */
4721     verify_tag;                 /* point has passed the parm-list, so the
4722                                    next token will determine whether this
4723                                    is a FORWARD/EXTERN to be ignored, or
4724                                    whether it is a real tag */
4725
4726   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4727   name = NULL;                  /* keep compiler quiet */
4728   dbp = lb.buffer;
4729   *dbp = '\0';
4730   linebuffer_init (&tline);
4731
4732   incomment = inquote = FALSE;
4733   found_tag = FALSE;            /* have a proc name; check if extern */
4734   get_tagname = FALSE;          /* found "procedure" keyword         */
4735   inparms = FALSE;              /* found '(' after "proc"            */
4736   verify_tag = FALSE;           /* check if "extern" is ahead        */
4737
4738
4739   while (!feof (inf))           /* long main loop to get next char */
4740     {
4741       c = *dbp++;
4742       if (c == '\0')            /* if end of line */
4743         {
4744           readline (&lb, inf);
4745           dbp = lb.buffer;
4746           if (*dbp == '\0')
4747             continue;
4748           if (!((found_tag && verify_tag)
4749                 || get_tagname))
4750             c = *dbp++;         /* only if don't need *dbp pointing
4751                                    to the beginning of the name of
4752                                    the procedure or function */
4753         }
4754       if (incomment)
4755         {
4756           if (c == '}')         /* within { } comments */
4757             incomment = FALSE;
4758           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4759             {
4760               dbp++;
4761               incomment = FALSE;
4762             }
4763           continue;
4764         }
4765       else if (inquote)
4766         {
4767           if (c == '\'')
4768             inquote = FALSE;
4769           continue;
4770         }
4771       else
4772         switch (c)
4773           {
4774           case '\'':
4775             inquote = TRUE;     /* found first quote */
4776             continue;
4777           case '{':             /* found open { comment */
4778             incomment = TRUE;
4779             continue;
4780           case '(':
4781             if (*dbp == '*')    /* found open (* comment */
4782               {
4783                 incomment = TRUE;
4784                 dbp++;
4785               }
4786             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4787               inparms = TRUE;
4788             continue;
4789           case ')':             /* end of parms list */
4790             if (inparms)
4791               inparms = FALSE;
4792             continue;
4793           case ';':
4794             if (found_tag && !inparms) /* end of proc or fn stmt */
4795               {
4796                 verify_tag = TRUE;
4797                 break;
4798               }
4799             continue;
4800           }
4801       if (found_tag && verify_tag && (*dbp != ' '))
4802         {
4803           /* Check if this is an "extern" declaration. */
4804           if (*dbp == '\0')
4805             continue;
4806           if (lowcase (*dbp == 'e'))
4807             {
4808               if (nocase_tail ("extern")) /* superfluous, really! */
4809                 {
4810                   found_tag = FALSE;
4811                   verify_tag = FALSE;
4812                 }
4813             }
4814           else if (lowcase (*dbp) == 'f')
4815             {
4816               if (nocase_tail ("forward")) /* check for forward reference */
4817                 {
4818                   found_tag = FALSE;
4819                   verify_tag = FALSE;
4820                 }
4821             }
4822           if (found_tag && verify_tag) /* not external proc, so make tag */
4823             {
4824               found_tag = FALSE;
4825               verify_tag = FALSE;
4826               make_tag (name, namelen, TRUE,
4827                         tline.buffer, taglen, save_lineno, save_lcno);
4828               continue;
4829             }
4830         }
4831       if (get_tagname)          /* grab name of proc or fn */
4832         {
4833           char *cp;
4834
4835           if (*dbp == '\0')
4836             continue;
4837
4838           /* Find block name. */
4839           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4840             continue;
4841
4842           /* Save all values for later tagging. */
4843           linebuffer_setlen (&tline, lb.len);
4844           strcpy (tline.buffer, lb.buffer);
4845           save_lineno = lineno;
4846           save_lcno = linecharno;
4847           name = tline.buffer + (dbp - lb.buffer);
4848           namelen = cp - dbp;
4849           taglen = cp - lb.buffer + 1;
4850
4851           dbp = cp;             /* set dbp to e-o-token */
4852           get_tagname = FALSE;
4853           found_tag = TRUE;
4854           continue;
4855
4856           /* And proceed to check for "extern". */
4857         }
4858       else if (!incomment && !inquote && !found_tag)
4859         {
4860           /* Check for proc/fn keywords. */
4861           switch (lowcase (c))
4862             {
4863             case 'p':
4864               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4865                 get_tagname = TRUE;
4866               continue;
4867             case 'f':
4868               if (nocase_tail ("unction"))
4869                 get_tagname = TRUE;
4870               continue;
4871             }
4872         }
4873     } /* while not eof */
4874
4875   free (tline.buffer);
4876 }
4877
4878 \f
4879 /*
4880  * Lisp tag functions
4881  *  look for (def or (DEF, quote or QUOTE
4882  */
4883
4884 static void L_getit __P((void));
4885
4886 static void
4887 L_getit ()
4888 {
4889   if (*dbp == '\'')             /* Skip prefix quote */
4890     dbp++;
4891   else if (*dbp == '(')
4892   {
4893     dbp++;
4894     /* Try to skip "(quote " */
4895     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4896       /* Ok, then skip "(" before name in (defstruct (foo)) */
4897       dbp = skip_spaces (dbp);
4898   }
4899   get_tag (dbp, NULL);
4900 }
4901
4902 static void
4903 Lisp_functions (inf)
4904      FILE *inf;
4905 {
4906   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4907     {
4908       if (dbp[0] != '(')
4909         continue;
4910
4911       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4912         {
4913           dbp = skip_non_spaces (dbp);
4914           dbp = skip_spaces (dbp);
4915           L_getit ();
4916         }
4917       else
4918         {
4919           /* Check for (foo::defmumble name-defined ... */
4920           do
4921             dbp++;
4922           while (!notinname (*dbp) && *dbp != ':');
4923           if (*dbp == ':')
4924             {
4925               do
4926                 dbp++;
4927               while (*dbp == ':');
4928
4929               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4930                 {
4931                   dbp = skip_non_spaces (dbp);
4932                   dbp = skip_spaces (dbp);
4933                   L_getit ();
4934                 }
4935             }
4936         }
4937     }
4938 }
4939
4940 \f
4941 /*
4942  * Lua script language parsing
4943  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4944  *
4945  *  "function" and "local function" are tags if they start at column 1.
4946  */
4947 static void
4948 Lua_functions (inf)
4949      FILE *inf;
4950 {
4951   register char *bp;
4952
4953   LOOP_ON_INPUT_LINES (inf, lb, bp)
4954     {
4955       if (bp[0] != 'f' && bp[0] != 'l')
4956         continue;
4957
4958       LOOKING_AT (bp, "local"); /* skip possible "local" */
4959
4960       if (LOOKING_AT (bp, "function"))
4961         get_tag (bp, NULL);
4962     }
4963 }
4964
4965 \f
4966 /*
4967  * Postscript tags
4968  * Just look for lines where the first character is '/'
4969  * Also look at "defineps" for PSWrap
4970  * Ideas by:
4971  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4972  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4973  */
4974 static void
4975 PS_functions (inf)
4976      FILE *inf;
4977 {
4978   register char *bp, *ep;
4979
4980   LOOP_ON_INPUT_LINES (inf, lb, bp)
4981     {
4982       if (bp[0] == '/')
4983         {
4984           for (ep = bp+1;
4985                *ep != '\0' && *ep != ' ' && *ep != '{';
4986                ep++)
4987             continue;
4988           make_tag (bp, ep - bp, TRUE,
4989                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4990         }
4991       else if (LOOKING_AT (bp, "defineps"))
4992         get_tag (bp, NULL);
4993     }
4994 }
4995
4996 \f
4997 /*
4998  * Forth tags
4999  * Ignore anything after \ followed by space or in ( )
5000  * Look for words defined by :
5001  * Look for constant, code, create, defer, value, and variable
5002  * OBP extensions:  Look for buffer:, field,
5003  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5004  */
5005 static void
5006 Forth_words (inf)
5007      FILE *inf;
5008 {
5009   register char *bp;
5010
5011   LOOP_ON_INPUT_LINES (inf, lb, bp)
5012     while ((bp = skip_spaces (bp))[0] != '\0')
5013       if (bp[0] == '\\' && iswhite(bp[1]))
5014         break;                  /* read next line */
5015       else if (bp[0] == '(' && iswhite(bp[1]))
5016         do                      /* skip to ) or eol */
5017           bp++;
5018         while (*bp != ')' && *bp != '\0');
5019       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5020                || LOOKING_AT_NOCASE (bp, "constant")
5021                || LOOKING_AT_NOCASE (bp, "code")
5022                || LOOKING_AT_NOCASE (bp, "create")
5023                || LOOKING_AT_NOCASE (bp, "defer")
5024                || LOOKING_AT_NOCASE (bp, "value")
5025                || LOOKING_AT_NOCASE (bp, "variable")
5026                || LOOKING_AT_NOCASE (bp, "buffer:")
5027                || LOOKING_AT_NOCASE (bp, "field"))
5028         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5029       else
5030         bp = skip_non_spaces (bp);
5031 }
5032
5033 \f
5034 /*
5035  * Scheme tag functions
5036  * look for (def... xyzzy
5037  *          (def... (xyzzy
5038  *          (def ... ((...(xyzzy ....
5039  *          (set! xyzzy
5040  * Original code by Ken Haase (1985?)
5041  */
5042 static void
5043 Scheme_functions (inf)
5044      FILE *inf;
5045 {
5046   register char *bp;
5047
5048   LOOP_ON_INPUT_LINES (inf, lb, bp)
5049     {
5050       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5051         {
5052           bp = skip_non_spaces (bp+4);
5053           /* Skip over open parens and white space */
5054           while (notinname (*bp))
5055             bp++;
5056           get_tag (bp, NULL);
5057         }
5058       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5059         get_tag (bp, NULL);
5060     }
5061 }
5062
5063 \f
5064 /* Find tags in TeX and LaTeX input files.  */
5065
5066 /* TEX_toktab is a table of TeX control sequences that define tags.
5067  * Each entry records one such control sequence.
5068  *
5069  * Original code from who knows whom.
5070  * Ideas by:
5071  *   Stefan Monnier (2002)
5072  */
5073
5074 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5075
5076 /* Default set of control sequences to put into TEX_toktab.
5077    The value of environment var TEXTAGS is prepended to this.  */
5078 static char *TEX_defenv = "\
5079 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5080 :part:appendix:entry:index:def\
5081 :newcommand:renewcommand:newenvironment:renewenvironment";
5082
5083 static void TEX_mode __P((FILE *));
5084 static void TEX_decode_env __P((char *, char *));
5085
5086 static char TEX_esc = '\\';
5087 static char TEX_opgrp = '{';
5088 static char TEX_clgrp = '}';
5089
5090 /*
5091  * TeX/LaTeX scanning loop.
5092  */
5093 static void
5094 TeX_commands (inf)
5095      FILE *inf;
5096 {
5097   char *cp;
5098   linebuffer *key;
5099
5100   /* Select either \ or ! as escape character.  */
5101   TEX_mode (inf);
5102
5103   /* Initialize token table once from environment. */
5104   if (TEX_toktab == NULL)
5105     TEX_decode_env ("TEXTAGS", TEX_defenv);
5106
5107   LOOP_ON_INPUT_LINES (inf, lb, cp)
5108     {
5109       /* Look at each TEX keyword in line. */
5110       for (;;)
5111         {
5112           /* Look for a TEX escape. */
5113           while (*cp++ != TEX_esc)
5114             if (cp[-1] == '\0' || cp[-1] == '%')
5115               goto tex_next_line;
5116
5117           for (key = TEX_toktab; key->buffer != NULL; key++)
5118             if (strneq (cp, key->buffer, key->len))
5119               {
5120                 register char *p;
5121                 int namelen, linelen;
5122                 bool opgrp = FALSE;
5123
5124                 cp = skip_spaces (cp + key->len);
5125                 if (*cp == TEX_opgrp)
5126                   {
5127                     opgrp = TRUE;
5128                     cp++;
5129                   }
5130                 for (p = cp;
5131                      (!iswhite (*p) && *p != '#' &&
5132                       *p != TEX_opgrp && *p != TEX_clgrp);
5133                      p++)
5134                   continue;
5135                 namelen = p - cp;
5136                 linelen = lb.len;
5137                 if (!opgrp || *p == TEX_clgrp)
5138                   {
5139                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5140                       *p++;
5141                     linelen = p - lb.buffer + 1;
5142                   }
5143                 make_tag (cp, namelen, TRUE,
5144                           lb.buffer, linelen, lineno, linecharno);
5145                 goto tex_next_line; /* We only tag a line once */
5146               }
5147         }
5148     tex_next_line:
5149       ;
5150     }
5151 }
5152
5153 #define TEX_LESC '\\'
5154 #define TEX_SESC '!'
5155
5156 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5157    chars accordingly. */
5158 static void
5159 TEX_mode (inf)
5160      FILE *inf;
5161 {
5162   int c;
5163
5164   while ((c = getc (inf)) != EOF)
5165     {
5166       /* Skip to next line if we hit the TeX comment char. */
5167       if (c == '%')
5168         while (c != '\n' && c != EOF)
5169           c = getc (inf);
5170       else if (c == TEX_LESC || c == TEX_SESC )
5171         break;
5172     }
5173
5174   if (c == TEX_LESC)
5175     {
5176       TEX_esc = TEX_LESC;
5177       TEX_opgrp = '{';
5178       TEX_clgrp = '}';
5179     }
5180   else
5181     {
5182       TEX_esc = TEX_SESC;
5183       TEX_opgrp = '<';
5184       TEX_clgrp = '>';
5185     }
5186   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5187      No attempt is made to correct the situation. */
5188   rewind (inf);
5189 }
5190
5191 /* Read environment and prepend it to the default string.
5192    Build token table. */
5193 static void
5194 TEX_decode_env (evarname, defenv)
5195      char *evarname;
5196      char *defenv;
5197 {
5198   register char *env, *p;
5199   int i, len;
5200
5201   /* Append default string to environment. */
5202   env = getenv (evarname);
5203   if (!env)
5204     env = defenv;
5205   else
5206     {
5207       char *oldenv = env;
5208       env = concat (oldenv, defenv, "");
5209     }
5210
5211   /* Allocate a token table */
5212   for (len = 1, p = env; p;)
5213     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5214       len++;
5215   TEX_toktab = xnew (len, linebuffer);
5216
5217   /* Unpack environment string into token table. Be careful about */
5218   /* zero-length strings (leading ':', "::" and trailing ':') */
5219   for (i = 0; *env != '\0';)
5220     {
5221       p = etags_strchr (env, ':');
5222       if (!p)                   /* End of environment string. */
5223         p = env + strlen (env);
5224       if (p - env > 0)
5225         {                       /* Only non-zero strings. */
5226           TEX_toktab[i].buffer = savenstr (env, p - env);
5227           TEX_toktab[i].len = p - env;
5228           i++;
5229         }
5230       if (*p)
5231         env = p + 1;
5232       else
5233         {
5234           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5235           TEX_toktab[i].len = 0;
5236           break;
5237         }
5238     }
5239 }
5240
5241 \f
5242 /* Texinfo support.  Dave Love, Mar. 2000.  */
5243 static void
5244 Texinfo_nodes (inf)
5245      FILE * inf;
5246 {
5247   char *cp, *start;
5248   LOOP_ON_INPUT_LINES (inf, lb, cp)
5249     if (LOOKING_AT (cp, "@node"))
5250       {
5251         start = cp;
5252         while (*cp != '\0' && *cp != ',')
5253           cp++;
5254         make_tag (start, cp - start, TRUE,
5255                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5256       }
5257 }
5258
5259 \f
5260 /*
5261  * HTML support.
5262  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5263  * Contents of <a name=xxx> are tags with name xxx.
5264  *
5265  * Francesco Potortì, 2002.
5266  */
5267 static void
5268 HTML_labels (inf)
5269      FILE * inf;
5270 {
5271   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5272   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5273   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5274   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5275   char *end;
5276
5277
5278   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5279
5280   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5281     for (;;)                    /* loop on the same line */
5282       {
5283         if (skiptag)            /* skip HTML tag */
5284           {
5285             while (*dbp != '\0' && *dbp != '>')
5286               dbp++;
5287             if (*dbp == '>')
5288               {
5289                 dbp += 1;
5290                 skiptag = FALSE;
5291                 continue;       /* look on the same line */
5292               }
5293             break;              /* go to next line */
5294           }
5295
5296         else if (intag) /* look for "name=" or "id=" */
5297           {
5298             while (*dbp != '\0' && *dbp != '>'
5299                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5300               dbp++;
5301             if (*dbp == '\0')
5302               break;            /* go to next line */
5303             if (*dbp == '>')
5304               {
5305                 dbp += 1;
5306                 intag = FALSE;
5307                 continue;       /* look on the same line */
5308               }
5309             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5310                 || LOOKING_AT_NOCASE (dbp, "id="))
5311               {
5312                 bool quoted = (dbp[0] == '"');
5313
5314                 if (quoted)
5315                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5316                     continue;
5317                 else
5318                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5319                     continue;
5320                 linebuffer_setlen (&token_name, end - dbp);
5321                 strncpy (token_name.buffer, dbp, end - dbp);
5322                 token_name.buffer[end - dbp] = '\0';
5323
5324                 dbp = end;
5325                 intag = FALSE;  /* we found what we looked for */
5326                 skiptag = TRUE; /* skip to the end of the tag */
5327                 getnext = TRUE; /* then grab the text */
5328                 continue;       /* look on the same line */
5329               }
5330             dbp += 1;
5331           }
5332
5333         else if (getnext)       /* grab next tokens and tag them */
5334           {
5335             dbp = skip_spaces (dbp);
5336             if (*dbp == '\0')
5337               break;            /* go to next line */
5338             if (*dbp == '<')
5339               {
5340                 intag = TRUE;
5341                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5342                 continue;       /* look on the same line */
5343               }
5344
5345             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5346               continue;
5347             make_tag (token_name.buffer, token_name.len, TRUE,
5348                       dbp, end - dbp, lineno, linecharno);
5349             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5350             getnext = FALSE;
5351             break;              /* go to next line */
5352           }
5353
5354         else                    /* look for an interesting HTML tag */
5355           {
5356             while (*dbp != '\0' && *dbp != '<')
5357               dbp++;
5358             if (*dbp == '\0')
5359               break;            /* go to next line */
5360             intag = TRUE;
5361             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5362               {
5363                 inanchor = TRUE;
5364                 continue;       /* look on the same line */
5365               }
5366             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5367                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5368                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5369                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5370               {
5371                 intag = FALSE;
5372                 getnext = TRUE;
5373                 continue;       /* look on the same line */
5374               }
5375             dbp += 1;
5376           }
5377       }
5378 }
5379
5380 \f
5381 /*
5382  * Prolog support
5383  *
5384  * Assumes that the predicate or rule starts at column 0.
5385  * Only the first clause of a predicate or rule is added.
5386  * Original code by Sunichirou Sugou (1989)
5387  * Rewritten by Anders Lindgren (1996)
5388  */
5389 static int prolog_pr __P((char *, char *));
5390 static void prolog_skip_comment __P((linebuffer *, FILE *));
5391 static int prolog_atom __P((char *, int));
5392
5393 static void
5394 Prolog_functions (inf)
5395      FILE *inf;
5396 {
5397   char *cp, *last;
5398   int len;
5399   int allocated;
5400
5401   allocated = 0;
5402   len = 0;
5403   last = NULL;
5404
5405   LOOP_ON_INPUT_LINES (inf, lb, cp)
5406     {
5407       if (cp[0] == '\0')        /* Empty line */
5408         continue;
5409       else if (iswhite (cp[0])) /* Not a predicate */
5410         continue;
5411       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5412         prolog_skip_comment (&lb, inf);
5413       else if ((len = prolog_pr (cp, last)) > 0)
5414         {
5415           /* Predicate or rule.  Store the function name so that we
5416              only generate a tag for the first clause.  */
5417           if (last == NULL)
5418             last = xnew(len + 1, char);
5419           else if (len + 1 > allocated)
5420             xrnew (last, len + 1, char);
5421           allocated = len + 1;
5422           strncpy (last, cp, len);
5423           last[len] = '\0';
5424         }
5425     }
5426   if (last != NULL)
5427     free (last);
5428 }
5429
5430
5431 static void
5432 prolog_skip_comment (plb, inf)
5433      linebuffer *plb;
5434      FILE *inf;
5435 {
5436   char *cp;
5437
5438   do
5439     {
5440       for (cp = plb->buffer; *cp != '\0'; cp++)
5441         if (cp[0] == '*' && cp[1] == '/')
5442           return;
5443       readline (plb, inf);
5444     }
5445   while (!feof(inf));
5446 }
5447
5448 /*
5449  * A predicate or rule definition is added if it matches:
5450  *     <beginning of line><Prolog Atom><whitespace>(
5451  * or  <beginning of line><Prolog Atom><whitespace>:-
5452  *
5453  * It is added to the tags database if it doesn't match the
5454  * name of the previous clause header.
5455  *
5456  * Return the size of the name of the predicate or rule, or 0 if no
5457  * header was found.
5458  */
5459 static int
5460 prolog_pr (s, last)
5461      char *s;
5462      char *last;                /* Name of last clause. */
5463 {
5464   int pos;
5465   int len;
5466
5467   pos = prolog_atom (s, 0);
5468   if (pos < 1)
5469     return 0;
5470
5471   len = pos;
5472   pos = skip_spaces (s + pos) - s;
5473
5474   if ((s[pos] == '.'
5475        || (s[pos] == '(' && (pos += 1))
5476        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5477       && (last == NULL          /* save only the first clause */
5478           || len != (int)strlen (last)
5479           || !strneq (s, last, len)))
5480         {
5481           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5482           return len;
5483         }
5484   else
5485     return 0;
5486 }
5487
/*
 * Consume the Prolog atom that starts at S + POS.
 * Return the number of bytes consumed, or -1 if no atom starts there.
 *
 * Two kinds of atom are recognised:
 * - An unquoted one: a lower case letter or an underscore followed by
 *   any number of alphanumeric characters and underscores.
 * - A quoted one, delimited by single quotes.  Inside it a doubled
 *   single quote stands for one quote and a backslash quotes the
 *   character after it.  A quoted atom that does not end on the same
 *   line is rejected.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;                 /* the backslash and what it quotes */
        break;

      case '\'':
        if (p[1] != '\'')
          return p + 1 - start; /* that was the closing quote */
        p += 2;                 /* a doubled quote */
        break;

      default:
        p++;
        break;
      }
}
5546
5547 \f
5548 /*
5549  * Support for Erlang
5550  *
5551  * Generates tags for functions, defines, and records.
5552  * Assumes that Erlang functions start at column 0.
5553  * Original code by Anders Lindgren (1996)
5554  */
5555 static int erlang_func __P((char *, char *));
5556 static void erlang_attribute __P((char *));
5557 static int erlang_atom __P((char *));
5558
5559 static void
5560 Erlang_functions (inf)
5561      FILE *inf;
5562 {
5563   char *cp, *last;
5564   int len;
5565   int allocated;
5566
5567   allocated = 0;
5568   len = 0;
5569   last = NULL;
5570
5571   LOOP_ON_INPUT_LINES (inf, lb, cp)
5572     {
5573       if (cp[0] == '\0')        /* Empty line */
5574         continue;
5575       else if (iswhite (cp[0])) /* Not function nor attribute */
5576         continue;
5577       else if (cp[0] == '%')    /* comment */
5578         continue;
5579       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5580         continue;
5581       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5582         {
5583           erlang_attribute (cp);
5584           if (last != NULL)
5585             {
5586               free (last);
5587               last = NULL;
5588             }
5589         }
5590       else if ((len = erlang_func (cp, last)) > 0)
5591         {
5592           /*
5593            * Function.  Store the function name so that we only
5594            * generates a tag for the first clause.
5595            */
5596           if (last == NULL)
5597             last = xnew (len + 1, char);
5598           else if (len + 1 > allocated)
5599             xrnew (last, len + 1, char);
5600           allocated = len + 1;
5601           strncpy (last, cp, len);
5602           last[len] = '\0';
5603         }
5604     }
5605   if (last != NULL)
5606     free (last);
5607 }
5608
5609
5610 /*
5611  * A function definition is added if it matches:
5612  *     <beginning of line><Erlang Atom><whitespace>(
5613  *
5614  * It is added to the tags database if it doesn't match the
5615  * name of the previous clause header.
5616  *
5617  * Return the size of the name of the function, or 0 if no function
5618  * was found.
5619  */
5620 static int
5621 erlang_func (s, last)
5622      char *s;
5623      char *last;                /* Name of last clause. */
5624 {
5625   int pos;
5626   int len;
5627
5628   pos = erlang_atom (s);
5629   if (pos < 1)
5630     return 0;
5631
5632   len = pos;
5633   pos = skip_spaces (s + pos) - s;
5634
5635   /* Save only the first clause. */
5636   if (s[pos++] == '('
5637       && (last == NULL
5638           || len != (int)strlen (last)
5639           || !strneq (s, last, len)))
5640         {
5641           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5642           return len;
5643         }
5644
5645   return 0;
5646 }
5647
5648
5649 /*
5650  * Handle attributes.  Currently, tags are generated for defines
5651  * and records.
5652  *
5653  * They are on the form:
5654  * -define(foo, bar).
5655  * -define(Foo(M, N), M+N).
5656  * -record(graph, {vtab = notable, cyclic = true}).
5657  */
5658 static void
5659 erlang_attribute (s)
5660      char *s;
5661 {
5662   char *cp = s;
5663
5664   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5665       && *cp++ == '(')
5666     {
5667       int len = erlang_atom (skip_spaces (cp));
5668       if (len > 0)
5669         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5670     }
5671   return;
5672 }
5673
5674
/*
 * Consume the Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom.
 *
 * An unquoted atom is a letter or an underscore followed by any number
 * of alphanumeric characters and underscores.  A quoted atom is
 * delimited by single quotes, with backslash quoting the character
 * that follows it; one that does not end on the same line is rejected.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return p - s;
    }

  if (*p == '\'')
    {
      for (p++; *p != '\''; p++)
        {
          if (*p == '\\')
            p++;                /* look at the quoted character instead */
          if (*p == '\0')
            return 0;           /* multiline quoted atoms are ignored */
        }
      return p + 1 - s;         /* count the closing quote too */
    }

  return 0;
}
5703
5704 \f
5705 static char *scan_separators __P((char *));
5706 static void add_regex __P((char *, language *));
5707 static char *substitute __P((char *, char *, struct re_registers *));
5708
/*
 * Extract one separator-delimited field in place.
 *
 * NAME points at a separator character, as in "/blah/".  The text
 * that follows is copied down over NAME (so "/blah/" becomes "blah")
 * until an unquoted occurrence of the same separator is met, and the
 * copy is null terminated.  While copying, backslash sequences are
 * processed: \a \b \d \e \f \n \r \t \v become the corresponding
 * control characters, a backslash before the separator yields a plain
 * separator, and any other backslash sequence is kept unchanged.
 *
 * Return a pointer to the terminating separator in the original text,
 * or NULL if the field is unterminated.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;
  char *terminator;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Deal with the quoted character straight away.  */
          ++name;
          if (*name == '\0')
            break;              /* dangling backslash at end of string */
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              /* A quoted separator loses its quote; anything else
                 keeps it, for the regexp compiler to interpret.  */
              if (*name != sep)
                *copyto++ = '\\';
              *copyto++ = *name;
              break;
            }
        }
      else if (*name == sep)
        break;                  /* unquoted separator: end of field */
      else
        *copyto++ = *name;
    }

  /* Stopping anywhere but on a separator means no terminator.  */
  terminator = (*name == sep) ? name : NULL;

  /* Terminate copied string. */
  *copyto = '\0';
  return terminator;
}
5768
5769 /* Look at the argument of --regex or --no-regex and do the right
5770    thing.  Same for each line of a regexp file. */
5771 static void
5772 analyse_regex (regex_arg)
5773      char *regex_arg;
5774 {
5775   if (regex_arg == NULL)
5776     {
5777       free_regexps ();          /* --no-regex: remove existing regexps */
5778       return;
5779     }
5780
5781   /* A real --regexp option or a line in a regexp file. */
5782   switch (regex_arg[0])
5783     {
5784       /* Comments in regexp file or null arg to --regex. */
5785     case '\0':
5786     case ' ':
5787     case '\t':
5788       break;
5789
5790       /* Read a regex file.  This is recursive and may result in a
5791          loop, which will stop when the file descriptors are exhausted. */
5792     case '@':
5793       {
5794         FILE *regexfp;
5795         linebuffer regexbuf;
5796         char *regexfile = regex_arg + 1;
5797
5798         /* regexfile is a file containing regexps, one per line. */
5799         regexfp = fopen (regexfile, "r");
5800         if (regexfp == NULL)
5801           {
5802             pfatal (regexfile);
5803             return;
5804           }
5805         linebuffer_init (&regexbuf);
5806         while (readline_internal (&regexbuf, regexfp) > 0)
5807           analyse_regex (regexbuf.buffer);
5808         free (regexbuf.buffer);
5809         fclose (regexfp);
5810       }
5811       break;
5812
5813       /* Regexp to be used for a specific language only. */
5814     case '{':
5815       {
5816         language *lang;
5817         char *lang_name = regex_arg + 1;
5818         char *cp;
5819
5820         for (cp = lang_name; *cp != '}'; cp++)
5821           if (*cp == '\0')
5822             {
5823               error ("unterminated language name in regex: %s", regex_arg);
5824               return;
5825             }
5826         *cp++ = '\0';
5827         lang = get_language_from_langname (lang_name);
5828         if (lang == NULL)
5829           return;
5830         add_regex (cp, lang);
5831       }
5832       break;
5833
5834       /* Regexp to be used for any language. */
5835     default:
5836       add_regex (regex_arg, NULL);
5837       break;
5838     }
5839 }
5840
5841 /* Separate the regexp pattern, compile it,
5842    and care for optional name and modifiers. */
5843 static void
5844 add_regex (regexp_pattern, lang)
5845      char *regexp_pattern;
5846      language *lang;
5847 {
5848   static struct re_pattern_buffer zeropattern;
5849   char sep, *pat, *name, *modifiers;
5850   const char *err;
5851   struct re_pattern_buffer *patbuf;
5852   regexp *rp;
5853   bool
5854     force_explicit_name = TRUE, /* do not use implicit tag names */
5855     ignore_case = FALSE,        /* case is significant */
5856     multi_line = FALSE,         /* matches are done one line at a time */
5857     single_line = FALSE;        /* dot does not match newline */
5858
5859
5860   if (strlen(regexp_pattern) < 3)
5861     {
5862       error ("null regexp", (char *)NULL);
5863       return;
5864     }
5865   sep = regexp_pattern[0];
5866   name = scan_separators (regexp_pattern);
5867   if (name == NULL)
5868     {
5869       error ("%s: unterminated regexp", regexp_pattern);
5870       return;
5871     }
5872   if (name[1] == sep)
5873     {
5874       error ("null name for regexp \"%s\"", regexp_pattern);
5875       return;
5876     }
5877   modifiers = scan_separators (name);
5878   if (modifiers == NULL)        /* no terminating separator --> no name */
5879     {
5880       modifiers = name;
5881       name = "";
5882     }
5883   else
5884     modifiers += 1;             /* skip separator */
5885
5886   /* Parse regex modifiers. */
5887   for (; modifiers[0] != '\0'; modifiers++)
5888     switch (modifiers[0])
5889       {
5890       case 'N':
5891         if (modifiers == name)
5892           error ("forcing explicit tag name but no name, ignoring", NULL);
5893         force_explicit_name = TRUE;
5894         break;
5895       case 'i':
5896         ignore_case = TRUE;
5897         break;
5898       case 's':
5899         single_line = TRUE;
5900         /* FALLTHRU */
5901       case 'm':
5902         multi_line = TRUE;
5903         need_filebuf = TRUE;
5904         break;
5905       default:
5906         {
5907           char wrongmod [2];
5908           wrongmod[0] = modifiers[0];
5909           wrongmod[1] = '\0';
5910           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5911         }
5912         break;
5913       }
5914
5915   patbuf = xnew (1, struct re_pattern_buffer);
5916   *patbuf = zeropattern;
5917   if (ignore_case)
5918     {
5919       static char lc_trans[CHARS];
5920       int i;
5921       for (i = 0; i < CHARS; i++)
5922         lc_trans[i] = lowcase (i);
5923       patbuf->translate = lc_trans;     /* translation table to fold case  */
5924     }
5925
5926   if (multi_line)
5927     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5928   else
5929     pat = regexp_pattern;
5930
5931   if (single_line)
5932     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5933   else
5934     re_set_syntax (RE_SYNTAX_EMACS);
5935
5936   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5937   if (multi_line)
5938     free (pat);
5939   if (err != NULL)
5940     {
5941       error ("%s while compiling pattern", err);
5942       return;
5943     }
5944
5945   rp = p_head;
5946   p_head = xnew (1, regexp);
5947   p_head->pattern = savestr (regexp_pattern);
5948   p_head->p_next = rp;
5949   p_head->lang = lang;
5950   p_head->pat = patbuf;
5951   p_head->name = savestr (name);
5952   p_head->error_signaled = FALSE;
5953   p_head->force_explicit_name = force_explicit_name;
5954   p_head->ignore_case = ignore_case;
5955   p_head->multi_line = multi_line;
5956 }
5957
5958 /*
5959  * Do the substitutions indicated by the regular expression and
5960  * arguments.
5961  */
5962 static char *
5963 substitute (in, out, regs)
5964      char *in, *out;
5965      struct re_registers *regs;
5966 {
5967   char *result, *t;
5968   int size, dig, diglen;
5969
5970   result = NULL;
5971   size = strlen (out);
5972
5973   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5974   if (out[size - 1] == '\\')
5975     fatal ("pattern error in \"%s\"", out);
5976   for (t = etags_strchr (out, '\\');
5977        t != NULL;
5978        t = etags_strchr (t + 2, '\\'))
5979     if (ISDIGIT (t[1]))
5980       {
5981         dig = t[1] - '0';
5982         diglen = regs->end[dig] - regs->start[dig];
5983         size += diglen - 2;
5984       }
5985     else
5986       size -= 1;
5987
5988   /* Allocate space and do the substitutions. */
5989   assert (size >= 0);
5990   result = xnew (size + 1, char);
5991
5992   for (t = result; *out != '\0'; out++)
5993     if (*out == '\\' && ISDIGIT (*++out))
5994       {
5995         dig = *out - '0';
5996         diglen = regs->end[dig] - regs->start[dig];
5997         strncpy (t, in + regs->start[dig], diglen);
5998         t += diglen;
5999       }
6000     else
6001       *t++ = *out;
6002   *t = '\0';
6003
6004   assert (t <= result + size);
6005   assert (t - result == (int)strlen (result));
6006
6007   return result;
6008 }
6009
6010 /* Deallocate all regexps. */
6011 static void
6012 free_regexps ()
6013 {
6014   regexp *rp;
6015   while (p_head != NULL)
6016     {
6017       rp = p_head->p_next;
6018       free (p_head->pattern);
6019       free (p_head->name);
6020       free (p_head);
6021       p_head = rp;
6022     }
6023   return;
6024 }
6025
6026 /*
6027  * Reads the whole file as a single string from `filebuf' and looks for
6028  * multi-line regular expressions, creating tags on matches.
6029  * readline already dealt with normal regexps.
6030  *
6031  * Idea by Ben Wing <ben@666.com> (2002).
6032  */
6033 static void
6034 regex_tag_multiline ()
6035 {
6036   char *buffer = filebuf.buffer;
6037   regexp *rp;
6038   char *name;
6039
6040   for (rp = p_head; rp != NULL; rp = rp->p_next)
6041     {
6042       int match = 0;
6043
6044       if (!rp->multi_line)
6045         continue;               /* skip normal regexps */
6046
6047       /* Generic initialisations before parsing file from memory. */
6048       lineno = 1;               /* reset global line number */
6049       charno = 0;               /* reset global char number */
6050       linecharno = 0;           /* reset global char number of line start */
6051
6052       /* Only use generic regexps or those for the current language. */
6053       if (rp->lang != NULL && rp->lang != curfdp->lang)
6054         continue;
6055
6056       while (match >= 0 && match < filebuf.len)
6057         {
6058           match = re_search (rp->pat, buffer, filebuf.len, charno,
6059                              filebuf.len - match, &rp->regs);
6060           switch (match)
6061             {
6062             case -2:
6063               /* Some error. */
6064               if (!rp->error_signaled)
6065                 {
6066                   error ("regexp stack overflow while matching \"%s\"",
6067                          rp->pattern);
6068                   rp->error_signaled = TRUE;
6069                 }
6070               break;
6071             case -1:
6072               /* No match. */
6073               break;
6074             default:
6075               if (match == rp->regs.end[0])
6076                 {
6077                   if (!rp->error_signaled)
6078                     {
6079                       error ("regexp matches the empty string: \"%s\"",
6080                              rp->pattern);
6081                       rp->error_signaled = TRUE;
6082                     }
6083                   match = -3;   /* exit from while loop */
6084                   break;
6085                 }
6086
6087               /* Match occurred.  Construct a tag. */
6088               while (charno < rp->regs.end[0])
6089                 if (buffer[charno++] == '\n')
6090                   lineno++, linecharno = charno;
6091               name = rp->name;
6092               if (name[0] == '\0')
6093                 name = NULL;
6094               else /* make a named tag */
6095                 name = substitute (buffer, rp->name, &rp->regs);
6096               if (rp->force_explicit_name)
6097                 /* Force explicit tag name, if a name is there. */
6098                 pfnote (name, TRUE, buffer + linecharno,
6099                         charno - linecharno + 1, lineno, linecharno);
6100               else
6101                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6102                           charno - linecharno + 1, lineno, linecharno);
6103               break;
6104             }
6105         }
6106     }
6107 }
6108
6109 \f
6110 static bool
6111 nocase_tail (cp)
6112      char *cp;
6113 {
6114   register int len = 0;
6115
6116   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6117     cp++, len++;
6118   if (*cp == '\0' && !intoken (dbp[len]))
6119     {
6120       dbp += len;
6121       return TRUE;
6122     }
6123   return FALSE;
6124 }
6125
6126 static void
6127 get_tag (bp, namepp)
6128      register char *bp;
6129      char **namepp;
6130 {
6131   register char *cp = bp;
6132
6133   if (*bp != '\0')
6134     {
6135       /* Go till you get to white space or a syntactic break */
6136       for (cp = bp + 1; !notinname (*cp); cp++)
6137         continue;
6138       make_tag (bp, cp - bp, TRUE,
6139                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6140     }
6141
6142   if (namepp != NULL)
6143     *namepp = savenstr (bp, cp - bp);
6144 }
6145
6146 /*
6147  * Read a line of text from `stream' into `lbp', excluding the
6148  * newline or CR-NL, if any.  Return the number of characters read from
6149  * `stream', which is the length of the line including the newline.
6150  *
6151  * On DOS or Windows we do not count the CR character, if any before the
6152  * NL, in the returned length; this mirrors the behavior of Emacs on those
6153  * platforms (for text files, it translates CR-NL to NL as it reads in the
6154  * file).
6155  *
6156  * If multi-line regular expressions are requested, each line read is
6157  * appended to `filebuf'.
6158  */
6159 static long
6160 readline_internal (lbp, stream)
6161      linebuffer *lbp;
6162      register FILE *stream;
6163 {
6164   char *buffer = lbp->buffer;
6165   register char *p = lbp->buffer;
6166   register char *pend;
6167   int chars_deleted;
6168
6169   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6170
6171   for (;;)
6172     {
6173       register int c = getc (stream);
6174       if (p == pend)
6175         {
6176           /* We're at the end of linebuffer: expand it. */
6177           lbp->size *= 2;
6178           xrnew (buffer, lbp->size, char);
6179           p += buffer - lbp->buffer;
6180           pend = buffer + lbp->size;
6181           lbp->buffer = buffer;
6182         }
6183       if (c == EOF)
6184         {
6185           *p = '\0';
6186           chars_deleted = 0;
6187           break;
6188         }
6189       if (c == '\n')
6190         {
6191           if (p > buffer && p[-1] == '\r')
6192             {
6193               p -= 1;
6194 #ifdef DOS_NT
6195              /* Assume CRLF->LF translation will be performed by Emacs
6196                 when loading this file, so CRs won't appear in the buffer.
6197                 It would be cleaner to compensate within Emacs;
6198                 however, Emacs does not know how many CRs were deleted
6199                 before any given point in the file.  */
6200               chars_deleted = 1;
6201 #else
6202               chars_deleted = 2;
6203 #endif
6204             }
6205           else
6206             {
6207               chars_deleted = 1;
6208             }
6209           *p = '\0';
6210           break;
6211         }
6212       *p++ = c;
6213     }
6214   lbp->len = p - buffer;
6215
6216   if (need_filebuf              /* we need filebuf for multi-line regexps */
6217       && chars_deleted > 0)     /* not at EOF */
6218     {
6219       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6220         {
6221           /* Expand filebuf. */
6222           filebuf.size *= 2;
6223           xrnew (filebuf.buffer, filebuf.size, char);
6224         }
6225       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6226       filebuf.len += lbp->len;
6227       filebuf.buffer[filebuf.len++] = '\n';
6228       filebuf.buffer[filebuf.len] = '\0';
6229     }
6230
6231   return lbp->len + chars_deleted;
6232 }
6233
6234 /*
6235  * Like readline_internal, above, but in addition try to match the
6236  * input line against relevant regular expressions and manage #line
6237  * directives.
6238  */
6239 static void
6240 readline (lbp, stream)
6241      linebuffer *lbp;
6242      FILE *stream;
6243 {
6244   long result;
6245
6246   linecharno = charno;          /* update global char number of line start */
6247   result = readline_internal (lbp, stream); /* read line */
6248   lineno += 1;                  /* increment global line number */
6249   charno += result;             /* increment global char number */
6250
6251   /* Honour #line directives. */
6252   if (!no_line_directive)
6253     {
6254       static bool discard_until_line_directive;
6255
6256       /* Check whether this is a #line directive. */
6257       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6258         {
6259           int start, lno;
6260
6261           if (DEBUG) start = 0; /* shut up the compiler */
6262           if (sscanf (lbp->buffer, "#line %d %n\"", &lno, &start) >= 1
6263               && lbp->buffer[start] == '"')
6264             {
6265               char *endp = lbp->buffer + ++start;
6266
6267               assert (start > 0);
6268               while ((endp = etags_strchr (endp, '"')) != NULL
6269                      && endp[-1] == '\\')
6270                 endp++;
6271               if (endp != NULL)
6272                 /* Ok, this is a real #line directive.  Let's deal with it. */
6273                 {
6274                   char *taggedabsname;  /* absolute name of original file */
6275                   char *taggedfname;    /* name of original file as given */
6276                   char *name;           /* temp var */
6277
6278                   discard_until_line_directive = FALSE; /* found it */
6279                   name = lbp->buffer + start;
6280                   *endp = '\0';
6281                   canonicalize_filename (name); /* for DOS */
6282                   taggedabsname = absolute_filename (name, curfdp->infabsdir);
6283                   if (filename_is_absolute (name)
6284                       || filename_is_absolute (curfdp->infname))
6285                     taggedfname = savestr (taggedabsname);
6286                   else
6287                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6288
6289                   if (streq (curfdp->taggedfname, taggedfname))
6290                     /* The #line directive is only a line number change.  We
6291                        deal with this afterwards. */
6292                     free (taggedfname);
6293                   else
6294                     /* The tags following this #line directive should be
6295                        attributed to taggedfname.  In order to do this, set
6296                        curfdp accordingly. */
6297                     {
6298                       fdesc *fdp; /* file description pointer */
6299
6300                       /* Go look for a file description already set up for the
6301                          file indicated in the #line directive.  If there is
6302                          one, use it from now until the next #line
6303                          directive. */
6304                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6305                         if (streq (fdp->infname, curfdp->infname)
6306                             && streq (fdp->taggedfname, taggedfname))
6307                           /* If we remove the second test above (after the &&)
6308                              then all entries pertaining to the same file are
6309                              coalesced in the tags file.  If we use it, then
6310                              entries pertaining to the same file but generated
6311                              from different files (via #line directives) will
6312                              go into separate sections in the tags file.  These
6313                              alternatives look equivalent.  The first one
6314                              destroys some apparently useless information. */
6315                           {
6316                             curfdp = fdp;
6317                             free (taggedfname);
6318                             break;
6319                           }
6320                       /* Else, if we already tagged the real file, skip all
6321                          input lines until the next #line directive. */
6322                       if (fdp == NULL) /* not found */
6323                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6324                           if (streq (fdp->infabsname, taggedabsname))
6325                             {
6326                               discard_until_line_directive = TRUE;
6327                               free (taggedfname);
6328                               break;
6329                             }
6330                       /* Else create a new file description and use that from
6331                          now on, until the next #line directive. */
6332                       if (fdp == NULL) /* not found */
6333                         {
6334                           fdp = fdhead;
6335                           fdhead = xnew (1, fdesc);
6336                           *fdhead = *curfdp; /* copy curr. file description */
6337                           fdhead->next = fdp;
6338                           fdhead->infname = savestr (curfdp->infname);
6339                           fdhead->infabsname = savestr (curfdp->infabsname);
6340                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6341                           fdhead->taggedfname = taggedfname;
6342                           fdhead->usecharno = FALSE;
6343                           fdhead->prop = NULL;
6344                           fdhead->written = FALSE;
6345                           curfdp = fdhead;
6346                         }
6347                     }
6348                   free (taggedabsname);
6349                   lineno = lno - 1;
6350                   readline (lbp, stream);
6351                   return;
6352                 } /* if a real #line directive */
6353             } /* if #line is followed by a a number */
6354         } /* if line begins with "#line " */
6355
6356       /* If we are here, no #line directive was found. */
6357       if (discard_until_line_directive)
6358         {
6359           if (result > 0)
6360             {
6361               /* Do a tail recursion on ourselves, thus discarding the contents
6362                  of the line buffer. */
6363               readline (lbp, stream);
6364               return;
6365             }
6366           /* End of file. */
6367           discard_until_line_directive = FALSE;
6368           return;
6369         }
6370     } /* if #line directives should be considered */
6371
6372   {
6373     int match;
6374     regexp *rp;
6375     char *name;
6376
6377     /* Match against relevant regexps. */
6378     if (lbp->len > 0)
6379       for (rp = p_head; rp != NULL; rp = rp->p_next)
6380         {
6381           /* Only use generic regexps or those for the current language.
6382              Also do not use multiline regexps, which is the job of
6383              regex_tag_multiline. */
6384           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6385               || rp->multi_line)
6386             continue;
6387
6388           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6389           switch (match)
6390             {
6391             case -2:
6392               /* Some error. */
6393               if (!rp->error_signaled)
6394                 {
6395                   error ("regexp stack overflow while matching \"%s\"",
6396                          rp->pattern);
6397                   rp->error_signaled = TRUE;
6398                 }
6399               break;
6400             case -1:
6401               /* No match. */
6402               break;
6403             case 0:
6404               /* Empty string matched. */
6405               if (!rp->error_signaled)
6406                 {
6407                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6408                   rp->error_signaled = TRUE;
6409                 }
6410               break;
6411             default:
6412               /* Match occurred.  Construct a tag. */
6413               name = rp->name;
6414               if (name[0] == '\0')
6415                 name = NULL;
6416               else /* make a named tag */
6417                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6418               if (rp->force_explicit_name)
6419                 /* Force explicit tag name, if a name is there. */
6420                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6421               else
6422                 make_tag (name, strlen (name), TRUE,
6423                           lbp->buffer, match, lineno, linecharno);
6424               break;
6425             }
6426         }
6427   }
6428 }
6429
6430 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy is made by
 * savenstr, which is asked for all strlen (CP) characters.
 */
static char *
savestr (cp)
     char *cp;
{
  int length = strlen (cp);

  return savenstr (cp, length);
}
6441
6442 /*
6443  * Return a pointer to a space of size LEN+1 allocated with xnew where
6444  * the string CP has been copied for at most the first LEN characters.
6445  */
6446 static char *
6447 savenstr (cp, len)
6448      char *cp;
6449      int len;
6450 {
6451   register char *dp;
6452
6453   dp = xnew (len + 1, char);
6454   strncpy (dp, cp, len);
6455   dp[len] = '\0';
6456   return dp;
6457 }
6458
/*
 * Return a pointer to the last occurrence of the character C in the
 * string SP, or NULL if C does not occur.  The terminating NUL is
 * examined as well, so looking for '\0' yields the end of the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;              /* remember the rightmost match so far */
      if (*sp == '\0')
        break;
    }

  return (char *)last;
}
6480
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL if C does not occur.  The terminating NUL is
 * examined as well, so looking for '\0' yields the end of the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;            /* reached the end without a match */
    }
}
6499
/*
 * Compare the strings S1 and S2.  Two characters that are both
 * alphabetic are compared after lowcase conversion; any other pair is
 * compared as is.  The value returned is the difference at the first
 * position where the strings differ, or 0 if S1 ends without a
 * difference having been found.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  register int diff;

  for (;; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        diff = lowcase (*s1) - lowcase (*s2);
      else
        diff = *s1 - *s2;

      /* Stop at the first difference, or at the end of S1. */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }
}
6520
/*
 * Compare at most the first N characters of the strings S1 and S2.
 * Two characters that are both alphabetic are compared after lowcase
 * conversion; any other pair is compared as is.
 *
 * Return 0 if N is not positive or if the first N characters match;
 * otherwise return the difference at the first position where the
 * strings differ.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N is tested first and counts only the characters that matched.
     This way, when S1 ends right after N matching characters, its
     terminator is not compared with the next character of S2. */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;                   /* N characters compared equal */
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
6546
/* Return a pointer to the first character of CP, at or after its
   start, for which iswhite is false. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;

  return cp;
}
6556
/* Return a pointer to the first character of CP, at or after its
   start, that is either the end of the string or white according to
   iswhite. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6566
/* Report an error through `error' (S1 is its printf control string,
   S2 the argument for it), then terminate with EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);

  exit (EXIT_FAILURE);
}
6575
/* Hand S1 to perror, which reports the current errno, then terminate
   with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6583
6584 static void
6585 suggest_asking_for_help ()
6586 {
6587   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6588            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6589   exit (EXIT_FAILURE);
6590 }
6591
6592 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6593 static void
6594 error (s1, s2)
6595      const char *s1, *s2;
6596 {
6597   fprintf (stderr, "%s: ", progname);
6598   fprintf (stderr, s1, s2);
6599   fprintf (stderr, "\n");
6600 }
6601
6602 /* Return a newly-allocated string whose contents
6603    concatenate those of s1, s2, s3.  */
6604 static char *
6605 concat (s1, s2, s3)
6606      char *s1, *s2, *s3;
6607 {
6608   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6609   char *result = xnew (len1 + len2 + len3 + 1, char);
6610
6611   strcpy (result, s1);
6612   strcpy (result + len1, s2);
6613   strcpy (result + len1 + len2, s3);
6614   result[len1 + len2 + len3] = '\0';
6615
6616   return result;
6617 }
6618
6619 \f
6620 /* Does the same work as the system V getcwd, but does not need to
6621    guess the buffer size in advance. */
6622 static char *
6623 etags_getcwd ()
6624 {
6625 #ifdef HAVE_GETCWD
6626   int bufsize = 200;
6627   char *path = xnew (bufsize, char);
6628
6629   while (getcwd (path, bufsize) == NULL)
6630     {
6631       if (errno != ERANGE)
6632         pfatal ("getcwd");
6633       bufsize *= 2;
6634       free (path);
6635       path = xnew (bufsize, char);
6636     }
6637
6638   canonicalize_filename (path);
6639   return path;
6640
6641 #else /* not HAVE_GETCWD */
6642 #if MSDOS
6643
6644   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6645
6646   getwd (path);
6647
6648   for (p = path; *p != '\0'; p++)
6649     if (*p == '\\')
6650       *p = '/';
6651     else
6652       *p = lowcase (*p);
6653
6654   return strdup (path);
6655 #else /* not MSDOS */
6656   linebuffer path;
6657   FILE *pipe;
6658
6659   linebuffer_init (&path);
6660   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6661   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6662     pfatal ("pwd");
6663   pclose (pipe);
6664
6665   return path.buffer;
6666 #endif /* not MSDOS */
6667 #endif /* not HAVE_GETCWD */
6668 }
6669
6670 /* Return a newly allocated string containing the file name of FILE
6671    relative to the absolute directory DIR (which should end with a slash). */
6672 static char *
6673 relative_filename (file, dir)
6674      char *file, *dir;
6675 {
6676   char *fp, *dp, *afn, *res;
6677   int i;
6678
6679   /* Find the common root of file and dir (with a trailing slash). */
6680   afn = absolute_filename (file, cwd);
6681   fp = afn;
6682   dp = dir;
6683   while (*fp++ == *dp++)
6684     continue;
6685   fp--, dp--;                   /* back to the first differing char */
6686 #ifdef DOS_NT
6687   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6688     return afn;
6689 #endif
6690   do                            /* look at the equal chars until '/' */
6691     fp--, dp--;
6692   while (*fp != '/');
6693
6694   /* Build a sequence of "../" strings for the resulting relative file name. */
6695   i = 0;
6696   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6697     i += 1;
6698   res = xnew (3*i + strlen (fp + 1) + 1, char);
6699   res[0] = '\0';
6700   while (i-- > 0)
6701     strcat (res, "../");
6702
6703   /* Add the file name relative to the common root of file and dir. */
6704   strcat (res, fp + 1);
6705   free (afn);
6706
6707   return res;
6708 }
6709
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is not already absolute it is appended to DIR.  The
   "/dirname/.." and "/." substrings are then removed from the result.
   Should nothing be left, "/" is returned. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Back up to the slash that starts the previous
                 component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, which strcpy does not
                 allow: shift the tail, terminator included, with
                 memmove. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/." the same way; the regions overlap here too. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6773
/* Return a newly allocated string containing the absolute name of the
   directory where FILE resides, given DIR (which should end with a
   slash).  FILE is canonicalized in place.  If it contains no slash,
   a copy of DIR is returned. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash just for the time needed to
     resolve the directory part, then put the character back. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  dirname = absolute_filename (file, dir);
  last_slash[1] = saved;

  return dirname;
}
6795
/* Tell whether FN is an absolute file name: one that begins with a
   slash or, under DOS_NT, with a letter, a colon and a slash.  FN must
   have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6808
/* Under DOS_NT, make a lower case drive letter upper case and
   translate backslashes into slashes.  Works in place.  Elsewhere FN
   is left untouched. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  register char *p;

  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  for (p = fn; *p != '\0'; p++)
    {
      if (*p == '\\')
        *p = '/';
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6827
6828 \f
6829 /* Initialize a linebuffer for use */
6830 static void
6831 linebuffer_init (lbp)
6832      linebuffer *lbp;
6833 {
6834   lbp->size = (DEBUG) ? 3 : 200;
6835   lbp->buffer = xnew (lbp->size, char);
6836   lbp->buffer[0] = '\0';
6837   lbp->len = 0;
6838 }
6839
6840 /* Set the minimum size of a string contained in a linebuffer. */
6841 static void
6842 linebuffer_setlen (lbp, toksize)
6843      linebuffer *lbp;
6844      int toksize;
6845 {
6846   while (lbp->size <= toksize)
6847     {
6848       lbp->size *= 2;
6849       xrnew (lbp->buffer, lbp->size, char);
6850     }
6851   lbp->len = toksize;
6852 }
6853
6854 /* Like malloc but get fatal error if memory is exhausted. */
6855 static PTR
6856 xmalloc (size)
6857      unsigned int size;
6858 {
6859   PTR result = (PTR) malloc (size);
6860   if (result == NULL)
6861     fatal ("virtual memory exhausted", (char *)NULL);
6862   return result;
6863 }
6864
6865 static PTR
6866 xrealloc (ptr, size)
6867      char *ptr;
6868      unsigned int size;
6869 {
6870   PTR result = (PTR) realloc (ptr, size);
6871   if (result == NULL)
6872     fatal ("virtual memory exhausted", (char *)NULL);
6873   return result;
6874 }
6875
6876 /*
6877  * Local Variables:
6878  * indent-tabs-mode: t
6879  * tab-width: 8
6880  * fill-column: 79
6881  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6882  * End:
6883  */
6884
6885 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6886    (do not change this comment) */
6887
6888 /* etags.c ends here */