code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2    Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995,
   3                  1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4                  2005, 2006 Free Software Foundation, Inc. and Ken Arnold
   5
   6  This file is not considered part of GNU Emacs.
   7
   8  This program is free software; you can redistribute it and/or modify
   9  it under the terms of the GNU General Public License as published by
  10  the Free Software Foundation; either version 2 of the License, or
  11  (at your option) any later version.
  12
  13  This program is distributed in the hope that it will be useful,
  14  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  GNU General Public License for more details.
  17
  18  You should have received a copy of the GNU General Public License
  19  along with this program; if not, write to the Free Software Foundation,
  20  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
  21
  22 /*
  23  * Authors:
  24  *      Ctags originally by Ken Arnold.
  25  *      Fortran added by Jim Kleckner.
  26  *      Ed Pelegri-Llopart added C typedefs.
  27  *      Gnu Emacs TAGS format and modifications by RMS?
  28  * 1989 Sam Kendall added C++.
  29  * 1992 Joseph B. Wells improved C and C++ parsing.
  30  * 1993 Francesco Potortì reorganised C and C++.
  31  * 1994 Line-by-line regexp tags by Tom Tromey.
  32  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  33  * 2002 #line directives by Francesco Potortì.
  34  *
  35  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  36  */
  37
  38 /*
  39  * If you want to add support for a new language, start by looking at the LUA
  40  * language, which is the simplest.  Alternatively, consider shipping a
  41  * configuration file containing regexp definitions for etags.
  42  */
  43
  44 char pot_etags_version[] = "@(#) pot revision number is 17.15";
  45
  46 #define TRUE    1
  47 #define FALSE   0
  48
  49 #ifdef DEBUG
  50 #  undef DEBUG
  51 #  define DEBUG TRUE
  52 #else
  53 #  define DEBUG  FALSE
  54 #  define NDEBUG                /* disable assert */
  55 #endif
  56
  57 #ifdef HAVE_CONFIG_H
  58 # include <config.h>
  59   /* On some systems, Emacs defines static as nothing for the sake
  60      of unexec.  We don't want that here since we don't use unexec. */
  61 # undef static
  62 # define ETAGS_REGEXPS          /* use the regexp features */
  63 # define LONG_OPTIONS           /* accept long options */
  64 # ifndef PTR                    /* for Xemacs */
  65 #   define PTR void *
  66 # endif
  67 # ifndef __P                    /* for Xemacs */
  68 #   define __P(args) args
  69 # endif
  70 #else  /* no config.h */
  71 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  72 #   define __P(args) args       /* use prototypes */
  73 #   define PTR void *           /* for generic pointers */
  74 # else /* not standard C */
  75 #   define __P(args) ()         /* no prototypes */
  76 #   define const                /* remove const for old compilers' sake */
  77 #   define PTR long *           /* don't use void* */
  78 # endif
  79 #endif /* !HAVE_CONFIG_H */
  80
  81 #ifndef _GNU_SOURCE
  82 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  83 #endif
  84
  85 #ifdef LONG_OPTIONS
  86 #  undef LONG_OPTIONS
  87 #  define LONG_OPTIONS TRUE
  88 #else
  89 #  define LONG_OPTIONS  FALSE
  90 #endif
  91
  92 /* WIN32_NATIVE is for Xemacs.
  93    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  94 #ifdef WIN32_NATIVE
  95 # undef MSDOS
  96 # undef  WINDOWSNT
  97 # define WINDOWSNT
  98 #endif /* WIN32_NATIVE */
  99
 100 #ifdef MSDOS
 101 # undef MSDOS
 102 # define MSDOS TRUE
 103 # include <fcntl.h>
 104 # include <sys/param.h>
 105 # include <io.h>
 106 # ifndef HAVE_CONFIG_H
 107 #   define DOS_NT
 108 #   include <sys/config.h>
 109 # endif
 110 #else
 111 # define MSDOS FALSE
 112 #endif /* MSDOS */
 113
 114 #ifdef WINDOWSNT
 115 # include <stdlib.h>
 116 # include <fcntl.h>
 117 # include <string.h>
 118 # include <direct.h>
 119 # include <io.h>
 120 # define MAXPATHLEN _MAX_PATH
 121 # undef HAVE_NTGUI
 122 # undef  DOS_NT
 123 # define DOS_NT
 124 # ifndef HAVE_GETCWD
 125 #   define HAVE_GETCWD
 126 # endif /* undef HAVE_GETCWD */
 127 #else /* not WINDOWSNT */
 128 # ifdef STDC_HEADERS
 129 #  include <stdlib.h>
 130 #  include <string.h>
 131 # else /* no standard C headers */
 132     extern char *getenv ();
 133 #  ifdef VMS
 134 #   define EXIT_SUCCESS 1
 135 #   define EXIT_FAILURE 0
 136 #  else /* no VMS */
 137 #   define EXIT_SUCCESS 0
 138 #   define EXIT_FAILURE 1
 139 #  endif
 140 # endif
 141 #endif /* !WINDOWSNT */
 142
 143 #ifdef HAVE_UNISTD_H
 144 # include <unistd.h>
 145 #else
 146 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 147     extern char *getcwd (char *buf, size_t size);
 148 # endif
 149 #endif /* HAVE_UNISTD_H */
 150
 151 #include <stdio.h>
 152 #include <ctype.h>
 153 #include <errno.h>
 154 #ifndef errno
 155   extern int errno;
 156 #endif
 157 #include <sys/types.h>
 158 #include <sys/stat.h>
 159
 160 #include <assert.h>
 161 #ifdef NDEBUG
 162 # undef  assert                 /* some systems have a buggy assert.h */
 163 # define assert(x) ((void) 0)
 164 #endif
 165
 166 #if !defined (S_ISREG) && defined (S_IFREG)
 167 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 168 #endif
 169
 170 #if LONG_OPTIONS
 171 # include <getopt.h>
 172 #else
 173 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 174   extern char *optarg;
 175   extern int optind, opterr;
 176 #endif /* LONG_OPTIONS */
 177
 178 #ifdef ETAGS_REGEXPS
 179 # ifndef HAVE_CONFIG_H          /* this is a standalone compilation */
 180 #   ifdef __CYGWIN__            /* compiling on Cygwin */
 181                              !!! NOTICE !!!
 182  the regex.h distributed with Cygwin is not compatible with etags, alas!
 183 If you want regular expression support, you should delete this notice and
 184               arrange to use the GNU regex.h and regex.c.
 185 #   endif
 186 # endif
 187 # include <regex.h>
 188 #endif /* ETAGS_REGEXPS */
 189
 190 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 191  Leave it undefined to make the program "etags", which makes emacs-style
 192  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 193 #ifdef CTAGS
 194 # undef  CTAGS
 195 # define CTAGS TRUE
 196 #else
 197 # define CTAGS FALSE
 198 #endif
 199
 200 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 201 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 202 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 203 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 204
  205 #define CHARS 256               /* number of values CHAR can yield; size of the _wht.._etk tables */
  206 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* reduce X to the range 0..CHARS-1 */
  207 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
  208 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
  209 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
  210 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
  211 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
  212
  213 #define ISALNUM(c)      isalnum (CHAR(c)) /* <ctype.h> wrappers: the argument is first passed through CHAR */
  214 #define ISALPHA(c)      isalpha (CHAR(c))
  215 #define ISDIGIT(c)      isdigit (CHAR(c))
  216 #define ISLOWER(c)      islower (CHAR(c))
  217
  218 #define lowcase(c)      tolower (CHAR(c)) /* case conversion, argument likewise passed through CHAR */
  219 #define upcase(c)       toupper (CHAR(c))
 220
 221
  222 /*
  223  *      xnew, xrnew -- allocate, reallocate storage for N objects of type Type
  224  *      (xrnew also assigns the resulting pointer back to OldPointer)
  225  * SYNOPSIS:    Type *xnew (int n, Type);
  226  *              Type *xrnew (OldPointer, int n, Type);
  227  */
  228 #if DEBUG                       /* DEBUG builds go through the trace_* wrappers, passing file and line */
  229 # include "chkmalloc.h"
  230 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
  231                                                   (n) * sizeof (Type)))
  232 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
  233                                         (char *) (op), (n) * sizeof (Type)))
  234 #else                           /* otherwise use the xmalloc and xrealloc declared in this file */
  235 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
  236 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
  237                                         (char *) (op), (n) * sizeof (Type)))
  238 #endif
 239
  240 #define bool int                /* booleans are plain ints holding TRUE or FALSE; longopts takes the address of some of them */
  241
  242 typedef void Lang_function __P((FILE *)); /* type of the parse function stored in a language entry */
 243
  244 typedef struct                  /* element type of the compressors table */
  245 {
  246   char *suffix;                 /* file name suffix for this compressor */
  247   char *command;                /* command that takes one arg and decompresses to stdout */
  248 } compressor;
 249
  250 typedef struct                  /* element type of the lang_names table */
  251 {
  252   char *name;                   /* language name */
  253   char *help;                   /* detailed help for the language */
  254   Lang_function *function;      /* parse function */
  255   char **suffixes;              /* name suffixes of this language's files */
  256   char **filenames;             /* names of this language's files */
  257   char **interpreters;          /* interpreters for this language */
  258   bool metasource;              /* source used to generate other sources */
  259 } language;                     /* keep the field order: lang_names uses positional initializers */
 260
  261 typedef struct fdesc            /* description of one input file; entries are chained through NEXT */
  262 {
  263   struct fdesc *next;           /* for the linked list */
  264   char *infname;                /* uncompressed input file name */
  265   char *infabsname;             /* absolute uncompressed input file name */
  266   char *infabsdir;              /* absolute dir of input file */
  267   char *taggedfname;            /* file name to write in tagfile */
  268   language *lang;               /* language of file */
  269   char *prop;                   /* file properties to write in tagfile */
  270   bool usecharno;               /* etags tags shall contain char number */
  271   bool written;                 /* entry written in the tags file */
  272 } fdesc;
 273
  274 typedef struct node_st
  275 {                               /* sorting structure: one tag, stored as a binary tree node */
  276   struct node_st *left, *right; /* left and right sons */
  277   fdesc *fdp;                   /* description of the file the tag belongs to */
  278   char *name;                   /* tag name */
  279   char *regex;                  /* search regexp */
  280   bool valid;                   /* write this tag on the tag file */
  281   bool is_func;                 /* function tag: use regexp in CTAGS mode */
  282   bool been_warned;             /* warning already given for duplicated tag */
  283   int lno;                      /* line number tag is on */
  284   long cno;                     /* character number line starts on */
  285 } node;
 286
  287 /*
  288  * A `linebuffer' is a structure which holds a line of text.
  289  * `readline_internal' reads a line from a stream into a linebuffer
  290  * and works regardless of the length of the line.
  291  * SIZE is the size of BUFFER, LEN is the length of the string in
  292  * BUFFER after readline reads it.
  293  */
  294 typedef struct
  295 {
  296   long size;                    /* size of BUFFER */
  297   int len;                      /* length of the string currently in BUFFER */
  298   char *buffer;                 /* storage for the line's text */
  299 } linebuffer;
 300
  301 /* Used to support mixing of --lang and file names: one argument tagged with its kind. */
  302 typedef struct
  303 {
  304   enum {
  305     at_language,                /* a language specification */
  306     at_regexp,                  /* a regular expression */
  307     at_filename,                /* a file name */
  308     at_stdin,                   /* read from stdin here */
  309     at_end                      /* stop parsing the list */
  310   } arg_type;                   /* argument type */
  311   language *lang;               /* language associated with the argument */
  312   char *what;                   /* the argument itself */
  313 } argument;
 314
  315 #ifdef ETAGS_REGEXPS
  316 /* Structure defining a regular expression; entries are chained through P_NEXT. */
  317 typedef struct regexp
  318 {
  319   struct regexp *p_next;        /* pointer to next in list */
  320   language *lang;               /* if set, use only for this language */
  321   char *pattern;                /* the regexp pattern */
  322   char *name;                   /* tag name */
  323   struct re_pattern_buffer *pat; /* the compiled pattern */
  324   struct re_registers regs;     /* re registers */
  325   bool error_signaled;          /* already signaled for this regexp */
  326   bool force_explicit_name;     /* do not allow implicit tag name */
  327   bool ignore_case;             /* ignore case when matching */
  328   bool multi_line;              /* do a multi-line match on the whole file */
  329 } regexp;
  330 #endif /* ETAGS_REGEXPS */
 331
 332
 333 /* Many compilers barf on this:
 334         Lang_function Ada_funcs;
 335    so let's write it this way */
 336 static void Ada_funcs __P((FILE *));
 337 static void Asm_labels __P((FILE *));
 338 static void C_entries __P((int c_ext, FILE *));
 339 static void default_C_entries __P((FILE *));
 340 static void plain_C_entries __P((FILE *));
 341 static void Cjava_entries __P((FILE *));
 342 static void Cobol_paragraphs __P((FILE *));
 343 static void Cplusplus_entries __P((FILE *));
 344 static void Cstar_entries __P((FILE *));
 345 static void Erlang_functions __P((FILE *));
 346 static void Forth_words __P((FILE *));
 347 static void Fortran_functions __P((FILE *));
 348 static void HTML_labels __P((FILE *));
 349 static void Lisp_functions __P((FILE *));
 350 static void Lua_functions __P((FILE *));
 351 static void Makefile_targets __P((FILE *));
 352 static void Pascal_functions __P((FILE *));
 353 static void Perl_functions __P((FILE *));
 354 static void PHP_functions __P((FILE *));
 355 static void PS_functions __P((FILE *));
 356 static void Prolog_functions __P((FILE *));
 357 static void Python_functions __P((FILE *));
 358 static void Scheme_functions __P((FILE *));
 359 static void TeX_commands __P((FILE *));
 360 static void Texinfo_nodes __P((FILE *));
 361 static void Yacc_entries __P((FILE *));
 362 static void just_read_file __P((FILE *));
 363
 364 static void print_language_names __P((void));
 365 static void print_version __P((void));
 366 static void print_help __P((argument *));
 367 int main __P((int, char **));
 368
 369 static compressor *get_compressor_from_suffix __P((char *, char **));
 370 static language *get_language_from_langname __P((const char *));
 371 static language *get_language_from_interpreter __P((char *));
 372 static language *get_language_from_filename __P((char *, bool));
 373 static void readline __P((linebuffer *, FILE *));
 374 static long readline_internal __P((linebuffer *, FILE *));
 375 static bool nocase_tail __P((char *));
 376 static void get_tag __P((char *, char **));
 377
 378 #ifdef ETAGS_REGEXPS
 379 static void analyse_regex __P((char *));
 380 static void free_regexps __P((void));
 381 static void regex_tag_multiline __P((void));
 382 #endif /* ETAGS_REGEXPS */
 383 static void error __P((const char *, const char *));
 384 static void suggest_asking_for_help __P((void));
 385 void fatal __P((char *, char *));
 386 static void pfatal __P((char *));
 387 static void add_node __P((node *, node **));
 388
 389 static void init __P((void));
 390 static void process_file_name __P((char *, language *));
 391 static void process_file __P((FILE *, char *, language *));
 392 static void find_entries __P((FILE *));
 393 static void free_tree __P((node *));
 394 static void free_fdesc __P((fdesc *));
 395 static void pfnote __P((char *, bool, char *, int, int, long));
 396 static void make_tag __P((char *, int, bool, char *, int, int, long));
 397 static void invalidate_nodes __P((fdesc *, node **));
 398 static void put_entries __P((node *));
 399
 400 static char *concat __P((char *, char *, char *));
 401 static char *skip_spaces __P((char *));
 402 static char *skip_non_spaces __P((char *));
 403 static char *savenstr __P((char *, int));
 404 static char *savestr __P((char *));
 405 static char *etags_strchr __P((const char *, int));
 406 static char *etags_strrchr __P((const char *, int));
 407 static int etags_strcasecmp __P((const char *, const char *));
 408 static int etags_strncasecmp __P((const char *, const char *, int));
 409 static char *etags_getcwd __P((void));
 410 static char *relative_filename __P((char *, char *));
 411 static char *absolute_filename __P((char *, char *));
 412 static char *absolute_dirname __P((char *, char *));
 413 static bool filename_is_absolute __P((char *f));
 414 static void canonicalize_filename __P((char *));
 415 static void linebuffer_init __P((linebuffer *));
 416 static void linebuffer_setlen __P((linebuffer *, int));
 417 static PTR xmalloc __P((unsigned int));
 418 static PTR xrealloc __P((char *, unsigned int));
 419
 420 \f
 421 static char searchar = '/';     /* use /.../ searches */
 422
 423 static char *tagfile;           /* output file */
 424 static char *progname;          /* name this program was invoked with */
 425 static char *cwd;               /* current working directory */
 426 static char *tagfiledir;        /* directory of tagfile */
 427 static FILE *tagf;              /* ioptr for tags file */
 428
 429 static fdesc *fdhead;           /* head of file description list */
 430 static fdesc *curfdp;           /* current file description */
 431 static int lineno;              /* line number of current line */
 432 static long charno;             /* current character number */
 433 static long linecharno;         /* charno of start of current line */
 434 static char *dbp;               /* pointer to start of current tag */
 435
 436 static const int invalidcharno = -1;
 437
 438 static node *nodehead;          /* the head of the binary tree of tags */
 439 static node *last_node;         /* the last node created */
 440
 441 static linebuffer lb;           /* the current line */
 442 static linebuffer filebuf;      /* a buffer containing the whole file */
 443 static linebuffer token_name;   /* a buffer containing a tag name */
 444
  445 /* boolean "functions" (see init): tables read by iswhite, notinname, intoken, begtoken, endtoken */
  446 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
  447 static char
  448   /* white chars (see iswhite) */
  449   *white = " \f\t\n\r\v",
  450   /* not in a name (see notinname) */
  451   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
  452   /* token ending chars (see endtoken) */
  453   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
  454   /* token starting chars (see begtoken) */
  455   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
  456   /* valid in-token chars (see intoken) */
  457   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 458
 459 static bool append_to_tagfile;  /* -a: append to tags */
 460 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 461 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 462 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 463                                 /* 0 struct/enum/union decls, and C++ */
 464                                 /* member functions. */
 465 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 466                                 /* constants and variables. */
 467                                 /* -D: opposite of -d.  Default under ctags. */
 468 static bool globals;            /* create tags for global variables */
 469 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 470 static bool members;            /* create tags for C member variables */
 471 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 472 static bool update;             /* -u: update tags */
 473 static bool vgrind_style;       /* -v: create vgrind style index output */
 474 static bool no_warnings;        /* -w: suppress warnings */
 475 static bool cxref_style;        /* -x: create cxref style output */
 476 static bool cplusplus;          /* .[hc] means C++, not C */
 477 static bool ignoreindent;       /* -I: ignore indentation in C */
 478 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 479
 480 /* STDIN is defined in LynxOS system headers */
 481 #ifdef STDIN
 482 # undef STDIN
 483 #endif
 484
 485 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 486 static bool parsing_stdin;      /* --parse-stdin used */
 487
 488 #ifdef ETAGS_REGEXPS
 489 static regexp *p_head;          /* list of all regexps */
 490 static bool need_filebuf;       /* some regexes are multi-line */
 491 #else
 492 # define need_filebuf FALSE
 493 #endif /* ETAGS_REGEXPS */
 494
  495 #if LONG_OPTIONS
  496 static struct option longopts[] = /* long option table; only defined when LONG_OPTIONS is true */
  497 {
  498   { "append",             no_argument,       NULL,               'a'   },
  499   { "packages-only",      no_argument,       &packages_only,     TRUE  }, /* flag pointer given: the variable itself receives the value */
  500   { "c++",                no_argument,       NULL,               'C'   },
  501   { "declarations",       no_argument,       &declarations,      TRUE  },
  502   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
  503   { "help",               no_argument,       NULL,               'h'   },
  504   { "help",               no_argument,       NULL,               'H'   }, /* NOTE(review): same long name as the entry above -- confirm which one getopt_long selects */
  505   { "ignore-indentation", no_argument,       NULL,               'I'   },
  506   { "language",           required_argument, NULL,               'l'   },
  507   { "members",            no_argument,       &members,           TRUE  },
  508   { "no-members",         no_argument,       &members,           FALSE },
  509   { "output",             required_argument, NULL,               'o'   },
  510 #ifdef ETAGS_REGEXPS
  511   { "regex",              required_argument, NULL,               'r'   },
  512   { "no-regex",           no_argument,       NULL,               'R'   },
  513   { "ignore-case-regex",  required_argument, NULL,               'c'   },
  514 #endif /* ETAGS_REGEXPS */
  515   { "parse-stdin",        required_argument, NULL,               STDIN }, /* STDIN is 0x1001, not a character code */
  516   { "version",            no_argument,       NULL,               'V'   },
  517
  518 #if CTAGS /* Ctags options */
  519   { "backward-search",    no_argument,       NULL,               'B'   },
  520   { "cxref",              no_argument,       NULL,               'x'   },
  521   { "defines",            no_argument,       NULL,               'd'   },
  522   { "globals",            no_argument,       &globals,           TRUE  },
  523   { "typedefs",           no_argument,       NULL,               't'   },
  524   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  525   { "update",             no_argument,       NULL,               'u'   },
  526   { "vgrind",             no_argument,       NULL,               'v'   },
  527   { "no-warn",            no_argument,       NULL,               'w'   },
  528
  529 #else /* Etags options */
  530   { "no-defines",         no_argument,       NULL,               'D'   },
  531   { "no-globals",         no_argument,       &globals,           FALSE },
  532   { "include",            required_argument, NULL,               'i'   },
  533 #endif
  534   { NULL }                      /* end-of-table marker */
  535 };
  536 #endif /* LONG_OPTIONS */
 537
  538 static compressor compressors[] = /* { file name suffix, decompression command } pairs */
  539 {
  540   { "z", "gzip -d -c"},
  541   { "Z", "gzip -d -c"},
  542   { "gz", "gzip -d -c"},
  543   { "GZ", "gzip -d -c"},
  544   { "bz2", "bzip2 -d -c" },
  545   { NULL }                      /* end-of-table marker */
  546 };
 547
 548 /*
 549  * Language stuff.
 550  */
 551
 552 /* Ada code */
 553 static char *Ada_suffixes [] =
 554   { "ads", "adb", "ada", NULL };
 555 static char Ada_help [] =
 556 "In Ada code, functions, procedures, packages, tasks and types are\n\
 557 tags.  Use the `--packages-only' option to create tags for\n\
 558 packages only.\n\
 559 Ada tag names have suffixes indicating the type of entity:\n\
 560         Entity type:    Qualifier:\n\
 561         ------------    ----------\n\
 562         function        /f\n\
 563         procedure       /p\n\
 564         package spec    /s\n\
 565         package body    /b\n\
 566         type            /t\n\
 567         task            /k\n\
 568 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 569 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 570 will just search for any tag `bidule'.";
 571
 572 /* Assembly code */
 573 static char *Asm_suffixes [] =
 574   { "a",        /* Unix assembler */
 575     "asm", /* Microcontroller assembly */
 576     "def", /* BSO/Tasking definition includes  */
 577     "inc", /* Microcontroller include files */
 578     "ins", /* Microcontroller include files */
 579     "s", "sa", /* Unix assembler */
 580     "S",   /* cpp-processed Unix assembler */
 581     "src", /* BSO/Tasking C compiler output */
 582     NULL
 583   };
 584 static char Asm_help [] =
 585 "In assembler code, labels appearing at the beginning of a line,\n\
 586 followed by a colon, are tags.";
 587
 588
  589 /* Note that .c and .h can be considered C++, if the --c++ flag was
  590    given, or if the `class' or `template' keywords are met inside the file.
  591    That is why default_C_entries is called for these. */
  592 static char *default_C_suffixes [] =
  593   { "c", "h", NULL };
  594 static char default_C_help [] =
  595 "In C code, any C function or typedef is a tag, and so are\n\
  596 definitions of `struct', `union' and `enum'.  `#define' macro\n\
  597 definitions and `enum' constants are tags unless you specify\n\
  598 `--no-defines'.  Global variables are tags unless you specify\n\
  599 `--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
  600 can make the tags table file much smaller.\n\
  601 You can tag function declarations and external variables by\n\
  602 using `--declarations', and struct members by using `--members'.";
 603
 604 static char *Cplusplus_suffixes [] =
 605   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 606     "M",                        /* Objective C++ */
 607     "pdb",                      /* Postscript with C syntax */
 608     NULL };
 609 static char Cplusplus_help [] =
 610 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 611 --help --lang=c --lang=c++ for full help.)\n\
 612 In addition to C tags, member functions are also recognized, and\n\
 613 optionally member variables if you use the `--members' option.\n\
 614 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 615 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 616 `operator+'.";
 617
 618 static char *Cjava_suffixes [] =
 619   { "java", NULL };
 620 static char Cjava_help [] =
 621 "In Java code, all the tags constructs of C and C++ code are\n\
 622 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 623
 624
 625 static char *Cobol_suffixes [] =
 626   { "COB", "cob", NULL };
 627 static char Cobol_help [] =
 628 "In Cobol code, tags are paragraph names; that is, any word\n\
 629 starting in column 8 and followed by a period.";
 630
 631 static char *Cstar_suffixes [] =
 632   { "cs", "hs", NULL };
 633
 634 static char *Erlang_suffixes [] =
 635   { "erl", "hrl", NULL };
 636 static char Erlang_help [] =
 637 "In Erlang code, the tags are the functions, records and macros\n\
 638 defined in the file.";
 639
 640 char *Forth_suffixes [] =
 641   { "fth", "tok", NULL };
 642 static char Forth_help [] =
 643 "In Forth code, tags are words defined by `:',\n\
 644 constant, code, create, defer, value, variable, buffer:, field.";
 645
 646 static char *Fortran_suffixes [] =
 647   { "F", "f", "f90", "for", NULL };
 648 static char Fortran_help [] =
 649 "In Fortran code, functions, subroutines and block data are tags.";
 650
 651 static char *HTML_suffixes [] =
 652   { "htm", "html", "shtml", NULL };
 653 static char HTML_help [] =
 654 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 655 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 656 occurrences of `id='.";
 657
 658 static char *Lisp_suffixes [] =
 659   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 660 static char Lisp_help [] =
 661 "In Lisp code, any function defined with `defun', any variable\n\
 662 defined with `defvar' or `defconst', and in general the first\n\
 663 argument of any expression that starts with `(def' in column zero\n\
 664 is a tag.";
 665
 666 static char *Lua_suffixes [] =
 667   { "lua", "LUA", NULL };
 668 static char Lua_help [] =
 669 "In Lua scripts, all functions are tags.";
 670
 671 static char *Makefile_filenames [] =
 672   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 673 static char Makefile_help [] =
 674 "In makefiles, targets are tags; additionally, variables are tags\n\
 675 unless you specify `--no-globals'.";
 676
 677 static char *Objc_suffixes [] =
 678   { "lm",                       /* Objective lex file */
 679     "m",                        /* Objective C file */
 680      NULL };
 681 static char Objc_help [] =
 682 "In Objective C code, tags include Objective C definitions for classes,\n\
 683 class categories, methods and protocols.  Tags for variables and\n\
 684 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.";
 685
 686 static char *Pascal_suffixes [] =
 687   { "p", "pas", NULL };
 688 static char Pascal_help [] =
 689 "In Pascal code, the tags are the functions and procedures defined\n\
 690 in the file.";
 691
 692 static char *Perl_suffixes [] =
 693   { "pl", "pm", NULL };
 694 static char *Perl_interpreters [] =
 695   { "perl", "@PERL@", NULL };
 696 static char Perl_help [] =
 697 "In Perl code, the tags are the packages, subroutines and variables\n\
 698 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 699 `--globals' if you want to tag global variables.  Tags for\n\
 700 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 701 defined in the default package is `main::SUB'.";
 702
 703 static char *PHP_suffixes [] =
 704   { "php", "php3", "php4", NULL };
 705 static char PHP_help [] =
 706 "In PHP code, tags are functions, classes and defines.  When using\n\
 707 the `--members' option, vars are tags too.";
 708
 709 static char *plain_C_suffixes [] =
 710   { "pc",                       /* Pro*C file */
 711      NULL };
 712
 713 static char *PS_suffixes [] =
 714   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 715 static char PS_help [] =
 716 "In PostScript code, the tags are the functions.";
 717
 718 static char *Prolog_suffixes [] =
 719   { "prolog", NULL };
 720 static char Prolog_help [] =
 721 "In Prolog code, tags are predicates and rules at the beginning of\n\
 722 line.";
 723
 724 static char *Python_suffixes [] =
 725   { "py", NULL };
 726 static char Python_help [] =
 727 "In Python code, `def' or `class' at the beginning of a line\n\
 728 generate a tag.";
 729
 730 /* Can't do the `SCM' or `scm' prefix with a version number. */
 731 static char *Scheme_suffixes [] =
 732   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 733 static char Scheme_help [] =
 734 "In Scheme code, tags include anything defined with `def' or with a\n\
 735 construct whose name starts with `def'.  They also include\n\
 736 variables set with `set!' at top level in the file.";
 737
 738 static char *TeX_suffixes [] =
 739   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 740 static char TeX_help [] =
 741 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 742 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 743 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 744 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 745 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 746 \n\
 747 Other commands can be specified by setting the environment variable\n\
 748 `TEXTAGS' to a colon-separated list like, for example,\n\
 749      TEXTAGS=\"mycommand:myothercommand\".";
 750
 751
 752 static char *Texinfo_suffixes [] =
 753   { "texi", "texinfo", "txi", NULL };
 754 static char Texinfo_help [] =
 755 "for texinfo files, lines starting with @node are tagged.";
 756
 757 static char *Yacc_suffixes [] =
 758   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 759 static char Yacc_help [] =
 760 "In Bison or Yacc input files, each rule defines as a tag the\n\
 761 nonterminal it constructs.  The portions of the file that contain\n\
 762 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 763 for full help).";
 764
 765 static char auto_help [] =
 766 "`auto' is not a real language, it indicates to use\n\
 767 a default language for files base on file name suffix and file contents.";
 768
 769 static char none_help [] =
 770 "`none' is not a real language, it indicates to only do\n\
 771 regexp processing on files.";
 772
 773 static char no_lang_help [] =
 774 "No detailed help available for this language.";
 775
 776
 777 /*
 778  * Table of languages.
 779  *
 780  * It is ok for a given function to be listed under more than one
 781  * name.  I just didn't.
 782  */
 783
 784 static language lang_names [] =
 785 {
 786   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 787   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 788   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 789   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 790   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 791   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 792   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 793   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 794   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 795   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 796   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 797   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 798   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 799   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 800   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 801   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 802   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 803   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 804   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 805   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 806   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 807   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 808   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 809   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 810   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 811   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 812   { "auto",      auto_help },                      /* default guessing scheme */
 813   { "none",      none_help,      just_read_file }, /* regexp matching only */
 814   { NULL }                /* end of list */
 815 };
 816
 817 \f
 818 static void
 819 print_language_names ()
 820 {
 821   language *lang;
 822   char **name, **ext;
 823
 824   puts ("\nThese are the currently supported languages, along with the\n\
 825 default file names and dot suffixes:");
 826   for (lang = lang_names; lang->name != NULL; lang++)
 827     {
 828       printf ("  %-*s", 10, lang->name);
 829       if (lang->filenames != NULL)
 830         for (name = lang->filenames; *name != NULL; name++)
 831           printf (" %s", *name);
 832       if (lang->suffixes != NULL)
 833         for (ext = lang->suffixes; *ext != NULL; ext++)
 834           printf (" .%s", *ext);
 835       puts ("");
 836     }
 837   puts ("where `auto' means use default language for files based on file\n\
 838 name suffix, and `none' means only do regexp processing on files.\n\
 839 If no language is specified and no matching suffix is found,\n\
 840 the first line of the file is read for a sharp-bang (#!) sequence\n\
 841 followed by the name of an interpreter.  If no such sequence is found,\n\
 842 Fortran is tried first; if no tags are found, C is tried next.\n\
 843 When parsing any C file, a \"class\" or \"template\" keyword\n\
 844 switches to C++.");
 845   puts ("Compressed files are supported using gzip and bzip2.\n\
 846 \n\
 847 For detailed help on a given language use, for example,\n\
 848 etags --help --lang=ada.");
 849 }
 850
 851 #ifndef EMACS_NAME
 852 # define EMACS_NAME "standalone"
 853 #endif
 854 #ifndef VERSION
 855 # define VERSION "version"
 856 #endif
 857 static void
 858 print_version ()
 859 {
 860   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 861   puts ("Copyright (C) 2006 Free Software Foundation, Inc. and Ken Arnold");
 862   puts ("This program is distributed under the same terms as Emacs");
 863
 864   exit (EXIT_SUCCESS);
 865 }
 866
 867 static void
 868 print_help (argbuffer)
 869      argument *argbuffer;
 870 {
 871   bool help_for_lang = FALSE;
 872
 873   for (; argbuffer->arg_type != at_end; argbuffer++)
 874     if (argbuffer->arg_type == at_language)
 875       {
 876         if (help_for_lang)
 877           puts ("");
 878         puts (argbuffer->lang->help);
 879         help_for_lang = TRUE;
 880       }
 881
 882   if (help_for_lang)
 883     exit (EXIT_SUCCESS);
 884
 885   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 886 \n\
 887 These are the options accepted by %s.\n", progname, progname);
 888   if (LONG_OPTIONS)
 889     puts ("You may use unambiguous abbreviations for the long option names.");
 890   else
 891     puts ("Long option names do not work with this executable, as it is not\n\
 892 linked with GNU getopt.");
 893   puts ("  A - as file name means read names from stdin (one per line).\n\
 894 Absolute names are stored in the output file as they are.\n\
 895 Relative ones are stored relative to the output file's directory.\n");
 896
 897   puts ("-a, --append\n\
 898         Append tag entries to existing tags file.");
 899
 900   puts ("--packages-only\n\
 901         For Ada files, only generate tags for packages.");
 902
 903   if (CTAGS)
 904     puts ("-B, --backward-search\n\
 905         Write the search commands for the tag entries using '?', the\n\
 906         backward-search command instead of '/', the forward-search command.");
 907
 908   /* This option is mostly obsolete, because etags can now automatically
 909      detect C++.  Retained for backward compatibility and for debugging and
 910      experimentation.  In principle, we could want to tag as C++ even
 911      before any "class" or "template" keyword.
 912   puts ("-C, --c++\n\
 913         Treat files whose name suffix defaults to C language as C++ files.");
 914   */
 915
 916   puts ("--declarations\n\
 917         In C and derived languages, create tags for function declarations,");
 918   if (CTAGS)
 919     puts ("\tand create tags for extern variables if --globals is used.");
 920   else
 921     puts
 922       ("\tand create tags for extern variables unless --no-globals is used.");
 923
 924   if (CTAGS)
 925     puts ("-d, --defines\n\
 926         Create tag entries for C #define constants and enum constants, too.");
 927   else
 928     puts ("-D, --no-defines\n\
 929         Don't create tag entries for C #define constants and enum constants.\n\
 930         This makes the tags file smaller.");
 931
 932   if (!CTAGS)
 933     puts ("-i FILE, --include=FILE\n\
 934         Include a note in tag file indicating that, when searching for\n\
 935         a tag, one should also consult the tags file FILE after\n\
 936         checking the current file.");
 937
 938   puts ("-l LANG, --language=LANG\n\
 939         Force the following files to be considered as written in the\n\
 940         named language up to the next --language=LANG option.");
 941
 942   if (CTAGS)
 943     puts ("--globals\n\
 944         Create tag entries for global variables in some languages.");
 945   else
 946     puts ("--no-globals\n\
 947         Do not create tag entries for global variables in some\n\
 948         languages.  This makes the tags file smaller.");
 949   puts ("--members\n\
 950         Create tag entries for members of structures in some languages.");
 951
 952 #ifdef ETAGS_REGEXPS
 953   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 954         Make a tag for each line matching a regular expression pattern\n\
 955         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 956         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 957         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 958         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 959   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 960         For example Tcl named tags can be created with:\n\
 961           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 962         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 963         `m' means to allow multi-line matches, `s' implies `m' and\n\
 964         causes dot to match any character, including newline.");
 965   puts ("-R, --no-regex\n\
 966         Don't create tags from regexps for the following files.");
 967 #endif /* ETAGS_REGEXPS */
 968   puts ("-I, --ignore-indentation\n\
 969         In C and C++ do not assume that a closing brace in the first\n\
 970         column is the final brace of a function or structure definition.");
 971   puts ("-o FILE, --output=FILE\n\
 972         Write the tags to FILE.");
 973   puts ("--parse-stdin=NAME\n\
 974         Read from standard input and record tags as belonging to file NAME.");
 975
 976   if (CTAGS)
 977     {
 978       puts ("-t, --typedefs\n\
 979         Generate tag entries for C and Ada typedefs.");
 980       puts ("-T, --typedefs-and-c++\n\
 981         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 982         and C++ member functions.");
 983     }
 984
 985   if (CTAGS)
 986     puts ("-u, --update\n\
 987         Update the tag entries for the given files, leaving tag\n\
 988         entries for other files in place.  Currently, this is\n\
 989         implemented by deleting the existing entries for the given\n\
 990         files and then rewriting the new entries at the end of the\n\
 991         tags file.  It is often faster to simply rebuild the entire\n\
 992         tag file than to use this.");
 993
 994   if (CTAGS)
 995     {
 996       puts ("-v, --vgrind\n\
 997         Print on the standard output an index of items intended for\n\
 998         human consumption, similar to the output of vgrind.  The index\n\
 999         is sorted, and gives the page number of each item.");
1000       puts ("-w, --no-warn\n\
1001         Suppress warning messages about entries defined in multiple\n\
1002         files.");
1003       puts ("-x, --cxref\n\
1004         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1005         The output uses line numbers instead of page numbers, but\n\
1006         beyond that the differences are cosmetic; try both to see\n\
1007         which you like.");
1008     }
1009
1010   puts ("-V, --version\n\
1011         Print the version of the program.\n\
1012 -h, --help\n\
1013         Print this help message.\n\
1014         Followed by one or more `--language' options prints detailed\n\
1015         help about tag generation for the specified languages.");
1016
1017   print_language_names ();
1018
1019   puts ("");
1020   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1021
1022   exit (EXIT_SUCCESS);
1023 }
1024
1025 \f
1026 #ifdef VMS                      /* VMS specific functions */
1027
/* End-of-string marker.  */
#define EOS     '\0'

/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN       255

/* A file specification: a length word followed by the characters,
   with room for a terminating EOS after MAX_FILE_SPEC_LEN of them.  */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1037
1038 /*
1039  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1040  returning in each successive call the next file name matching the input
1041  spec. The function expects that each in_spec passed
1042  to it will be processed to completion; in particular, up to and
1043  including the call following that in which the last matching name
1044  is returned, the function ignores the value of in_spec, and will
1045  only start processing a new spec with the following call.
1046  If an error occurs, on return out_spec contains the value
1047  of in_spec when the error occurred.
1048
1049  With each successive file name returned in out_spec, the
1050  function's return value is one. When there are no more matching
1051  names the function returns zero. If on the first call no file
1052  matches in_spec, or there is any other error, -1 is returned.
1053 */
1054
1055 #include        <rmsdef.h>
1056 #include        <descrip.h>
1057 #define         OUTSIZE MAX_FILE_SPEC_LEN
1058 static short
1059 fn_exp (out, in)
1060      vspec *out;
1061      char *in;
1062 {
1063   static long context = 0;
1064   static struct dsc$descriptor_s o;
1065   static struct dsc$descriptor_s i;
1066   static bool pass1 = TRUE;
1067   long status;
1068   short retval;
1069
1070   if (pass1)
1071     {
1072       pass1 = FALSE;
1073       o.dsc$a_pointer = (char *) out;
1074       o.dsc$w_length = (short)OUTSIZE;
1075       i.dsc$a_pointer = in;
1076       i.dsc$w_length = (short)strlen(in);
1077       i.dsc$b_dtype = DSC$K_DTYPE_T;
1078       i.dsc$b_class = DSC$K_CLASS_S;
1079       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1080       o.dsc$b_class = DSC$K_CLASS_VS;
1081     }
1082   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1083     {
1084       out->body[out->curlen] = EOS;
1085       return 1;
1086     }
1087   else if (status == RMS$_NMF)
1088     retval = 0;
1089   else
1090     {
1091       strcpy(out->body, in);
1092       retval = -1;
1093     }
1094   lib$find_file_end(&context);
1095   pass1 = TRUE;
1096   return retval;
1097 }
1098
1099 /*
1100   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1101   name of each file specified by the provided arg expanding wildcards.
1102 */
1103 static char *
1104 gfnames (arg, p_error)
1105      char *arg;
1106      bool *p_error;
1107 {
1108   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1109
1110   switch (fn_exp (&filename, arg))
1111     {
1112     case 1:
1113       *p_error = FALSE;
1114       return filename.body;
1115     case 0:
1116       *p_error = FALSE;
1117       return NULL;
1118     default:
1119       *p_error = TRUE;
1120       return filename.body;
1121     }
1122 }
1123
#ifndef OLD  /* Newer versions of VMS do provide `system'.  */
/*
 * Stand-in for system(): report that shell commands cannot be run and
 * return a failure status.  The callers in this file compare the
 * result with EXIT_SUCCESS or pass it to exit(), so a definite value
 * has to be returned.
 */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1131
1132 #define VERSION_DELIM   ';'
1133 char *massage_name (s)
1134      char *s;
1135 {
1136   char *start = s;
1137
1138   for ( ; *s; s++)
1139     if (*s == VERSION_DELIM)
1140       {
1141         *s = EOS;
1142         break;
1143       }
1144     else
1145       *s = lowcase (*s);
1146   return start;
1147 }
1148 #endif /* VMS */
1149
1150 \f
1151 int
1152 main (argc, argv)
1153      int argc;
1154      char *argv[];
1155 {
1156   int i;
1157   unsigned int nincluded_files;
1158   char **included_files;
1159   argument *argbuffer;
1160   int current_arg, file_count;
1161   linebuffer filename_lb;
1162   bool help_asked = FALSE;
1163 #ifdef VMS
1164   bool got_err;
1165 #endif
1166  char *optstring;
1167  int opt;
1168
1169
1170 #ifdef DOS_NT
1171   _fmode = O_BINARY;   /* all of files are treated as binary files */
1172 #endif /* DOS_NT */
1173
1174   progname = argv[0];
1175   nincluded_files = 0;
1176   included_files = xnew (argc, char *);
1177   current_arg = 0;
1178   file_count = 0;
1179
1180   /* Allocate enough no matter what happens.  Overkill, but each one
1181      is small. */
1182   argbuffer = xnew (argc, argument);
1183
1184   /*
1185    * If etags, always find typedefs and structure tags.  Why not?
1186    * Also default to find macro constants, enum constants and
1187    * global variables.
1188    */
1189   if (!CTAGS)
1190     {
1191       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1192       globals = TRUE;
1193     }
1194
1195   /* When the optstring begins with a '-' getopt_long does not rearrange the
1196      non-options arguments to be at the end, but leaves them alone. */
1197   optstring = "-";
1198 #ifdef ETAGS_REGEXPS
1199   optstring = "-r:Rc:";
1200 #endif /* ETAGS_REGEXPS */
1201   if (!LONG_OPTIONS)
1202     optstring += 1;             /* remove the initial '-' */
1203   optstring = concat (optstring,
1204                       "aCf:Il:o:SVhH",
1205                       (CTAGS) ? "BxdtTuvw" : "Di:");
1206
1207   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1208     switch (opt)
1209       {
1210       case 0:
1211         /* If getopt returns 0, then it has already processed a
1212            long-named option.  We should do nothing.  */
1213         break;
1214
1215       case 1:
1216         /* This means that a file name has been seen.  Record it. */
1217         argbuffer[current_arg].arg_type = at_filename;
1218         argbuffer[current_arg].what     = optarg;
1219         ++current_arg;
1220         ++file_count;
1221         break;
1222
1223       case STDIN:
1224         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1225         argbuffer[current_arg].arg_type = at_stdin;
1226         argbuffer[current_arg].what     = optarg;
1227         ++current_arg;
1228         ++file_count;
1229         if (parsing_stdin)
1230           fatal ("cannot parse standard input more than once", (char *)NULL);
1231         parsing_stdin = TRUE;
1232         break;
1233
1234         /* Common options. */
1235       case 'a': append_to_tagfile = TRUE;       break;
1236       case 'C': cplusplus = TRUE;               break;
1237       case 'f':         /* for compatibility with old makefiles */
1238       case 'o':
1239         if (tagfile)
1240           {
1241             error ("-o option may only be given once.", (char *)NULL);
1242             suggest_asking_for_help ();
1243             /* NOTREACHED */
1244           }
1245         tagfile = optarg;
1246         break;
1247       case 'I':
1248       case 'S':         /* for backward compatibility */
1249         ignoreindent = TRUE;
1250         break;
1251       case 'l':
1252         {
1253           language *lang = get_language_from_langname (optarg);
1254           if (lang != NULL)
1255             {
1256               argbuffer[current_arg].lang = lang;
1257               argbuffer[current_arg].arg_type = at_language;
1258               ++current_arg;
1259             }
1260         }
1261         break;
1262       case 'c':
1263         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1264         optarg = concat (optarg, "i", ""); /* memory leak here */
1265         /* FALLTHRU */
1266       case 'r':
1267         argbuffer[current_arg].arg_type = at_regexp;
1268         argbuffer[current_arg].what = optarg;
1269         ++current_arg;
1270         break;
1271       case 'R':
1272         argbuffer[current_arg].arg_type = at_regexp;
1273         argbuffer[current_arg].what = NULL;
1274         ++current_arg;
1275         break;
1276       case 'V':
1277         print_version ();
1278         break;
1279       case 'h':
1280       case 'H':
1281         help_asked = TRUE;
1282         break;
1283
1284         /* Etags options */
1285       case 'D': constantypedefs = FALSE;                        break;
1286       case 'i': included_files[nincluded_files++] = optarg;     break;
1287
1288         /* Ctags options. */
1289       case 'B': searchar = '?';                                 break;
1290       case 'd': constantypedefs = TRUE;                         break;
1291       case 't': typedefs = TRUE;                                break;
1292       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1293       case 'u': update = TRUE;                                  break;
1294       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1295       case 'x': cxref_style = TRUE;                             break;
1296       case 'w': no_warnings = TRUE;                             break;
1297       default:
1298         suggest_asking_for_help ();
1299         /* NOTREACHED */
1300       }
1301
1302   /* No more options.  Store the rest of arguments. */
1303   for (; optind < argc; optind++)
1304     {
1305       argbuffer[current_arg].arg_type = at_filename;
1306       argbuffer[current_arg].what = argv[optind];
1307       ++current_arg;
1308       ++file_count;
1309     }
1310
1311   argbuffer[current_arg].arg_type = at_end;
1312
1313   if (help_asked)
1314     print_help (argbuffer);
1315     /* NOTREACHED */
1316
1317   if (nincluded_files == 0 && file_count == 0)
1318     {
1319       error ("no input files specified.", (char *)NULL);
1320       suggest_asking_for_help ();
1321       /* NOTREACHED */
1322     }
1323
1324   if (tagfile == NULL)
1325     tagfile = CTAGS ? "tags" : "TAGS";
1326   cwd = etags_getcwd ();        /* the current working directory */
1327   if (cwd[strlen (cwd) - 1] != '/')
1328     {
1329       char *oldcwd = cwd;
1330       cwd = concat (oldcwd, "/", "");
1331       free (oldcwd);
1332     }
1333   /* Relative file names are made relative to the current directory. */
1334   if (streq (tagfile, "-")
1335       || strneq (tagfile, "/dev/", 5))
1336     tagfiledir = cwd;
1337   else
1338     tagfiledir = absolute_dirname (tagfile, cwd);
1339
1340   init ();                      /* set up boolean "functions" */
1341
1342   linebuffer_init (&lb);
1343   linebuffer_init (&filename_lb);
1344   linebuffer_init (&filebuf);
1345   linebuffer_init (&token_name);
1346
1347   if (!CTAGS)
1348     {
1349       if (streq (tagfile, "-"))
1350         {
1351           tagf = stdout;
1352 #ifdef DOS_NT
1353           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1354              doesn't take effect until after `stdout' is already open). */
1355           if (!isatty (fileno (stdout)))
1356             setmode (fileno (stdout), O_BINARY);
1357 #endif /* DOS_NT */
1358         }
1359       else
1360         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1361       if (tagf == NULL)
1362         pfatal (tagfile);
1363     }
1364
1365   /*
1366    * Loop through files finding functions.
1367    */
1368   for (i = 0; i < current_arg; i++)
1369     {
1370       static language *lang;    /* non-NULL if language is forced */
1371       char *this_file;
1372
1373       switch (argbuffer[i].arg_type)
1374         {
1375         case at_language:
1376           lang = argbuffer[i].lang;
1377           break;
1378 #ifdef ETAGS_REGEXPS
1379         case at_regexp:
1380           analyse_regex (argbuffer[i].what);
1381           break;
1382 #endif
1383         case at_filename:
1384 #ifdef VMS
1385           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1386             {
1387               if (got_err)
1388                 {
1389                   error ("can't find file %s\n", this_file);
1390                   argc--, argv++;
1391                 }
1392               else
1393                 {
1394                   this_file = massage_name (this_file);
1395                 }
1396 #else
1397               this_file = argbuffer[i].what;
1398 #endif
1399               /* Input file named "-" means read file names from stdin
1400                  (one per line) and use them. */
1401               if (streq (this_file, "-"))
1402                 {
1403                   if (parsing_stdin)
1404                     fatal ("cannot parse standard input AND read file names from it",
1405                            (char *)NULL);
1406                   while (readline_internal (&filename_lb, stdin) > 0)
1407                     process_file_name (filename_lb.buffer, lang);
1408                 }
1409               else
1410                 process_file_name (this_file, lang);
1411 #ifdef VMS
1412             }
1413 #endif
1414           break;
1415         case at_stdin:
1416           this_file = argbuffer[i].what;
1417           process_file (stdin, this_file, lang);
1418           break;
1419         }
1420     }
1421
1422 #ifdef ETAGS_REGEXPS
1423   free_regexps ();
1424 #endif /* ETAGS_REGEXPS */
1425   free (lb.buffer);
1426   free (filebuf.buffer);
1427   free (token_name.buffer);
1428
1429   if (!CTAGS || cxref_style)
1430     {
1431       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1432       put_entries (nodehead);
1433       free_tree (nodehead);
1434       nodehead = NULL;
1435       if (!CTAGS)
1436         {
1437           fdesc *fdp;
1438
1439           /* Output file entries that have no tags. */
1440           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1441             if (!fdp->written)
1442               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1443
1444           while (nincluded_files-- > 0)
1445             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1446
1447           if (fclose (tagf) == EOF)
1448             pfatal (tagfile);
1449         }
1450
1451       exit (EXIT_SUCCESS);
1452     }
1453
1454   if (update)
1455     {
1456       char cmd[BUFSIZ];
1457       for (i = 0; i < current_arg; ++i)
1458         {
1459           switch (argbuffer[i].arg_type)
1460             {
1461             case at_filename:
1462             case at_stdin:
1463               break;
1464             default:
1465               continue;         /* the for loop */
1466             }
1467           sprintf (cmd,
1468                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1469                    tagfile, argbuffer[i].what, tagfile);
1470           if (system (cmd) != EXIT_SUCCESS)
1471             fatal ("failed to execute shell command", (char *)NULL);
1472         }
1473       append_to_tagfile = TRUE;
1474     }
1475
1476   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1477   if (tagf == NULL)
1478     pfatal (tagfile);
1479   put_entries (nodehead);       /* write all the tags (CTAGS) */
1480   free_tree (nodehead);
1481   nodehead = NULL;
1482   if (fclose (tagf) == EOF)
1483     pfatal (tagfile);
1484
1485   if (CTAGS)
1486     if (append_to_tagfile || update)
1487       {
1488         char cmd[2*BUFSIZ+10];
1489         sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1490         exit (system (cmd));
1491       }
1492   return EXIT_SUCCESS;
1493 }
1494
1495
1496 /*
1497  * Return a compressor given the file name.  If EXTPTR is non-zero,
1498  * return a pointer into FILE where the compressor-specific
1499  * extension begins.  If no compressor is found, NULL is returned
1500  * and EXTPTR is not significant.
1501  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1502  */
1503 static compressor *
1504 get_compressor_from_suffix (file, extptr)
1505      char *file;
1506      char **extptr;
1507 {
1508   compressor *compr;
1509   char *slash, *suffix;
1510
1511   /* This relies on FN to be after canonicalize_filename,
1512      so we don't need to consider backslashes on DOS_NT.  */
1513   slash = etags_strrchr (file, '/');
1514   suffix = etags_strrchr (file, '.');
1515   if (suffix == NULL || suffix < slash)
1516     return NULL;
1517   if (extptr != NULL)
1518     *extptr = suffix;
1519   suffix += 1;
1520   /* Let those poor souls who live with DOS 8+3 file name limits get
1521      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1522      Only the first do loop is run if not MSDOS */
1523   do
1524     {
1525       for (compr = compressors; compr->suffix != NULL; compr++)
1526         if (streq (compr->suffix, suffix))
1527           return compr;
1528       if (!MSDOS)
1529         break;                  /* do it only once: not really a loop */
1530       if (extptr != NULL)
1531         *extptr = ++suffix;
1532     } while (*suffix != '\0');
1533   return NULL;
1534 }
1535
1536
1537
1538 /*
1539  * Return a language given the name.
1540  */
1541 static language *
1542 get_language_from_langname (name)
1543      const char *name;
1544 {
1545   language *lang;
1546
1547   if (name == NULL)
1548     error ("empty language name", (char *)NULL);
1549   else
1550     {
1551       for (lang = lang_names; lang->name != NULL; lang++)
1552         if (streq (name, lang->name))
1553           return lang;
1554       error ("unknown language \"%s\"", name);
1555     }
1556
1557   return NULL;
1558 }
1559
1560
1561 /*
1562  * Return a language given the interpreter name.
1563  */
1564 static language *
1565 get_language_from_interpreter (interpreter)
1566      char *interpreter;
1567 {
1568   language *lang;
1569   char **iname;
1570
1571   if (interpreter == NULL)
1572     return NULL;
1573   for (lang = lang_names; lang->name != NULL; lang++)
1574     if (lang->interpreters != NULL)
1575       for (iname = lang->interpreters; *iname != NULL; iname++)
1576         if (streq (*iname, interpreter))
1577             return lang;
1578
1579   return NULL;
1580 }
1581
1582
1583
1584 /*
1585  * Return a language given the file name.
1586  */
1587 static language *
1588 get_language_from_filename (file, case_sensitive)
1589      char *file;
1590      bool case_sensitive;
1591 {
1592   language *lang;
1593   char **name, **ext, *suffix;
1594
1595   /* Try whole file name first. */
1596   for (lang = lang_names; lang->name != NULL; lang++)
1597     if (lang->filenames != NULL)
1598       for (name = lang->filenames; *name != NULL; name++)
1599         if ((case_sensitive)
1600             ? streq (*name, file)
1601             : strcaseeq (*name, file))
1602           return lang;
1603
1604   /* If not found, try suffix after last dot. */
1605   suffix = etags_strrchr (file, '.');
1606   if (suffix == NULL)
1607     return NULL;
1608   suffix += 1;
1609   for (lang = lang_names; lang->name != NULL; lang++)
1610     if (lang->suffixes != NULL)
1611       for (ext = lang->suffixes; *ext != NULL; ext++)
1612         if ((case_sensitive)
1613             ? streq (*ext, suffix)
1614             : strcaseeq (*ext, suffix))
1615           return lang;
1616   return NULL;
1617 }
1618
1619 \f
1620 /*
1621  * This routine is called on each file argument.
1622  */
1623 static void
1624 process_file_name (file, lang)
1625      char *file;
1626      language *lang;
1627 {
1628   struct stat stat_buf;
1629   FILE *inf;
1630   fdesc *fdp;
1631   compressor *compr;
1632   char *compressed_name, *uncompressed_name;
1633   char *ext, *real_name;
1634   int retval;
1635
1636   canonicalize_filename (file);
1637   if (streq (file, tagfile) && !streq (tagfile, "-"))
1638     {
1639       error ("skipping inclusion of %s in self.", file);
1640       return;
1641     }
1642   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1643     {
1644       compressed_name = NULL;
1645       real_name = uncompressed_name = savestr (file);
1646     }
1647   else
1648     {
1649       real_name = compressed_name = savestr (file);
1650       uncompressed_name = savenstr (file, ext - file);
1651     }
1652
1653   /* If the canonicalized uncompressed name
1654      has already been dealt with, skip it silently. */
1655   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1656     {
1657       assert (fdp->infname != NULL);
1658       if (streq (uncompressed_name, fdp->infname))
1659         goto cleanup;
1660     }
1661
1662   if (stat (real_name, &stat_buf) != 0)
1663     {
1664       /* Reset real_name and try with a different name. */
1665       real_name = NULL;
1666       if (compressed_name != NULL) /* try with the given suffix */
1667         {
1668           if (stat (uncompressed_name, &stat_buf) == 0)
1669             real_name = uncompressed_name;
1670         }
1671       else                      /* try all possible suffixes */
1672         {
1673           for (compr = compressors; compr->suffix != NULL; compr++)
1674             {
1675               compressed_name = concat (file, ".", compr->suffix);
1676               if (stat (compressed_name, &stat_buf) != 0)
1677                 {
1678                   if (MSDOS)
1679                     {
1680                       char *suf = compressed_name + strlen (file);
1681                       size_t suflen = strlen (compr->suffix) + 1;
1682                       for ( ; suf[1]; suf++, suflen--)
1683                         {
1684                           memmove (suf, suf + 1, suflen);
1685                           if (stat (compressed_name, &stat_buf) == 0)
1686                             {
1687                               real_name = compressed_name;
1688                               break;
1689                             }
1690                         }
1691                       if (real_name != NULL)
1692                         break;
1693                     } /* MSDOS */
1694                   free (compressed_name);
1695                   compressed_name = NULL;
1696                 }
1697               else
1698                 {
1699                   real_name = compressed_name;
1700                   break;
1701                 }
1702             }
1703         }
1704       if (real_name == NULL)
1705         {
1706           perror (file);
1707           goto cleanup;
1708         }
1709     } /* try with a different name */
1710
1711   if (!S_ISREG (stat_buf.st_mode))
1712     {
1713       error ("skipping %s: it is not a regular file.", real_name);
1714       goto cleanup;
1715     }
1716   if (real_name == compressed_name)
1717     {
1718       char *cmd = concat (compr->command, " ", real_name);
1719       inf = (FILE *) popen (cmd, "r");
1720       free (cmd);
1721     }
1722   else
1723     inf = fopen (real_name, "r");
1724   if (inf == NULL)
1725     {
1726       perror (real_name);
1727       goto cleanup;
1728     }
1729
1730   process_file (inf, uncompressed_name, lang);
1731
1732   if (real_name == compressed_name)
1733     retval = pclose (inf);
1734   else
1735     retval = fclose (inf);
1736   if (retval < 0)
1737     pfatal (file);
1738
1739  cleanup:
1740   if (compressed_name) free (compressed_name);
1741   if (uncompressed_name) free (uncompressed_name);
1742   last_node = NULL;
1743   curfdp = NULL;
1744   return;
1745 }
1746
1747 static void
1748 process_file (fh, fn, lang)
1749      FILE *fh;
1750      char *fn;
1751      language *lang;
1752 {
1753   static const fdesc emptyfdesc;
1754   fdesc *fdp;
1755
1756   /* Create a new input file description entry. */
1757   fdp = xnew (1, fdesc);
1758   *fdp = emptyfdesc;
1759   fdp->next = fdhead;
1760   fdp->infname = savestr (fn);
1761   fdp->lang = lang;
1762   fdp->infabsname = absolute_filename (fn, cwd);
1763   fdp->infabsdir = absolute_dirname (fn, cwd);
1764   if (filename_is_absolute (fn))
1765     {
1766       /* An absolute file name.  Canonicalize it. */
1767       fdp->taggedfname = absolute_filename (fn, NULL);
1768     }
1769   else
1770     {
1771       /* A file name relative to cwd.  Make it relative
1772          to the directory of the tags file. */
1773       fdp->taggedfname = relative_filename (fn, tagfiledir);
1774     }
1775   fdp->usecharno = TRUE;        /* use char position when making tags */
1776   fdp->prop = NULL;
1777   fdp->written = FALSE;         /* not written on tags file yet */
1778
1779   fdhead = fdp;
1780   curfdp = fdhead;              /* the current file description */
1781
1782   find_entries (fh);
1783
1784   /* If not Ctags, and if this is not metasource and if it contained no #line
1785      directives, we can write the tags and free all nodes pointing to
1786      curfdp. */
1787   if (!CTAGS
1788       && curfdp->usecharno      /* no #line directives in this file */
1789       && !curfdp->lang->metasource)
1790     {
1791       node *np, *prev;
1792
1793       /* Look for the head of the sublist relative to this file.  See add_node
1794          for the structure of the node tree. */
1795       prev = NULL;
1796       for (np = nodehead; np != NULL; prev = np, np = np->left)
1797         if (np->fdp == curfdp)
1798           break;
1799
1800       /* If we generated tags for this file, write and delete them. */
1801       if (np != NULL)
1802         {
1803           /* This is the head of the last sublist, if any.  The following
1804              instructions depend on this being true. */
1805           assert (np->left == NULL);
1806
1807           assert (fdhead == curfdp);
1808           assert (last_node->fdp == curfdp);
1809           put_entries (np);     /* write tags for file curfdp->taggedfname */
1810           free_tree (np);       /* remove the written nodes */
1811           if (prev == NULL)
1812             nodehead = NULL;    /* no nodes left */
1813           else
1814             prev->left = NULL;  /* delete the pointer to the sublist */
1815         }
1816     }
1817 }
1818
1819 /*
1820  * This routine sets up the boolean pseudo-functions which work
1821  * by setting boolean flags dependent upon the corresponding character.
1822  * Every char which is NOT in that string is not a white char.  Therefore,
1823  * all of the array "_wht" is set to FALSE, and then the elements
1824  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1825  * of a char is TRUE if it is the string "white", else FALSE.
1826  */
1827 static void
1828 init ()
1829 {
1830   register char *sp;
1831   register int i;
1832
1833   for (i = 0; i < CHARS; i++)
1834     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1835   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1836   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1837   notinname('\0') = notinname('\n');
1838   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1839   begtoken('\0') = begtoken('\n');
1840   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1841   intoken('\0') = intoken('\n');
1842   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1843   endtoken('\0') = endtoken('\n');
1844 }
1845
1846 /*
1847  * This routine opens the specified file and calls the function
1848  * which finds the function and type definitions.
1849  */
1850 static void
1851 find_entries (inf)
1852      FILE *inf;
1853 {
1854   char *cp;
1855   language *lang = curfdp->lang;
1856   Lang_function *parser = NULL;
1857
1858   /* If user specified a language, use it. */
1859   if (lang != NULL && lang->function != NULL)
1860     {
1861       parser = lang->function;
1862     }
1863
1864   /* Else try to guess the language given the file name. */
1865   if (parser == NULL)
1866     {
1867       lang = get_language_from_filename (curfdp->infname, TRUE);
1868       if (lang != NULL && lang->function != NULL)
1869         {
1870           curfdp->lang = lang;
1871           parser = lang->function;
1872         }
1873     }
1874
1875   /* Else look for sharp-bang as the first two characters. */
1876   if (parser == NULL
1877       && readline_internal (&lb, inf) > 0
1878       && lb.len >= 2
1879       && lb.buffer[0] == '#'
1880       && lb.buffer[1] == '!')
1881     {
1882       char *lp;
1883
1884       /* Set lp to point at the first char after the last slash in the
1885          line or, if no slashes, at the first nonblank.  Then set cp to
1886          the first successive blank and terminate the string. */
1887       lp = etags_strrchr (lb.buffer+2, '/');
1888       if (lp != NULL)
1889         lp += 1;
1890       else
1891         lp = skip_spaces (lb.buffer + 2);
1892       cp = skip_non_spaces (lp);
1893       *cp = '\0';
1894
1895       if (strlen (lp) > 0)
1896         {
1897           lang = get_language_from_interpreter (lp);
1898           if (lang != NULL && lang->function != NULL)
1899             {
1900               curfdp->lang = lang;
1901               parser = lang->function;
1902             }
1903         }
1904     }
1905
1906   /* We rewind here, even if inf may be a pipe.  We fail if the
1907      length of the first line is longer than the pipe block size,
1908      which is unlikely. */
1909   rewind (inf);
1910
1911   /* Else try to guess the language given the case insensitive file name. */
1912   if (parser == NULL)
1913     {
1914       lang = get_language_from_filename (curfdp->infname, FALSE);
1915       if (lang != NULL && lang->function != NULL)
1916         {
1917           curfdp->lang = lang;
1918           parser = lang->function;
1919         }
1920     }
1921
1922   /* Else try Fortran or C. */
1923   if (parser == NULL)
1924     {
1925       node *old_last_node = last_node;
1926
1927       curfdp->lang = get_language_from_langname ("fortran");
1928       find_entries (inf);
1929
1930       if (old_last_node == last_node)
1931         /* No Fortran entries found.  Try C. */
1932         {
1933           /* We do not tag if rewind fails.
1934              Only the file name will be recorded in the tags file. */
1935           rewind (inf);
1936           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1937           find_entries (inf);
1938         }
1939       return;
1940     }
1941
1942   if (!no_line_directive
1943       && curfdp->lang != NULL && curfdp->lang->metasource)
1944     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1945        file, or anyway we parsed a file that is automatically generated from
1946        this one.  If this is the case, the bingo.c file contained #line
1947        directives that generated tags pointing to this file.  Let's delete
1948        them all before parsing this file, which is the real source. */
1949     {
1950       fdesc **fdpp = &fdhead;
1951       while (*fdpp != NULL)
1952         if (*fdpp != curfdp
1953             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1954           /* We found one of those!  We must delete both the file description
1955              and all tags referring to it. */
1956           {
1957             fdesc *badfdp = *fdpp;
1958
1959             /* Delete the tags referring to badfdp->taggedfname
1960                that were obtained from badfdp->infname. */
1961             invalidate_nodes (badfdp, &nodehead);
1962
1963             *fdpp = badfdp->next; /* remove the bad description from the list */
1964             free_fdesc (badfdp);
1965           }
1966         else
1967           fdpp = &(*fdpp)->next; /* advance the list pointer */
1968     }
1969
1970   assert (parser != NULL);
1971
1972   /* Generic initialisations before reading from file. */
1973   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1974
1975   /* Generic initialisations before parsing file with readline. */
1976   lineno = 0;                  /* reset global line number */
1977   charno = 0;                  /* reset global char number */
1978   linecharno = 0;              /* reset global char number of line start */
1979
1980   parser (inf);
1981
1982 #ifdef ETAGS_REGEXPS
1983   regex_tag_multiline ();
1984 #endif /* ETAGS_REGEXPS */
1985 }
1986
1987 \f
1988 /*
1989  * Check whether an implicitly named tag should be created,
1990  * then call `pfnote'.
1991  * NAME is a string that is internally copied by this function.
1992  *
1993  * TAGS format specification
1994  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1995  * The following is explained in some more detail in etc/ETAGS.EBNF.
1996  *
1997  * make_tag creates tags with "implicit tag names" (unnamed tags)
1998  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1999  *  1. NAME does not contain any of the characters in NONAM;
2000  *  2. LINESTART contains name as either a rightmost, or rightmost but
2001  *     one character, substring;
2002  *  3. the character, if any, immediately before NAME in LINESTART must
2003  *     be a character in NONAM;
2004  *  4. the character, if any, immediately after NAME in LINESTART must
2005  *     also be a character in NONAM.
2006  *
2007  * The implementation uses the notinname() macro, which recognises the
2008  * characters stored in the string `nonam'.
2009  * etags.el needs to use the same characters that are in NONAM.
2010  */
2011 static void
2012 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2013      char *name;                /* tag name, or NULL if unnamed */
2014      int namelen;               /* tag length */
2015      bool is_func;              /* tag is a function */
2016      char *linestart;           /* start of the line where tag is */
2017      int linelen;               /* length of the line where tag is */
2018      int lno;                   /* line number */
2019      long cno;                  /* character number */
2020 {
2021   bool named = (name != NULL && namelen > 0);
2022
2023   if (!CTAGS && named)          /* maybe set named to false */
2024     /* Let's try to make an implicit tag name, that is, create an unnamed tag
2025        such that etags.el can guess a name from it. */
2026     {
2027       int i;
2028       register char *cp = name;
2029
2030       for (i = 0; i < namelen; i++)
2031         if (notinname (*cp++))
2032           break;
2033       if (i == namelen)                         /* rule #1 */
2034         {
2035           cp = linestart + linelen - namelen;
2036           if (notinname (linestart[linelen-1]))
2037             cp -= 1;                            /* rule #4 */
2038           if (cp >= linestart                   /* rule #2 */
2039               && (cp == linestart
2040                   || notinname (cp[-1]))        /* rule #3 */
2041               && strneq (name, cp, namelen))    /* rule #2 */
2042             named = FALSE;      /* use implicit tag name */
2043         }
2044     }
2045
2046   if (named)
2047     name = savenstr (name, namelen);
2048   else
2049     name = NULL;
2050   pfnote (name, is_func, linestart, linelen, lno, cno);
2051 }
2052
2053 /* Record a tag. */
2054 static void
2055 pfnote (name, is_func, linestart, linelen, lno, cno)
2056      char *name;                /* tag name, or NULL if unnamed */
2057      bool is_func;              /* tag is a function */
2058      char *linestart;           /* start of the line where tag is */
2059      int linelen;               /* length of the line where tag is */
2060      int lno;                   /* line number */
2061      long cno;                  /* character number */
2062 {
2063   register node *np;
2064
2065   assert (name == NULL || name[0] != '\0');
2066   if (CTAGS && name == NULL)
2067     return;
2068
2069   np = xnew (1, node);
2070
2071   /* If ctags mode, change name "main" to M<thisfilename>. */
2072   if (CTAGS && !cxref_style && streq (name, "main"))
2073     {
2074       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2075       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2076       fp = etags_strrchr (np->name, '.');
2077       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2078         fp[0] = '\0';
2079     }
2080   else
2081     np->name = name;
2082   np->valid = TRUE;
2083   np->been_warned = FALSE;
2084   np->fdp = curfdp;
2085   np->is_func = is_func;
2086   np->lno = lno;
2087   if (np->fdp->usecharno)
2088     /* Our char numbers are 0-base, because of C language tradition?
2089        ctags compatibility?  old versions compatibility?   I don't know.
2090        Anyway, since emacs's are 1-base we expect etags.el to take care
2091        of the difference.  If we wanted to have 1-based numbers, we would
2092        uncomment the +1 below. */
2093     np->cno = cno /* + 1 */ ;
2094   else
2095     np->cno = invalidcharno;
2096   np->left = np->right = NULL;
2097   if (CTAGS && !cxref_style)
2098     {
2099       if (strlen (linestart) < 50)
2100         np->regex = concat (linestart, "$", "");
2101       else
2102         np->regex = savenstr (linestart, 50);
2103     }
2104   else
2105     np->regex = savenstr (linestart, linelen);
2106
2107   add_node (np, &nodehead);
2108 }
2109
2110 /*
2111  * free_tree ()
2112  *      recurse on left children, iterate on right children.
2113  */
2114 static void
2115 free_tree (np)
2116      register node *np;
2117 {
2118   while (np)
2119     {
2120       register node *node_right = np->right;
2121       free_tree (np->left);
2122       if (np->name != NULL)
2123         free (np->name);
2124       free (np->regex);
2125       free (np);
2126       np = node_right;
2127     }
2128 }
2129
2130 /*
2131  * free_fdesc ()
2132  *      delete a file description
2133  */
2134 static void
2135 free_fdesc (fdp)
2136      register fdesc *fdp;
2137 {
2138   if (fdp->infname != NULL) free (fdp->infname);
2139   if (fdp->infabsname != NULL) free (fdp->infabsname);
2140   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2141   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2142   if (fdp->prop != NULL) free (fdp->prop);
2143   free (fdp);
2144 }
2145
2146 /*
2147  * add_node ()
2148  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2149  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2150  *      balancing.
2151  *
2152  *      add_node is the only function allowed to add nodes, so it can
2153  *      maintain state.
2154  */
2155 static void
2156 add_node (np, cur_node_p)
2157      node *np, **cur_node_p;
2158 {
2159   register int dif;
2160   register node *cur_node = *cur_node_p;
2161
2162   if (cur_node == NULL)
2163     {
2164       *cur_node_p = np;
2165       last_node = np;
2166       return;
2167     }
2168
2169   if (!CTAGS)
2170     /* Etags Mode */
2171     {
2172       /* For each file name, tags are in a linked sublist on the right
2173          pointer.  The first tags of different files are a linked list
2174          on the left pointer.  last_node points to the end of the last
2175          used sublist. */
2176       if (last_node != NULL && last_node->fdp == np->fdp)
2177         {
2178           /* Let's use the same sublist as the last added node. */
2179           assert (last_node->right == NULL);
2180           last_node->right = np;
2181           last_node = np;
2182         }
2183       else if (cur_node->fdp == np->fdp)
2184         {
2185           /* Scanning the list we found the head of a sublist which is
2186              good for us.  Let's scan this sublist. */
2187           add_node (np, &cur_node->right);
2188         }
2189       else
2190         /* The head of this sublist is not good for us.  Let's try the
2191            next one. */
2192         add_node (np, &cur_node->left);
2193     } /* if ETAGS mode */
2194
2195   else
2196     {
2197       /* Ctags Mode */
2198       dif = strcmp (np->name, cur_node->name);
2199
2200       /*
2201        * If this tag name matches an existing one, then
2202        * do not add the node, but maybe print a warning.
2203        */
2204       if (!dif)
2205         {
2206           if (np->fdp == cur_node->fdp)
2207             {
2208               if (!no_warnings)
2209                 {
2210                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2211                            np->fdp->infname, lineno, np->name);
2212                   fprintf (stderr, "Second entry ignored\n");
2213                 }
2214             }
2215           else if (!cur_node->been_warned && !no_warnings)
2216             {
2217               fprintf
2218                 (stderr,
2219                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2220                  np->fdp->infname, cur_node->fdp->infname, np->name);
2221               cur_node->been_warned = TRUE;
2222             }
2223           return;
2224         }
2225
2226       /* Actually add the node */
2227       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2228     } /* if CTAGS mode */
2229 }
2230
2231 /*
2232  * invalidate_nodes ()
2233  *      Scan the node tree and invalidate all nodes pointing to the
2234  *      given file description (CTAGS case) or free them (ETAGS case).
2235  */
2236 static void
2237 invalidate_nodes (badfdp, npp)
2238      fdesc *badfdp;
2239      node **npp;
2240 {
2241   node *np = *npp;
2242
2243   if (np == NULL)
2244     return;
2245
2246   if (CTAGS)
2247     {
2248       if (np->left != NULL)
2249         invalidate_nodes (badfdp, &np->left);
2250       if (np->fdp == badfdp)
2251         np->valid = FALSE;
2252       if (np->right != NULL)
2253         invalidate_nodes (badfdp, &np->right);
2254     }
2255   else
2256     {
2257       assert (np->fdp != NULL);
2258       if (np->fdp == badfdp)
2259         {
2260           *npp = np->left;      /* detach the sublist from the list */
2261           np->left = NULL;      /* isolate it */
2262           free_tree (np);       /* free it */
2263           invalidate_nodes (badfdp, npp);
2264         }
2265       else
2266         invalidate_nodes (badfdp, &np->left);
2267     }
2268 }
2269
2270 \f
/* Forward declarations of the helpers that compute output sizes. */
static int total_size_of_entries __P((node *));
static int number_len __P((long));
2273
/* Length of a non-negative number's decimal representation, that is the
   number of digits needed to print NUM. */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit at least, plus one for each further power of ten. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2284
2285 /*
2286  * Return total number of characters that put_entries will output for
2287  * the nodes in the linked list at the right of the specified node.
2288  * This count is irrelevant with etags.el since emacs 19.34 at least,
2289  * but is still supplied for backward compatibility.
2290  */
2291 static int
2292 total_size_of_entries (np)
2293      register node *np;
2294 {
2295   register int total = 0;
2296
2297   for (; np != NULL; np = np->right)
2298     if (np->valid)
2299       {
2300         total += strlen (np->regex) + 1;                /* pat\177 */
2301         if (np->name != NULL)
2302           total += strlen (np->name) + 1;               /* name\001 */
2303         total += number_len ((long) np->lno) + 1;       /* lno, */
2304         if (np->cno != invalidcharno)                   /* cno */
2305           total += number_len (np->cno);
2306         total += 1;                                     /* newline */
2307       }
2308
2309   return total;
2310 }
2311
2312 static void
2313 put_entries (np)
2314      register node *np;
2315 {
2316   register char *sp;
2317   static fdesc *fdp = NULL;
2318
2319   if (np == NULL)
2320     return;
2321
2322   /* Output subentries that precede this one */
2323   if (CTAGS)
2324     put_entries (np->left);
2325
2326   /* Output this entry */
2327   if (np->valid)
2328     {
2329       if (!CTAGS)
2330         {
2331           /* Etags mode */
2332           if (fdp != np->fdp)
2333             {
2334               fdp = np->fdp;
2335               fprintf (tagf, "\f\n%s,%d\n",
2336                        fdp->taggedfname, total_size_of_entries (np));
2337               fdp->written = TRUE;
2338             }
2339           fputs (np->regex, tagf);
2340           fputc ('\177', tagf);
2341           if (np->name != NULL)
2342             {
2343               fputs (np->name, tagf);
2344               fputc ('\001', tagf);
2345             }
2346           fprintf (tagf, "%d,", np->lno);
2347           if (np->cno != invalidcharno)
2348             fprintf (tagf, "%ld", np->cno);
2349           fputs ("\n", tagf);
2350         }
2351       else
2352         {
2353           /* Ctags mode */
2354           if (np->name == NULL)
2355             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2356
2357           if (cxref_style)
2358             {
2359               if (vgrind_style)
2360                 fprintf (stdout, "%s %s %d\n",
2361                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2362               else
2363                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2364                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2365             }
2366           else
2367             {
2368               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2369
2370               if (np->is_func)
2371                 {               /* function or #define macro with args */
2372                   putc (searchar, tagf);
2373                   putc ('^', tagf);
2374
2375                   for (sp = np->regex; *sp; sp++)
2376                     {
2377                       if (*sp == '\\' || *sp == searchar)
2378                         putc ('\\', tagf);
2379                       putc (*sp, tagf);
2380                     }
2381                   putc (searchar, tagf);
2382                 }
2383               else
2384                 {               /* anything else; text pattern inadequate */
2385                   fprintf (tagf, "%d", np->lno);
2386                 }
2387               putc ('\n', tagf);
2388             }
2389         }
2390     } /* if this node contains a valid tag */
2391
2392   /* Output subentries that follow this one */
2393   put_entries (np->right);
2394   if (!CTAGS)
2395     put_entries (np->left);
2396 }
2397
2398 \f
/* C extensions. */
/* Values naming the C dialect being parsed.  They are also used in the
   c_ext field of the keyword table below.
   NOTE(review): the keyword table below writes (C_JAVA & !C_PLPL) for
   the Java-only keywords.  `!' is the logical negation, so !C_PLPL is 0
   and the whole expression is 0, the same value used for the keywords
   listed with a plain 0.  Presumably (C_JAVA & ~C_PLPL), which is 0x4,
   was meant; confirm before regenerating the table with gperf. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2407
/*
 * The C symbol tables.
 *
 * sym_type is the kind of a keyword, as stored in the type field of the
 * keyword table below.
 */
enum sym_type
{
  st_none,
  /* @interface and @protocol, @implementation, @end */
  st_C_objprot, st_C_objimpl, st_C_objend,
  /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_gnumacro,
  /* if, for, while, switch, return, import, package, friend;
     __attribute__ */
  st_C_ignore, st_C_attribute,
  /* extends, implements */
  st_C_javastruct,
  /* operator */
  st_C_operator,
  /* class, template */
  st_C_class, st_C_template,
  /* struct, union, namespace, domain and interface; then one constant
     for each of extern, enum, define and typedef */
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2422
/* Forward declarations: hash and in_word_set are the keyword lookup
   functions produced by gperf. */
static unsigned int hash __P((const char *, unsigned int));
static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
static enum sym_type C_symtype __P((char *, int, int));
2426
2427 /* Feed stuff between (but not including) %[ and %] lines to:
2428      gperf -m 5
2429 %[
2430 %compare-strncmp
2431 %enum
2432 %struct-type
2433 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2434 %%
2435 if,             0,                      st_C_ignore
2436 for,            0,                      st_C_ignore
2437 while,          0,                      st_C_ignore
2438 switch,         0,                      st_C_ignore
2439 return,         0,                      st_C_ignore
2440 __attribute__,  0,                      st_C_attribute
2441 @interface,     0,                      st_C_objprot
2442 @protocol,      0,                      st_C_objprot
2443 @implementation,0,                      st_C_objimpl
2444 @end,           0,                      st_C_objend
2445 import,         (C_JAVA & !C_PLPL),     st_C_ignore
2446 package,        (C_JAVA & !C_PLPL),     st_C_ignore
2447 friend,         C_PLPL,                 st_C_ignore
2448 extends,        (C_JAVA & !C_PLPL),     st_C_javastruct
2449 implements,     (C_JAVA & !C_PLPL),     st_C_javastruct
2450 interface,      (C_JAVA & !C_PLPL),     st_C_struct
2451 class,          0,                      st_C_class
2452 namespace,      C_PLPL,                 st_C_struct
2453 domain,         C_STAR,                 st_C_struct
2454 union,          0,                      st_C_struct
2455 struct,         0,                      st_C_struct
2456 extern,         0,                      st_C_extern
2457 enum,           0,                      st_C_enum
2458 typedef,        0,                      st_C_typedef
2459 define,         0,                      st_C_define
2460 operator,       C_PLPL,                 st_C_operator
2461 template,       0,                      st_C_template
2462 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2463 DEFUN,          0,                      st_C_gnumacro
2464 SYSCALL,        0,                      st_C_gnumacro
2465 ENTRY,          0,                      st_C_gnumacro
2466 PSEUDO,         0,                      st_C_gnumacro
2467 # These are defined inside C functions, so currently they are not met.
2468 # EXFUN used in glibc, DEFVAR_* in emacs.
2469 #EXFUN,         0,                      st_C_gnumacro
2470 #DEFVAR_,       0,                      st_C_gnumacro
2471 %]
2472 and replace lines between %< and %> with its output, then:
2473  - remove the #if characterset check
2474  - make in_word_set static and not inline. */
2475 /*%<*/
2476 /* C code produced by gperf version 3.0.1 */
2477 /* Command-line: gperf -m 5  */
2478 /* Computed positions: -k'1-2' */
2479
2480 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2481 /* maximum key range = 31, duplicates = 0 */
2482
/* Hash function of the keyword table; this is gperf output, see the
   instructions above before editing it by hand.
   The value is LEN plus the table entries selected by the first two
   characters of STR, so both str[0] and str[1] are read whatever LEN
   is.  Characters that start no keyword map to 34. */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  /* One entry for each possible value of an unsigned char. */
  static unsigned char asso_values[] =
    {
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34,  1, 34, 34, 34, 14, 14,
      34, 34, 34, 34, 34, 34, 34, 34, 13, 34,
      13, 34, 34, 12, 34, 34, 34, 34, 34, 11,
      34, 34, 34, 34, 34,  8, 34, 11, 34, 12,
      11,  0,  1, 34,  7,  0, 34, 34, 11,  9,
       0,  4,  0, 34,  7,  4, 14, 21, 34, 15,
       0,  2, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34
    };
  return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]];
}
2526
2527 static struct C_stab_entry *
2528 in_word_set (str, len)
2529      register const char *str;
2530      register unsigned int len;
2531 {
2532   enum
2533     {
2534       TOTAL_KEYWORDS = 31,
2535       MIN_WORD_LENGTH = 2,
2536       MAX_WORD_LENGTH = 15,
2537       MIN_HASH_VALUE = 3,
2538       MAX_HASH_VALUE = 33
2539     };
2540
2541   static struct C_stab_entry wordlist[] =
2542     {
2543       {""}, {""}, {""},
2544       {"if",            0,                      st_C_ignore},
2545       {"enum",          0,                      st_C_enum},
2546       {"@end",          0,                      st_C_objend},
2547       {"extern",                0,                      st_C_extern},
2548       {"extends",       (C_JAVA & !C_PLPL),     st_C_javastruct},
2549       {"for",           0,                      st_C_ignore},
2550       {"interface",     (C_JAVA & !C_PLPL),     st_C_struct},
2551       {"@protocol",     0,                      st_C_objprot},
2552       {"@interface",    0,                      st_C_objprot},
2553       {"operator",      C_PLPL,                 st_C_operator},
2554       {"return",                0,                      st_C_ignore},
2555       {"friend",                C_PLPL,                 st_C_ignore},
2556       {"import",                (C_JAVA & !C_PLPL),     st_C_ignore},
2557       {"@implementation",0,                     st_C_objimpl},
2558       {"define",                0,                      st_C_define},
2559       {"package",       (C_JAVA & !C_PLPL),     st_C_ignore},
2560       {"implements",    (C_JAVA & !C_PLPL),     st_C_javastruct},
2561       {"namespace",     C_PLPL,                 st_C_struct},
2562       {"domain",                C_STAR,                 st_C_struct},
2563       {"template",      0,                      st_C_template},
2564       {"typedef",       0,                      st_C_typedef},
2565       {"struct",                0,                      st_C_struct},
2566       {"switch",                0,                      st_C_ignore},
2567       {"union",         0,                      st_C_struct},
2568       {"while",         0,                      st_C_ignore},
2569       {"class",         0,                      st_C_class},
2570       {"__attribute__", 0,                      st_C_attribute},
2571       {"SYSCALL",       0,                      st_C_gnumacro},
2572       {"PSEUDO",                0,                      st_C_gnumacro},
2573       {"ENTRY",         0,                      st_C_gnumacro},
2574       {"DEFUN",         0,                      st_C_gnumacro}
2575     };
2576
2577   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2578     {
2579       register int key = hash (str, len);
2580
2581       if (key <= MAX_HASH_VALUE && key >= 0)
2582         {
2583           register const char *s = wordlist[key].name;
2584
2585           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2586             return &wordlist[key];
2587         }
2588     }
2589   return 0;
2590 }
2591 /*%>*/
2592
2593 static enum sym_type
2594 C_symtype (str, len, c_ext)
2595      char *str;
2596      int len;
2597      int c_ext;
2598 {
2599   register struct C_stab_entry *se = in_word_set (str, len);
2600
2601   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2602     return st_none;
2603   return se->type;
2604 }
2605
2606 \f
2607 /*
2608  * Ignoring __attribute__ ((list))
2609  */
/* Set to TRUE by consider_token when it meets the __attribute__ keyword. */
2610 static bool inattribute;        /* looking at an __attribute__ construct */
2611
2612 /*
2613  * C functions and variables are recognized using a simple
2614  * finite automaton.  fvdef is its state variable.
2615  */
2616 static enum
2617 {
2618   fvnone,                       /* nothing seen */
2619   fdefunkey,                    /* Emacs DEFUN keyword seen */
2620   fdefunname,                   /* Emacs DEFUN name seen */
2621   foperator,                    /* func: operator keyword seen (cplpl) */
2622   fvnameseen,                   /* function or variable name seen */
2623   fstartlist,                   /* func: just after open parenthesis */
2624   finlist,                      /* func: in parameter list */
2625   flistseen,                    /* func: after parameter list */
2626   fignore,                      /* func: before open brace */
2627   vignore                       /* var-like: ignore until ';' */
2628 } fvdef;
2629
2630 static bool fvextern;           /* func or var: extern keyword seen */
2631
2632 /*
2633  * typedefs are recognized using a simple finite automaton.
2634  * typdef is its state variable.
2635  */
2636 static enum
2637 {
2638   tnone,                        /* nothing seen */
2639   tkeyseen,                     /* typedef keyword seen */
2640   ttypeseen,                    /* defined type seen */
2641   tinbody,                      /* inside typedef body */
2642   tend,                         /* just before typedef tag */
2643   tignore                       /* junk after typedef tag */
2644 } typdef;
2645
2646 /*
2647  * struct-like structures (enum, struct and union) are recognized
2648  * using another simple finite automaton.  `structdef' is its state
2649  * variable.
2650  */
2651 static enum
2652 {
2653   snone,                        /* nothing seen yet,
2654                                    or in struct body if bracelev > 0 */
2655   skeyseen,                     /* struct-like keyword seen */
2656   stagseen,                     /* struct-like tag seen */
2657   scolonseen                    /* colon seen after struct-like tag */
2658 } structdef;
2659
2660 /*
2661  * When objdef is different from onone, objtag is the name of the class.
 * consider_token stores a fresh copy of the class name here each time it
 * sees one; the previous string is not freed.
2662  */
2663 static char *objtag = "<uninited>";
2664
2665 /*
2666  * Yet another little state machine to deal with preprocessor lines.
2667  */
2668 static enum
2669 {
2670   dnone,                        /* nothing seen */
2671   dsharpseen,                   /* '#' seen as first char on line */
2672   ddefineseen,                  /* '#' and 'define' seen */
2673   dignorerest                   /* ignore rest of line */
2674 } definedef;
2675
2676 /*
2677  * State machine for Objective C protocols and implementations.
2678  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2679  */
2680 static enum
2681 {
2682   onone,                        /* nothing seen */
2683   oprotocol,                    /* @interface or @protocol seen */
2684   oimplementation,              /* @implementation seen */
2685   otagseen,                     /* class name seen */
2686   oparenseen,                   /* parenthesis before category seen */
2687   ocatseen,                     /* category name seen */
2688   oinbody,                      /* in @implementation body */
2689   omethodsign,                  /* in @implementation body, after +/- */
2690   omethodtag,                   /* after method name */
2691   omethodcolon,                 /* after method colon */
2692   omethodparm,                  /* after method parameter */
2693   oignore                       /* wait for @end */
2694 } objdef;
2695
2696
2697 /*
2698  * Use this structure to keep info about the token read, and how it
2699  * should be tagged.  Used by the make_C_tag function to build a tag.
2700  */
2701 static struct tok
2702 {
2703   char *line;                   /* string containing the token */
2704   int offset;                   /* where the token starts in LINE */
2705   int length;                   /* token length */
2706   /*
2707     The previous members can be used to pass strings around for generic
2708     purposes.  The following ones specifically refer to creating tags.  In this
2709     case the token contained here is the pattern that will be used to create a
2710     tag.
2711   */
2712   bool valid;                   /* create a tag only if TRUE; the token
2713                                    should be invalidated whenever a state
2714                                    machine is reset prematurely */
2715   bool named;                   /* create a named tag */
2716   int lineno;                   /* source line number of tag */
2717   long linepos;                 /* source char number of tag */
2718 } token;                        /* latest token read */
2719
2720 /*
2721  * Variables and functions for dealing with nested structures.
2722  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2723  */
2724 static void pushclass_above __P((int, char *, int));
2725 static void popclass_above __P((int));
2726 static void write_classname __P((linebuffer *, char *qualifier));
2727
/* cname and bracelev are parallel arrays: entry I holds the name of the
   declaration at nesting depth I (NULL when pushclass_above was given no
   name) and the brace level it was pushed at.  Both arrays have SIZE
   elements and are grown together by pushclass_above. */
2728 static struct {
2729   char **cname;                 /* nested class names */
2730   int *bracelev;                /* nested class brace level */
2731   int nl;                       /* class nesting level (elements used) */
2732   int size;                     /* length of the array */
2733 } cstack;                       /* stack for nested declaration tags */
2734 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2735 #define nestlev         (cstack.nl)
2736 /* After struct keyword or in struct body, not inside a nested function. */
/* Note that the macro reads a variable called `bracelev', which must be in
   scope wherever the macro is used. */
2737 #define instruct        (structdef == snone && nestlev > 0                      \
2738                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2739
2740 static void
2741 pushclass_above (bracelev, str, len)
2742      int bracelev;
2743      char *str;
2744      int len;
2745 {
2746   int nl;
2747
2748   popclass_above (bracelev);
2749   nl = cstack.nl;
2750   if (nl >= cstack.size)
2751     {
2752       int size = cstack.size *= 2;
2753       xrnew (cstack.cname, size, char *);
2754       xrnew (cstack.bracelev, size, int);
2755     }
2756   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2757   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2758   cstack.bracelev[nl] = bracelev;
2759   cstack.nl = nl + 1;
2760 }
2761
2762 static void
2763 popclass_above (bracelev)
2764      int bracelev;
2765 {
2766   int nl;
2767
2768   for (nl = cstack.nl - 1;
2769        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2770        nl--)
2771     {
2772       if (cstack.cname[nl] != NULL)
2773         free (cstack.cname[nl]);
2774       cstack.nl = nl;
2775     }
2776 }
2777
2778 static void
2779 write_classname (cn, qualifier)
2780      linebuffer *cn;
2781      char *qualifier;
2782 {
2783   int i, len;
2784   int qlen = strlen (qualifier);
2785
2786   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2787     {
2788       len = 0;
2789       cn->len = 0;
2790       cn->buffer[0] = '\0';
2791     }
2792   else
2793     {
2794       len = strlen (cstack.cname[0]);
2795       linebuffer_setlen (cn, len);
2796       strcpy (cn->buffer, cstack.cname[0]);
2797     }
2798   for (i = 1; i < cstack.nl; i++)
2799     {
2800       char *s;
2801       int slen;
2802
2803       s = cstack.cname[i];
2804       if (s == NULL)
2805         continue;
2806       slen = strlen (s);
2807       len += slen + qlen;
2808       linebuffer_setlen (cn, len);
2809       strncat (cn->buffer, qualifier, qlen);
2810       strncat (cn->buffer, s, slen);
2811     }
2812 }
2813
2814 \f
2815 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2816 static void make_C_tag __P((bool));
2817
2818 /*
2819  * consider_token ()
2820  *      checks to see if the current token is at the start of a
2821  *      function or variable, or corresponds to a typedef, or
2822  *      is a struct/union/enum tag, or #define, or an enum constant.
 *      Objective C class, category and method names and the C++
 *      `operator' keyword are recognized as well.  Returns TRUE if the
 *      token is one of these, FALSE if it should be ignored.
2823  *
2824  *      *IS_FUNC_OR_VAR gets TRUE when the token is taken as a function
2825  *      or variable name, an operator, or an Objective C class or
 *      category name.  For the name of a #define macro it gets TRUE if
 *      the macro has args and FALSE otherwise.  In all other cases it
 *      is left untouched.
 *
 *      C_EXTP points to which language we are looking at.  If C_AUTO is
 *      set there, it may be replaced by C_PLPL when `class', `template'
 *      or a name ending in `::operator' is met.
 *
 *      The type of the last struct-like keyword, the brace level where
 *      it was seen and the type of the current token are kept in static
 *      variables local to this function.
2826  *
2827  * Globals
2828  *      fvdef                   IN OUT
2829  *      structdef               IN OUT
2830  *      definedef               IN OUT
2831  *      typdef                  IN OUT
2832  *      objdef                  IN OUT
 *      fvextern                OUT
 *      inattribute             OUT
 *      objtag                  OUT
 *      token_name              OUT (Objective C method names only)
2833  */
2834
2835 static bool
2836 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2837      register char *str;        /* IN: token pointer */
2838      register int len;          /* IN: token length */
2839      register int c;            /* IN: first char after the token */
2840      int *c_extp;               /* IN, OUT: C extensions mask */
2841      int bracelev;              /* IN: brace level */
2842      int parlev;                /* IN: parenthesis level */
2843      bool *is_func_or_var;      /* OUT: function or variable found */
2844 {
2845   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2846      structtype is the type of the preceding struct-like keyword, and
2847      structbracelev is the brace level where it has been seen. */
2848   static enum sym_type structtype;
2849   static int structbracelev;
2850   static enum sym_type toktype;
2851
2852
  /* Keyword type of the token; st_none if it is not a keyword of the
     current language. */
2853   toktype = C_symtype (str, len, *c_extp);
2854
2855   /*
2856    * Skip __attribute__
2857    */
2858   if (toktype == st_C_attribute)
2859     {
2860       inattribute = TRUE;
2861       return FALSE;
2862      }
2863
2864    /*
2865     * Advance the definedef state machine.
2866     */
2867    switch (definedef)
2868      {
2869      case dnone:
2870        /* We're not on a preprocessor line. */
2871        if (toktype == st_C_gnumacro)
2872          {
2873            fvdef = fdefunkey;
2874            return FALSE;
2875          }
2876        break;
2877      case dsharpseen:
       /* First token after '#': only `define' is of interest. */
2878        if (toktype == st_C_define)
2879          {
2880            definedef = ddefineseen;
2881          }
2882        else
2883          {
2884            definedef = dignorerest;
2885          }
2886        return FALSE;
2887      case ddefineseen:
2888        /*
2889         * Make a tag for any macro, unless it is a constant
2890         * and constantypedefs is FALSE.
2891         */
2892        definedef = dignorerest;
       /* A macro takes arguments iff '(' immediately follows its name. */
2893        *is_func_or_var = (c == '(');
2894        if (!*is_func_or_var && !constantypedefs)
2895          return FALSE;
2896        else
2897          return TRUE;
2898      case dignorerest:
2899        return FALSE;
2900      default:
2901        error ("internal error: definedef value.", (char *)NULL);
2902      }
2903
2904    /*
2905     * Now typedefs
2906     */
2907    switch (typdef)
2908      {
2909      case tnone:
2910        if (toktype == st_C_typedef)
2911          {
           /* The typedef machine is started only if typedefs is set,
              but the function/variable machine is reset in any case. */
2912            if (typedefs)
2913              typdef = tkeyseen;
2914            fvextern = FALSE;
2915            fvdef = fvnone;
2916            return FALSE;
2917          }
2918        break;
2919      case tkeyseen:
       /* Tokens of any type not listed here leave typdef unchanged. */
2920        switch (toktype)
2921          {
2922          case st_none:
2923          case st_C_class:
2924          case st_C_struct:
2925          case st_C_enum:
2926            typdef = ttypeseen;
2927          }
2928        break;
2929      case ttypeseen:
2930        if (structdef == snone && fvdef == fvnone)
2931          {
2932            fvdef = fvnameseen;
2933            return TRUE;
2934          }
2935        break;
2936      case tend:
2937        switch (toktype)
2938          {
2939          case st_C_class:
2940          case st_C_struct:
2941          case st_C_enum:
2942            return FALSE;
2943          }
2944        return TRUE;
2945      }
2946
2947    /*
2948     * This structdef business is NOT invoked when we are ctags and the
2949     * file is plain C.  This is because a struct tag may have the same
2950     * name as another tag, and this loses with ctags.
2951     */
2952    switch (toktype)
2953      {
2954      case st_C_javastruct:
2955        if (structdef == stagseen)
2956          structdef = scolonseen;
2957        return FALSE;
2958      case st_C_template:
2959      case st_C_class:
2960        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2961            && bracelev == 0
2962            && definedef == dnone && structdef == snone
2963            && typdef == tnone && fvdef == fvnone)
2964          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
       /* `template' takes part in C++ detection only; processing goes
          on after this switch. */
2965        if (toktype == st_C_template)
2966          break;
2967        /* FALLTHRU */
2968      case st_C_struct:
2969      case st_C_enum:
2970        if (parlev == 0
2971            && fvdef != vignore
2972            && (typdef == tkeyseen
2973                || (typedefs_or_cplusplus && structdef == snone)))
2974          {
           /* Remember which keyword was seen and at which brace level. */
2975            structdef = skeyseen;
2976            structtype = toktype;
2977            structbracelev = bracelev;
2978            if (fvdef == fvnameseen)
2979              fvdef = fvnone;
2980          }
2981        return FALSE;
2982      }
2983
   /* The token right after a struct-like keyword is the struct tag. */
2984    if (structdef == skeyseen)
2985      {
2986        structdef = stagseen;
2987        return TRUE;
2988      }
2989
2990    if (typdef != tnone)
2991      definedef = dnone;
2992
2993    /* Detect Objective C constructs. */
2994    switch (objdef)
2995      {
2996      case onone:
2997        switch (toktype)
2998          {
2999          case st_C_objprot:
3000            objdef = oprotocol;
3001            return FALSE;
3002          case st_C_objimpl:
3003            objdef = oimplementation;
3004            return FALSE;
3005          }
3006        break;
3007      case oimplementation:
3008        /* Save the class tag for functions or variables defined inside. */
3009        objtag = savenstr (str, len);
3010        objdef = oinbody;
3011        return FALSE;
3012      case oprotocol:
3013        /* Save the class tag for categories. */
3014        objtag = savenstr (str, len);
3015        objdef = otagseen;
3016        *is_func_or_var = TRUE;
3017        return TRUE;
3018      case oparenseen:
3019        objdef = ocatseen;
3020        *is_func_or_var = TRUE;
3021        return TRUE;
3022      case oinbody:
3023        break;
3024      case omethodsign:
3025        if (parlev == 0)
3026          {
           /* First part of a method name: start token_name afresh. */
3027            fvdef = fvnone;
3028            objdef = omethodtag;
3029            linebuffer_setlen (&token_name, len);
3030            strncpy (token_name.buffer, str, len);
3031            token_name.buffer[len] = '\0';
3032            return TRUE;
3033          }
3034        return FALSE;
3035      case omethodcolon:
3036        if (parlev == 0)
3037          objdef = omethodparm;
3038        return FALSE;
3039      case omethodparm:
3040        if (parlev == 0)
3041          {
           /* Further part of a method name: append it to token_name. */
3042            fvdef = fvnone;
3043            objdef = omethodtag;
3044            linebuffer_setlen (&token_name, token_name.len + len);
3045            strncat (token_name.buffer, str, len);
3046            return TRUE;
3047          }
3048        return FALSE;
3049      case oignore:
3050        if (toktype == st_C_objend)
3051          {
3052            /* Memory leakage here: the string pointed by objtag is
3053               never released, because many tests would be needed to
3054               avoid breaking on incorrect input code.  The amount of
3055               memory leaked here is the sum of the lengths of the
3056               class tags.
3057            free (objtag); */
3058            objdef = onone;
3059          }
3060        return FALSE;
3061      }
3062
3063    /* A function, variable or enum constant? */
3064    switch (toktype)
3065      {
3066      case st_C_extern:
3067        fvextern = TRUE;
3068        switch  (fvdef)
3069          {
3070          case finlist:
3071          case flistseen:
3072          case fignore:
3073          case vignore:
3074            break;
3075          default:
3076            fvdef = fvnone;
3077          }
3078        return FALSE;
3079      case st_C_ignore:
3080        fvextern = FALSE;
3081        fvdef = vignore;
3082        return FALSE;
3083      case st_C_operator:
3084        fvdef = foperator;
3085        *is_func_or_var = TRUE;
3086        return TRUE;
3087      case st_none:
3088        if (constantypedefs
3089            && structdef == snone
3090            && structtype == st_C_enum && bracelev > structbracelev)
3091          return TRUE;           /* enum constant */
3092        switch (fvdef)
3093          {
3094          case fdefunkey:
3095            if (bracelev > 0)
3096              break;
3097            fvdef = fdefunname;  /* GNU macro */
3098            *is_func_or_var = TRUE;
3099            return TRUE;
3100          case fvnone:
3101            switch (typdef)
3102              {
3103              case ttypeseen:
3104                return FALSE;
3105              case tnone:
                /* `asm' and `__asm__' are skipped like var-like
                   constructs. */
3106                if ((strneq (str, "asm", 3) && endtoken (str[3]))
3107                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3108                  {
3109                    fvdef = vignore;
3110                    return FALSE;
3111                  }
3112                break;
3113              }
3114           /* FALLTHRU */
3115           case fvnameseen:
3116           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3117             {
3118               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3119                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3120               fvdef = foperator;
3121               *is_func_or_var = TRUE;
3122               return TRUE;
3123             }
           /* Inside braces, names count only directly in a struct body. */
3124           if (bracelev > 0 && !instruct)
3125             break;
3126           fvdef = fvnameseen;   /* function or variable */
3127           *is_func_or_var = TRUE;
3128           return TRUE;
3129         }
3130       break;
3131     }
3132
3133   return FALSE;
3134 }
3135
3136 \f
3137 /*
3138  * C_entries often keeps pointers to tokens or lines which are older than
3139  * the line currently read.  By keeping two line buffers, and switching
3140  * them at end of line, it is possible to use those pointers.
 *
 * The macros below are not self-contained: they refer to variables
 * named curndx, newndx, c_ext, charno, lp, quotednl, inf and savetoken,
 * which must be in scope where the macros are used.
3141  */
3142 static struct
3143 {
3144   long linepos;
3145   linebuffer lb;
3146 } lbs[2];
3147
3148 #define current_lb_is_new (newndx == curndx)
3149 #define switch_line_buffers() (curndx = 1 - curndx)
3150
/* The line buffers selected by curndx and newndx, and their linepos. */
3151 #define curlb (lbs[curndx].lb)
3152 #define newlb (lbs[newndx].lb)
3153 #define curlinepos (lbs[curndx].linepos)
3154 #define newlinepos (lbs[newndx].linepos)
3155
/* Tests on the C extensions mask c_ext. */
3156 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3157 #define cplpl (c_ext & C_PLPL)
3158 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3159
/* Read the next line into the current line buffer: save charno as its
   linepos, point lp at the start of the buffer, clear quotednl and make
   the current buffer the new one.  definedef is left as it is. */
3160 #define CNL_SAVE_DEFINEDEF()                                            \
3161 do {                                                                    \
3162   curlinepos = charno;                                                  \
3163   readline (&curlb, inf);                                               \
3164   lp = curlb.buffer;                                                    \
3165   quotednl = FALSE;                                                     \
3166   newndx = curndx;                                                      \
3167 } while (0)
3168
/* Like CNL_SAVE_DEFINEDEF, but also copy savetoken back into token if
   savetoken is valid (and invalidate it), and reset definedef to dnone. */
3169 #define CNL()                                                           \
3170 do {                                                                    \
3171   CNL_SAVE_DEFINEDEF();                                                 \
3172   if (savetoken.valid)                                                  \
3173     {                                                                   \
3174       token = savetoken;                                                \
3175       savetoken.valid = FALSE;                                          \
3176     }                                                                   \
3177   definedef = dnone;                                                    \
3178 } while (0)
3179
3180
3181 static void
3182 make_C_tag (isfun)
3183      bool isfun;
3184 {
3185   /* This function should never be called when token.valid is FALSE, but
3186      we must protect against invalid input or internal errors. */
3187   if (!DEBUG && !token.valid)
3188     return;
3189
3190   if (token.valid)
3191     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3192               token.offset+token.length+1, token.lineno, token.linepos);
3193   else                          /* this case is optimised away if !DEBUG */
3194     make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3195               token_name.len + 17, isfun, token.line,
3196               token.offset+token.length+1, token.lineno, token.linepos);
3197
3198   token.valid = FALSE;
3199 }
3200
3201
3202 /*
3203  * C_entries ()
3204  *      This routine finds functions, variables, typedefs,
3205  *      #define's, enum constants and struct/union/enum definitions in
3206  *      C syntax and adds them to the list.
3207  */
3208 static void
3209 C_entries (c_ext, inf)
3210      int c_ext;                 /* extension of C */
3211      FILE *inf;                 /* input file */
3212 {
3213   register char c;              /* latest char read; '\0' for end of line */
3214   register char *lp;            /* pointer one beyond the character `c' */
3215   int curndx, newndx;           /* indices for current and new lb */
3216   register int tokoff;          /* offset in line of start of current token */
3217   register int toklen;          /* length of current token */
3218   char *qualifier;              /* string used to qualify names */
3219   int qlen;                     /* length of qualifier */
3220   int bracelev;                 /* current brace level */
3221   int bracketlev;               /* current bracket level */
3222   int parlev;                   /* current parenthesis level */
3223   int attrparlev;               /* __attribute__ parenthesis level */
3224   int templatelev;              /* current template level */
3225   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3226   bool incomm, inquote, inchar, quotednl, midtoken;
3227   bool yacc_rules;              /* in the rules part of a yacc file */
3228   struct tok savetoken;         /* token saved during preprocessor handling */
3229
3230
3231   linebuffer_init (&lbs[0].lb);
3232   linebuffer_init (&lbs[1].lb);
3233   if (cstack.size == 0)
3234     {
3235       cstack.size = (DEBUG) ? 1 : 4;
3236       cstack.nl = 0;
3237       cstack.cname = xnew (cstack.size, char *);
3238       cstack.bracelev = xnew (cstack.size, int);
3239     }
3240
3241   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3242   curndx = newndx = 0;
3243   lp = curlb.buffer;
3244   *lp = 0;
3245
3246   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3247   structdef = snone; definedef = dnone; objdef = onone;
3248   yacc_rules = FALSE;
3249   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3250   token.valid = savetoken.valid = FALSE;
3251   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3252   if (cjava)
3253     { qualifier = "."; qlen = 1; }
3254   else
3255     { qualifier = "::"; qlen = 2; }
3256
3257
3258   while (!feof (inf))
3259     {
3260       c = *lp++;
3261       if (c == '\\')
3262         {
3263           /* If we are at the end of the line, the next character is a
3264              '\0'; do not skip it, because it is what tells us
3265              to read the next line.  */
3266           if (*lp == '\0')
3267             {
3268               quotednl = TRUE;
3269               continue;
3270             }
3271           lp++;
3272           c = ' ';
3273         }
3274       else if (incomm)
3275         {
3276           switch (c)
3277             {
3278             case '*':
3279               if (*lp == '/')
3280                 {
3281                   c = *lp++;
3282                   incomm = FALSE;
3283                 }
3284               break;
3285             case '\0':
3286               /* Newlines inside comments do not end macro definitions in
3287                  traditional cpp. */
3288               CNL_SAVE_DEFINEDEF ();
3289               break;
3290             }
3291           continue;
3292         }
3293       else if (inquote)
3294         {
3295           switch (c)
3296             {
3297             case '"':
3298               inquote = FALSE;
3299               break;
3300             case '\0':
3301               /* Newlines inside strings do not end macro definitions
3302                  in traditional cpp, even though compilers don't
3303                  usually accept them. */
3304               CNL_SAVE_DEFINEDEF ();
3305               break;
3306             }
3307           continue;
3308         }
3309       else if (inchar)
3310         {
3311           switch (c)
3312             {
3313             case '\0':
3314               /* Hmmm, something went wrong. */
3315               CNL ();
3316               /* FALLTHRU */
3317             case '\'':
3318               inchar = FALSE;
3319               break;
3320             }
3321           continue;
3322         }
3323       else if (bracketlev > 0)
3324         {
3325           switch (c)
3326             {
3327             case ']':
3328               if (--bracketlev > 0)
3329                 continue;
3330               break;
3331             case '\0':
3332               CNL_SAVE_DEFINEDEF ();
3333               break;
3334             }
3335           continue;
3336         }
3337       else switch (c)
3338         {
3339         case '"':
3340           inquote = TRUE;
3341           if (inattribute)
3342             break;
3343           switch (fvdef)
3344             {
3345             case fdefunkey:
3346             case fstartlist:
3347             case finlist:
3348             case fignore:
3349             case vignore:
3350               break;
3351             default:
3352               fvextern = FALSE;
3353               fvdef = fvnone;
3354             }
3355           continue;
3356         case '\'':
3357           inchar = TRUE;
3358           if (inattribute)
3359             break;
3360           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3361             {
3362               fvextern = FALSE;
3363               fvdef = fvnone;
3364             }
3365           continue;
3366         case '/':
3367           if (*lp == '*')
3368             {
3369               lp++;
3370               incomm = TRUE;
3371               continue;
3372             }
3373           else if (/* cplpl && */ *lp == '/')
3374             {
3375               c = '\0';
3376               break;
3377             }
3378           else
3379             break;
3380         case '%':
3381           if ((c_ext & YACC) && *lp == '%')
3382             {
3383               /* Entering or exiting rules section in yacc file. */
3384               lp++;
3385               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3386               typdef = tnone; structdef = snone;
3387               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3388               bracelev = 0;
3389               yacc_rules = !yacc_rules;
3390               continue;
3391             }
3392           else
3393             break;
3394         case '#':
3395           if (definedef == dnone)
3396             {
3397               char *cp;
3398               bool cpptoken = TRUE;
3399
3400               /* Look back on this line.  If all blanks, or nonblanks
3401                  followed by an end of comment, this is a preprocessor
3402                  token. */
3403               for (cp = newlb.buffer; cp < lp-1; cp++)
3404                 if (!iswhite (*cp))
3405                   {
3406                     if (*cp == '*' && *(cp+1) == '/')
3407                       {
3408                         cp++;
3409                         cpptoken = TRUE;
3410                       }
3411                     else
3412                       cpptoken = FALSE;
3413                   }
3414               if (cpptoken)
3415                 definedef = dsharpseen;
3416             } /* if (definedef == dnone) */
3417           continue;
3418         case '[':
3419           bracketlev++;
3420             continue;
3421         } /* switch (c) */
3422
3423
3424       /* Consider token only if some involved conditions are satisfied. */
3425       if (typdef != tignore
3426           && definedef != dignorerest
3427           && fvdef != finlist
3428           && templatelev == 0
3429           && (definedef != dnone
3430               || structdef != scolonseen)
3431           && !inattribute)
3432         {
3433           if (midtoken)
3434             {
3435               if (endtoken (c))
3436                 {
3437                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3438                     /* This handles :: in the middle,
3439                        but not at the beginning of an identifier.
3440                        Also, space-separated :: is not recognised. */
3441                     {
3442                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3443                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3444                       lp += 2;
3445                       toklen += 2;
3446                       c = lp[-1];
3447                       goto still_in_token;
3448                     }
3449                   else
3450                     {
3451                       bool funorvar = FALSE;
3452
3453                       if (yacc_rules
3454                           || consider_token (newlb.buffer + tokoff, toklen, c,
3455                                              &c_ext, bracelev, parlev,
3456                                              &funorvar))
3457                         {
3458                           if (fvdef == foperator)
3459                             {
3460                               char *oldlp = lp;
3461                               lp = skip_spaces (lp-1);
3462                               if (*lp != '\0')
3463                                 lp += 1;
3464                               while (*lp != '\0'
3465                                      && !iswhite (*lp) && *lp != '(')
3466                                 lp += 1;
3467                               c = *lp++;
3468                               toklen += lp - oldlp;
3469                             }
3470                           token.named = FALSE;
3471                           if (!plainc
3472                               && nestlev > 0 && definedef == dnone)
3473                             /* in struct body */
3474                             {
3475                               write_classname (&token_name, qualifier);
3476                               linebuffer_setlen (&token_name,
3477                                                  token_name.len+qlen+toklen);
3478                               strcat (token_name.buffer, qualifier);
3479                               strncat (token_name.buffer,
3480                                        newlb.buffer + tokoff, toklen);
3481                               token.named = TRUE;
3482                             }
3483                           else if (objdef == ocatseen)
3484                             /* Objective C category */
3485                             {
3486                               int len = strlen (objtag) + 2 + toklen;
3487                               linebuffer_setlen (&token_name, len);
3488                               strcpy (token_name.buffer, objtag);
3489                               strcat (token_name.buffer, "(");
3490                               strncat (token_name.buffer,
3491                                        newlb.buffer + tokoff, toklen);
3492                               strcat (token_name.buffer, ")");
3493                               token.named = TRUE;
3494                             }
3495                           else if (objdef == omethodtag
3496                                    || objdef == omethodparm)
3497                             /* Objective C method */
3498                             {
3499                               token.named = TRUE;
3500                             }
3501                           else if (fvdef == fdefunname)
3502                             /* GNU DEFUN and similar macros */
3503                             {
3504                               bool defun = (newlb.buffer[tokoff] == 'F');
3505                               int off = tokoff;
3506                               int len = toklen;
3507
3508                               /* Rewrite the tag so that emacs lisp DEFUNs
3509                                  can be found by their elisp name */
3510                               if (defun)
3511                                 {
3512                                   off += 1;
3513                                   len -= 1;
3514                                 }
3515                               len = toklen;
3516                               linebuffer_setlen (&token_name, len);
3517                               strncpy (token_name.buffer,
3518                                        newlb.buffer + off, len);
3519                               token_name.buffer[len] = '\0';
3520                               if (defun)
3521                                 while (--len >= 0)
3522                                   if (token_name.buffer[len] == '_')
3523                                     token_name.buffer[len] = '-';
3524                               token.named = defun;
3525                             }
3526                           else
3527                             {
3528                               linebuffer_setlen (&token_name, toklen);
3529                               strncpy (token_name.buffer,
3530                                        newlb.buffer + tokoff, toklen);
3531                               token_name.buffer[toklen] = '\0';
3532                               /* Name macros and members. */
3533                               token.named = (structdef == stagseen
3534                                              || typdef == ttypeseen
3535                                              || typdef == tend
3536                                              || (funorvar
3537                                                  && definedef == dignorerest)
3538                                              || (funorvar
3539                                                  && definedef == dnone
3540                                                  && structdef == snone
3541                                                  && bracelev > 0));
3542                             }
3543                           token.lineno = lineno;
3544                           token.offset = tokoff;
3545                           token.length = toklen;
3546                           token.line = newlb.buffer;
3547                           token.linepos = newlinepos;
3548                           token.valid = TRUE;
3549
3550                           if (definedef == dnone
3551                               && (fvdef == fvnameseen
3552                                   || fvdef == foperator
3553                                   || structdef == stagseen
3554                                   || typdef == tend
3555                                   || typdef == ttypeseen
3556                                   || objdef != onone))
3557                             {
3558                               if (current_lb_is_new)
3559                                 switch_line_buffers ();
3560                             }
3561                           else if (definedef != dnone
3562                                    || fvdef == fdefunname
3563                                    || instruct)
3564                             make_C_tag (funorvar);
3565                         }
3566                       else /* not yacc and consider_token failed */
3567                         {
3568                           if (inattribute && fvdef == fignore)
3569                             {
3570                               /* We have just met __attribute__ after a
3571                                  function parameter list: do not tag the
3572                                  function again. */
3573                               fvdef = fvnone;
3574                             }
3575                         }
3576                       midtoken = FALSE;
3577                     }
3578                 } /* if (endtoken (c)) */
3579               else if (intoken (c))
3580                 still_in_token:
3581                 {
3582                   toklen++;
3583                   continue;
3584                 }
3585             } /* if (midtoken) */
3586           else if (begtoken (c))
3587             {
3588               switch (definedef)
3589                 {
3590                 case dnone:
3591                   switch (fvdef)
3592                     {
3593                     case fstartlist:
3594                       /* This prevents tagging fb in
3595                          void (__attribute__((noreturn)) *fb) (void);
3596                          Fixing this is not easy and not very important. */
3597                       fvdef = finlist;
3598                       continue;
3599                     case flistseen:
3600                       if (plainc || declarations)
3601                         {
3602                           make_C_tag (TRUE); /* a function */
3603                           fvdef = fignore;
3604                         }
3605                       break;
3606                     }
3607                   if (structdef == stagseen && !cjava)
3608                     {
3609                       popclass_above (bracelev);
3610                       structdef = snone;
3611                     }
3612                   break;
3613                 case dsharpseen:
3614                   savetoken = token;
3615                   break;
3616                 }
3617               if (!yacc_rules || lp == newlb.buffer + 1)
3618                 {
3619                   tokoff = lp - 1 - newlb.buffer;
3620                   toklen = 1;
3621                   midtoken = TRUE;
3622                 }
3623               continue;
3624             } /* if (begtoken) */
3625         } /* if must look at token */
3626
3627
3628       /* Detect end of line, colon, comma, semicolon and various braces
3629          after having handled a token.*/
3630       switch (c)
3631         {
3632         case ':':
3633           if (inattribute)
3634             break;
3635           if (yacc_rules && token.offset == 0 && token.valid)
3636             {
3637               make_C_tag (FALSE); /* a yacc function */
3638               break;
3639             }
3640           if (definedef != dnone)
3641             break;
3642           switch (objdef)
3643             {
3644             case  otagseen:
3645               objdef = oignore;
3646               make_C_tag (TRUE); /* an Objective C class */
3647               break;
3648             case omethodtag:
3649             case omethodparm:
3650               objdef = omethodcolon;
3651               linebuffer_setlen (&token_name, token_name.len + 1);
3652               strcat (token_name.buffer, ":");
3653               break;
3654             }
3655           if (structdef == stagseen)
3656             {
3657               structdef = scolonseen;
3658               break;
3659             }
3660           /* Should be useless, but may work as a safety net. */
3661           if (cplpl && fvdef == flistseen)
3662             {
3663               make_C_tag (TRUE); /* a function */
3664               fvdef = fignore;
3665               break;
3666             }
3667           break;
3668         case ';':
3669           if (definedef != dnone || inattribute)
3670             break;
3671           switch (typdef)
3672             {
3673             case tend:
3674             case ttypeseen:
3675               make_C_tag (FALSE); /* a typedef */
3676               typdef = tnone;
3677               fvdef = fvnone;
3678               break;
3679             case tnone:
3680             case tinbody:
3681             case tignore:
3682               switch (fvdef)
3683                 {
3684                 case fignore:
3685                   if (typdef == tignore || cplpl)
3686                     fvdef = fvnone;
3687                   break;
3688                 case fvnameseen:
3689                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3690                       || (members && instruct))
3691                     make_C_tag (FALSE); /* a variable */
3692                   fvextern = FALSE;
3693                   fvdef = fvnone;
3694                   token.valid = FALSE;
3695                   break;
3696                 case flistseen:
3697                   if ((declarations
3698                        && (cplpl || !instruct)
3699                        && (typdef == tnone || (typdef != tignore && instruct)))
3700                       || (members
3701                           && plainc && instruct))
3702                     make_C_tag (TRUE);  /* a function */
3703                   /* FALLTHRU */
3704                 default:
3705                   fvextern = FALSE;
3706                   fvdef = fvnone;
3707                   if (declarations
3708                        && cplpl && structdef == stagseen)
3709                     make_C_tag (FALSE); /* forward declaration */
3710                   else
3711                     token.valid = FALSE;
3712                 } /* switch (fvdef) */
3713               /* FALLTHRU */
3714             default:
3715               if (!instruct)
3716                 typdef = tnone;
3717             }
3718           if (structdef == stagseen)
3719             structdef = snone;
3720           break;
3721         case ',':
3722           if (definedef != dnone || inattribute)
3723             break;
3724           switch (objdef)
3725             {
3726             case omethodtag:
3727             case omethodparm:
3728               make_C_tag (TRUE); /* an Objective C method */
3729               objdef = oinbody;
3730               break;
3731             }
3732           switch (fvdef)
3733             {
3734             case fdefunkey:
3735             case foperator:
3736             case fstartlist:
3737             case finlist:
3738             case fignore:
3739             case vignore:
3740               break;
3741             case fdefunname:
3742               fvdef = fignore;
3743               break;
3744             case fvnameseen:
3745               if (parlev == 0
3746                   && ((globals
3747                        && bracelev == 0
3748                        && templatelev == 0
3749                        && (!fvextern || declarations))
3750                       || (members && instruct)))
3751                   make_C_tag (FALSE); /* a variable */
3752               break;
3753             case flistseen:
3754               if ((declarations && typdef == tnone && !instruct)
3755                   || (members && typdef != tignore && instruct))
3756                 {
3757                   make_C_tag (TRUE); /* a function */
3758                   fvdef = fvnameseen;
3759                 }
3760               else if (!declarations)
3761                 fvdef = fvnone;
3762               token.valid = FALSE;
3763               break;
3764             default:
3765               fvdef = fvnone;
3766             }
3767           if (structdef == stagseen)
3768             structdef = snone;
3769           break;
3770         case ']':
3771           if (definedef != dnone || inattribute)
3772             break;
3773           if (structdef == stagseen)
3774             structdef = snone;
3775           switch (typdef)
3776             {
3777             case ttypeseen:
3778             case tend:
3779               typdef = tignore;
3780               make_C_tag (FALSE);       /* a typedef */
3781               break;
3782             case tnone:
3783             case tinbody:
3784               switch (fvdef)
3785                 {
3786                 case foperator:
3787                 case finlist:
3788                 case fignore:
3789                 case vignore:
3790                   break;
3791                 case fvnameseen:
3792                   if ((members && bracelev == 1)
3793                       || (globals && bracelev == 0
3794                           && (!fvextern || declarations)))
3795                     make_C_tag (FALSE); /* a variable */
3796                   /* FALLTHRU */
3797                 default:
3798                   fvdef = fvnone;
3799                 }
3800               break;
3801             }
3802           break;
3803         case '(':
3804           if (inattribute)
3805             {
3806               attrparlev++;
3807               break;
3808             }
3809           if (definedef != dnone)
3810             break;
3811           if (objdef == otagseen && parlev == 0)
3812             objdef = oparenseen;
3813           switch (fvdef)
3814             {
3815             case fvnameseen:
3816               if (typdef == ttypeseen
3817                   && *lp != '*'
3818                   && !instruct)
3819                 {
3820                   /* This handles constructs like:
3821                      typedef void OperatorFun (int fun); */
3822                   make_C_tag (FALSE);
3823                   typdef = tignore;
3824                   fvdef = fignore;
3825                   break;
3826                 }
3827               /* FALLTHRU */
3828             case foperator:
3829               fvdef = fstartlist;
3830               break;
3831             case flistseen:
3832               fvdef = finlist;
3833               break;
3834             }
3835           parlev++;
3836           break;
3837         case ')':
3838           if (inattribute)
3839             {
3840               if (--attrparlev == 0)
3841                 inattribute = FALSE;
3842               break;
3843             }
3844           if (definedef != dnone)
3845             break;
3846           if (objdef == ocatseen && parlev == 1)
3847             {
3848               make_C_tag (TRUE); /* an Objective C category */
3849               objdef = oignore;
3850             }
3851           if (--parlev == 0)
3852             {
3853               switch (fvdef)
3854                 {
3855                 case fstartlist:
3856                 case finlist:
3857                   fvdef = flistseen;
3858                   break;
3859                 }
3860               if (!instruct
3861                   && (typdef == tend
3862                       || typdef == ttypeseen))
3863                 {
3864                   typdef = tignore;
3865                   make_C_tag (FALSE); /* a typedef */
3866                 }
3867             }
3868           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3869             parlev = 0;
3870           break;
3871         case '{':
3872           if (definedef != dnone)
3873             break;
3874           if (typdef == ttypeseen)
3875             {
3876               /* Whenever typdef is set to tinbody (currently only
3877                  here), typdefbracelev should be set to bracelev. */
3878               typdef = tinbody;
3879               typdefbracelev = bracelev;
3880             }
3881           switch (fvdef)
3882             {
3883             case flistseen:
3884               make_C_tag (TRUE);    /* a function */
3885               /* FALLTHRU */
3886             case fignore:
3887               fvdef = fvnone;
3888               break;
3889             case fvnone:
3890               switch (objdef)
3891                 {
3892                 case otagseen:
3893                   make_C_tag (TRUE); /* an Objective C class */
3894                   objdef = oignore;
3895                   break;
3896                 case omethodtag:
3897                 case omethodparm:
3898                   make_C_tag (TRUE); /* an Objective C method */
3899                   objdef = oinbody;
3900                   break;
3901                 default:
3902                   /* Neutralize `extern "C" {' grot. */
3903                   if (bracelev == 0 && structdef == snone && nestlev == 0
3904                       && typdef == tnone)
3905                     bracelev = -1;
3906                 }
3907               break;
3908             }
3909           switch (structdef)
3910             {
3911             case skeyseen:         /* unnamed struct */
3912               pushclass_above (bracelev, NULL, 0);
3913               structdef = snone;
3914               break;
3915             case stagseen:         /* named struct or enum */
3916             case scolonseen:       /* a class */
3917               pushclass_above (bracelev,token.line+token.offset, token.length);
3918               structdef = snone;
3919               make_C_tag (FALSE);  /* a struct or enum */
3920               break;
3921             }
3922           bracelev++;
3923           break;
3924         case '*':
3925           if (definedef != dnone)
3926             break;
3927           if (fvdef == fstartlist)
3928             {
3929               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3930               token.valid = FALSE;
3931             }
3932           break;
3933         case '}':
3934           if (definedef != dnone)
3935             break;
3936           if (!ignoreindent && lp == newlb.buffer + 1)
3937             {
3938               if (bracelev != 0)
3939                 token.valid = FALSE;
3940               bracelev = 0;     /* reset brace level if first column */
3941               parlev = 0;       /* also reset paren level, just in case... */
3942             }
3943           else if (bracelev > 0)
3944             bracelev--;
3945           else
3946             token.valid = FALSE; /* something gone amiss, token unreliable */
3947           popclass_above (bracelev);
3948           structdef = snone;
3949           /* Only if typdef == tinbody is typdefbracelev significant. */
3950           if (typdef == tinbody && bracelev <= typdefbracelev)
3951             {
3952               assert (bracelev == typdefbracelev);
3953               typdef = tend;
3954             }
3955           break;
3956         case '=':
3957           if (definedef != dnone)
3958             break;
3959           switch (fvdef)
3960             {
3961             case foperator:
3962             case finlist:
3963             case fignore:
3964             case vignore:
3965               break;
3966             case fvnameseen:
3967               if ((members && bracelev == 1)
3968                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3969                 make_C_tag (FALSE); /* a variable */
3970               /* FALLTHRU */
3971             default:
3972               fvdef = vignore;
3973             }
3974           break;
3975         case '<':
3976           if (cplpl
3977               && (structdef == stagseen || fvdef == fvnameseen))
3978             {
3979               templatelev++;
3980               break;
3981             }
3982           goto resetfvdef;
3983         case '>':
3984           if (templatelev > 0)
3985             {
3986               templatelev--;
3987               break;
3988             }
3989           goto resetfvdef;
3990         case '+':
3991         case '-':
3992           if (objdef == oinbody && bracelev == 0)
3993             {
3994               objdef = omethodsign;
3995               break;
3996             }
3997           /* FALLTHRU */
3998         resetfvdef:
3999         case '#': case '~': case '&': case '%': case '/':
4000         case '|': case '^': case '!': case '.': case '?':
4001           if (definedef != dnone)
4002             break;
4003           /* These surely cannot follow a function tag in C. */
4004           switch (fvdef)
4005             {
4006             case foperator:
4007             case finlist:
4008             case fignore:
4009             case vignore:
4010               break;
4011             default:
4012               fvdef = fvnone;
4013             }
4014           break;
4015         case '\0':
4016           if (objdef == otagseen)
4017             {
4018               make_C_tag (TRUE); /* an Objective C class */
4019               objdef = oignore;
4020             }
4021           /* If a macro spans multiple lines don't reset its state. */
4022           if (quotednl)
4023             CNL_SAVE_DEFINEDEF ();
4024           else
4025             CNL ();
4026           break;
4027         } /* switch (c) */
4028
4029     } /* while not eof */
4030
4031   free (lbs[0].lb.buffer);
4032   free (lbs[1].lb.buffer);
4033 }
4034
4035 /*
4036  * Process either a C++ file or a C file depending on the setting
4037  * of a global flag.
4038  */
4039 static void
4040 default_C_entries (inf)
4041      FILE *inf;
4042 {
4043   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4044 }
4045
4046 /* Always do plain C: hand INF to C_entries with a flag argument of 0. */
4047 static void
4048 plain_C_entries (inf)
4049      FILE *inf;
4050 {
4051   C_entries (0, inf);
4052 }
4053
4054 /* Always do C++: hand INF to C_entries with the C_PLPL flag. */
4055 static void
4056 Cplusplus_entries (inf)
4057      FILE *inf;
4058 {
4059   C_entries (C_PLPL, inf);
4060 }
4061
4062 /* Always do Java: hand INF to C_entries with the C_JAVA flag. */
4063 static void
4064 Cjava_entries (inf)
4065      FILE *inf;
4066 {
4067   C_entries (C_JAVA, inf);
4068 }
4069
4070 /* Always do C*: hand INF to C_entries with the C_STAR flag. */
4071 static void
4072 Cstar_entries (inf)
4073      FILE *inf;
4074 {
4075   C_entries (C_STAR, inf);
4076 }
4077
4078 /* Always do Yacc: hand INF to C_entries with the YACC flag. */
4079 static void
4080 Yacc_entries (inf)
4081      FILE *inf;
4082 {
4083   C_entries (YACC, inf);
4084 }
4085
4086 \f
/* Useful macros. */

/*
 * Loop header: the statement that follows is executed once per line
 * read from FILE_POINTER.  Before each iteration, and only while
 * feof (FILE_POINTER) is false, readline fills LINE_BUFFER and
 * CHAR_POINTER is set to LINE_BUFFER's buffer member.
 *
 * LINE_BUFFER and CHAR_POINTER are parenthesized in the expansion so
 * that any lvalue expression (for instance `*lbp') can be passed, not
 * just a plain identifier.  All three arguments are evaluated again on
 * every iteration, so they should have no side effects.
 */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
          (readline (&(line_buffer), file_pointer),                     \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )
4096
4097 #define LOOKING_AT(cp, kw)  /* kw: literal keyword; cp moves on match */ \
4098   ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
4099    && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
4100    && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
4101    && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip kw and spaces */
4102
4103 /* Like LOOKING_AT, but uses strncaseeq, no notinname test, no space skipping */
4104 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4105   ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
4106    && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
4107    && ((cp) += sizeof(kw)-1))                   /* skip the keyword */
4108
4109 /*
4110  * Read a file, but do no processing.  This is used to do regexp
4111  * matching on files that have no language defined.
4112  */
4113 static void
4114 just_read_file (inf)
4115      FILE *inf;
4116 {
4117   register char *dummy;
4118
4119   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4120     continue;
4121 }
4122
4123 \f
4124 /* Fortran parsing */
4125
4126 static void F_takeprec __P((void));
4127 static void F_getit __P((FILE *));
4128
4129 static void
4130 F_takeprec ()
4131 {
4132   dbp = skip_spaces (dbp);
4133   if (*dbp != '*')
4134     return;
4135   dbp++;
4136   dbp = skip_spaces (dbp);
4137   if (strneq (dbp, "(*)", 3))
4138     {
4139       dbp += 3;
4140       return;
4141     }
4142   if (!ISDIGIT (*dbp))
4143     {
4144       --dbp;                    /* force failure */
4145       return;
4146     }
4147   do
4148     dbp++;
4149   while (ISDIGIT (*dbp));
4150 }
4151
4152 static void
4153 F_getit (inf)
4154      FILE *inf;
4155 {
4156   register char *cp;
4157
4158   dbp = skip_spaces (dbp);
4159   if (*dbp == '\0')
4160     {
4161       readline (&lb, inf);
4162       dbp = lb.buffer;
4163       if (dbp[5] != '&')
4164         return;
4165       dbp += 6;
4166       dbp = skip_spaces (dbp);
4167     }
4168   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4169     return;
4170   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4171     continue;
4172   make_tag (dbp, cp-dbp, TRUE,
4173             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4174 }
4175
4176
4177 static void
4178 Fortran_functions (inf)
4179      FILE *inf;
4180 {
4181   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4182     {
4183       if (*dbp == '%')
4184         dbp++;                  /* Ratfor escape to fortran */
4185       dbp = skip_spaces (dbp);
4186       if (*dbp == '\0')
4187         continue;
4188       switch (lowcase (*dbp))
4189         {
4190         case 'i':
4191           if (nocase_tail ("integer"))
4192             F_takeprec ();
4193           break;
4194         case 'r':
4195           if (nocase_tail ("real"))
4196             F_takeprec ();
4197           break;
4198         case 'l':
4199           if (nocase_tail ("logical"))
4200             F_takeprec ();
4201           break;
4202         case 'c':
4203           if (nocase_tail ("complex") || nocase_tail ("character"))
4204             F_takeprec ();
4205           break;
4206         case 'd':
4207           if (nocase_tail ("double"))
4208             {
4209               dbp = skip_spaces (dbp);
4210               if (*dbp == '\0')
4211                 continue;
4212               if (nocase_tail ("precision"))
4213                 break;
4214               continue;
4215             }
4216           break;
4217         }
4218       dbp = skip_spaces (dbp);
4219       if (*dbp == '\0')
4220         continue;
4221       switch (lowcase (*dbp))
4222         {
4223         case 'f':
4224           if (nocase_tail ("function"))
4225             F_getit (inf);
4226           continue;
4227         case 's':
4228           if (nocase_tail ("subroutine"))
4229             F_getit (inf);
4230           continue;
4231         case 'e':
4232           if (nocase_tail ("entry"))
4233             F_getit (inf);
4234           continue;
4235         case 'b':
4236           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4237             {
4238               dbp = skip_spaces (dbp);
4239               if (*dbp == '\0') /* assume un-named */
4240                 make_tag ("blockdata", 9, TRUE,
4241                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4242               else
4243                 F_getit (inf);  /* look for name */
4244             }
4245           continue;
4246         }
4247     }
4248 }
4249
4250 \f
4251 /*
4252  * Ada parsing
4253  * Original code by
4254  * Philippe Waroquiers (1998)
4255  */
4256
4257 static void Ada_getit __P((FILE *, char *));
4258
4259 /* Once we are positioned after an "interesting" keyword, let's get
4260    the real tag value necessary. */
4261 static void
4262 Ada_getit (inf, name_qualifier)
4263      FILE *inf;
4264      char *name_qualifier;
4265 {
4266   register char *cp;
4267   char *name;
4268   char c;
4269
4270   while (!feof (inf))
4271     {
4272       dbp = skip_spaces (dbp);
4273       if (*dbp == '\0'
4274           || (dbp[0] == '-' && dbp[1] == '-'))
4275         {
4276           readline (&lb, inf);
4277           dbp = lb.buffer;
4278         }
4279       switch (lowcase(*dbp))
4280         {
4281         case 'b':
4282           if (nocase_tail ("body"))
4283             {
4284               /* Skipping body of   procedure body   or   package body or ....
4285                  resetting qualifier to body instead of spec. */
4286               name_qualifier = "/b";
4287               continue;
4288             }
4289           break;
4290         case 't':
4291           /* Skipping type of   task type   or   protected type ... */
4292           if (nocase_tail ("type"))
4293             continue;
4294           break;
4295         }
4296       if (*dbp == '"')
4297         {
4298           dbp += 1;
4299           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4300             continue;
4301         }
4302       else
4303         {
4304           dbp = skip_spaces (dbp);
4305           for (cp = dbp;
4306                (*cp != '\0'
4307                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4308                cp++)
4309             continue;
4310           if (cp == dbp)
4311             return;
4312         }
4313       c = *cp;
4314       *cp = '\0';
4315       name = concat (dbp, name_qualifier, "");
4316       *cp = c;
4317       make_tag (name, strlen (name), TRUE,
4318                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4319       free (name);
4320       if (c == '"')
4321         dbp = cp + 1;
4322       return;
4323     }
4324 }
4325
4326 static void
4327 Ada_funcs (inf)
4328      FILE *inf;
4329 {
4330   bool inquote = FALSE;
4331   bool skip_till_semicolumn = FALSE;
4332
4333   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4334     {
4335       while (*dbp != '\0')
4336         {
4337           /* Skip a string i.e. "abcd". */
4338           if (inquote || (*dbp == '"'))
4339             {
4340               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4341               if (dbp != NULL)
4342                 {
4343                   inquote = FALSE;
4344                   dbp += 1;
4345                   continue;     /* advance char */
4346                 }
4347               else
4348                 {
4349                   inquote = TRUE;
4350                   break;        /* advance line */
4351                 }
4352             }
4353
4354           /* Skip comments. */
4355           if (dbp[0] == '-' && dbp[1] == '-')
4356             break;              /* advance line */
4357
4358           /* Skip character enclosed in single quote i.e. 'a'
4359              and skip single quote starting an attribute i.e. 'Image. */
4360           if (*dbp == '\'')
4361             {
4362               dbp++ ;
4363               if (*dbp != '\0')
4364                 dbp++;
4365               continue;
4366             }
4367
4368           if (skip_till_semicolumn)
4369             {
4370               if (*dbp == ';')
4371                 skip_till_semicolumn = FALSE;
4372               dbp++;
4373               continue;         /* advance char */
4374             }
4375
4376           /* Search for beginning of a token.  */
4377           if (!begtoken (*dbp))
4378             {
4379               dbp++;
4380               continue;         /* advance char */
4381             }
4382
4383           /* We are at the beginning of a token. */
4384           switch (lowcase(*dbp))
4385             {
4386             case 'f':
4387               if (!packages_only && nocase_tail ("function"))
4388                 Ada_getit (inf, "/f");
4389               else
4390                 break;          /* from switch */
4391               continue;         /* advance char */
4392             case 'p':
4393               if (!packages_only && nocase_tail ("procedure"))
4394                 Ada_getit (inf, "/p");
4395               else if (nocase_tail ("package"))
4396                 Ada_getit (inf, "/s");
4397               else if (nocase_tail ("protected")) /* protected type */
4398                 Ada_getit (inf, "/t");
4399               else
4400                 break;          /* from switch */
4401               continue;         /* advance char */
4402
4403             case 'u':
4404               if (typedefs && !packages_only && nocase_tail ("use"))
4405                 {
4406                   /* when tagging types, avoid tagging  use type Pack.Typename;
4407                      for this, we will skip everything till a ; */
4408                   skip_till_semicolumn = TRUE;
4409                   continue;     /* advance char */
4410                 }
4411
4412             case 't':
4413               if (!packages_only && nocase_tail ("task"))
4414                 Ada_getit (inf, "/k");
4415               else if (typedefs && !packages_only && nocase_tail ("type"))
4416                 {
4417                   Ada_getit (inf, "/t");
4418                   while (*dbp != '\0')
4419                     dbp += 1;
4420                 }
4421               else
4422                 break;          /* from switch */
4423               continue;         /* advance char */
4424             }
4425
4426           /* Look for the end of the token. */
4427           while (!endtoken (*dbp))
4428             dbp++;
4429
4430         } /* advance char */
4431     } /* advance line */
4432 }
4433
4434 \f
4435 /*
4436  * Unix and microcontroller assembly tag handling
4437  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4438  * Idea by Bob Weiner, Motorola Inc. (1994)
4439  */
4440 static void
4441 Asm_labels (inf)
4442      FILE *inf;
4443 {
4444   register char *cp;
4445
4446   LOOP_ON_INPUT_LINES (inf, lb, cp)
4447     {
4448       /* If first char is alphabetic or one of [_.$], test for colon
4449          following identifier. */
4450       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4451         {
4452           /* Read past label. */
4453           cp++;
4454           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4455             cp++;
4456           if (*cp == ':' || iswhite (*cp))
4457             /* Found end of label, so copy it and add it to the table. */
4458             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4459                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4460         }
4461     }
4462 }
4463
4464 \f
4465 /*
4466  * Perl support
4467  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4468  * Perl variable names: /^(my|local).../
4469  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4470  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4471  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4472  */
4473 static void
4474 Perl_functions (inf)
4475      FILE *inf;
4476 {
4477   char *package = savestr ("main"); /* current package name */
4478   register char *cp;
4479
4480   LOOP_ON_INPUT_LINES (inf, lb, cp)
4481     {
4482       skip_spaces(cp);
4483
4484       if (LOOKING_AT (cp, "package"))
4485         {
4486           free (package);
4487           get_tag (cp, &package);
4488         }
4489       else if (LOOKING_AT (cp, "sub"))
4490         {
4491           char *pos;
4492           char *sp = cp;
4493
4494           while (!notinname (*cp))
4495             cp++;
4496           if (cp == sp)
4497             continue;           /* nothing found */
4498           if ((pos = etags_strchr (sp, ':')) != NULL
4499               && pos < cp && pos[1] == ':')
4500             /* The name is already qualified. */
4501             make_tag (sp, cp - sp, TRUE,
4502                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4503           else
4504             /* Qualify it. */
4505             {
4506               char savechar, *name;
4507
4508               savechar = *cp;
4509               *cp = '\0';
4510               name = concat (package, "::", sp);
4511               *cp = savechar;
4512               make_tag (name, strlen(name), TRUE,
4513                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4514               free (name);
4515             }
4516         }
4517        else if (globals)        /* only if we are tagging global vars */
4518         {
4519           /* Skip a qualifier, if any. */
4520           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4521           /* After "my" or "local", but before any following paren or space. */
4522           char *varstart = cp;
4523
4524           if (qual              /* should this be removed?  If yes, how? */
4525               && (*cp == '$' || *cp == '@' || *cp == '%'))
4526             {
4527               varstart += 1;
4528               do
4529                 cp++;
4530               while (ISALNUM (*cp) || *cp == '_');
4531             }
4532           else if (qual)
4533             {
4534               /* Should be examining a variable list at this point;
4535                  could insist on seeing an open parenthesis. */
4536               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4537                 cp++;
4538             }
4539           else
4540             continue;
4541
4542           make_tag (varstart, cp - varstart, FALSE,
4543                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4544         }
4545     }
4546 }
4547
4548
4549 /*
4550  * Python support
4551  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4552  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4553  * More ideas by seb bacon <seb@jamkit.com> (2002)
4554  */
4555 static void
4556 Python_functions (inf)
4557      FILE *inf;
4558 {
4559   register char *cp;
4560
4561   LOOP_ON_INPUT_LINES (inf, lb, cp)
4562     {
4563       cp = skip_spaces (cp);
4564       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4565         {
4566           char *name = cp;
4567           while (!notinname (*cp) && *cp != ':')
4568             cp++;
4569           make_tag (name, cp - name, TRUE,
4570                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4571         }
4572     }
4573 }
4574
4575 \f
4576 /*
4577  * PHP support
4578  * Look for:
4579  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4580  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4581  *  - /^[ \t]*define\(\"[^\"]+/
4582  * Only with --members:
4583  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4584  * Idea by Diez B. Roggisch (2001)
4585  */
4586 static void
4587 PHP_functions (inf)
4588      FILE *inf;
4589 {
4590   register char *cp, *name;
4591   bool search_identifier = FALSE;
4592
4593   LOOP_ON_INPUT_LINES (inf, lb, cp)
4594     {
4595       cp = skip_spaces (cp);
4596       name = cp;
4597       if (search_identifier
4598           && *cp != '\0')
4599         {
4600           while (!notinname (*cp))
4601             cp++;
4602           make_tag (name, cp - name, TRUE,
4603                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4604           search_identifier = FALSE;
4605         }
4606       else if (LOOKING_AT (cp, "function"))
4607         {
4608           if(*cp == '&')
4609             cp = skip_spaces (cp+1);
4610           if(*cp != '\0')
4611             {
4612               name = cp;
4613               while (!notinname (*cp))
4614                 cp++;
4615               make_tag (name, cp - name, TRUE,
4616                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4617             }
4618           else
4619             search_identifier = TRUE;
4620         }
4621       else if (LOOKING_AT (cp, "class"))
4622         {
4623           if (*cp != '\0')
4624             {
4625               name = cp;
4626               while (*cp != '\0' && !iswhite (*cp))
4627                 cp++;
4628               make_tag (name, cp - name, FALSE,
4629                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4630             }
4631           else
4632             search_identifier = TRUE;
4633         }
4634       else if (strneq (cp, "define", 6)
4635                && (cp = skip_spaces (cp+6))
4636                && *cp++ == '('
4637                && (*cp == '"' || *cp == '\''))
4638         {
4639           char quote = *cp++;
4640           name = cp;
4641           while (*cp != quote && *cp != '\0')
4642             cp++;
4643           make_tag (name, cp - name, FALSE,
4644                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4645         }
4646       else if (members
4647                && LOOKING_AT (cp, "var")
4648                && *cp == '$')
4649         {
4650           name = cp;
4651           while (!notinname(*cp))
4652             cp++;
4653           make_tag (name, cp - name, FALSE,
4654                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4655         }
4656     }
4657 }
4658
4659 \f
4660 /*
4661  * Cobol tag functions
4662  * We could look for anything that could be a paragraph name.
4663  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4664  * Idea by Corny de Souza (1993)
4665  */
4666 static void
4667 Cobol_paragraphs (inf)
4668      FILE *inf;
4669 {
4670   register char *bp, *ep;
4671
4672   LOOP_ON_INPUT_LINES (inf, lb, bp)
4673     {
4674       if (lb.len < 9)
4675         continue;
4676       bp += 8;
4677
4678       /* If eoln, compiler option or comment ignore whole line. */
4679       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4680         continue;
4681
4682       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4683         continue;
4684       if (*ep++ == '.')
4685         make_tag (bp, ep - bp, TRUE,
4686                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4687     }
4688 }
4689
4690 \f
4691 /*
4692  * Makefile support
4693  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4694  */
4695 static void
4696 Makefile_targets (inf)
4697      FILE *inf;
4698 {
4699   register char *bp;
4700
4701   LOOP_ON_INPUT_LINES (inf, lb, bp)
4702     {
4703       if (*bp == '\t' || *bp == '#')
4704         continue;
4705       while (*bp != '\0' && *bp != '=' && *bp != ':')
4706         bp++;
4707       if (*bp == ':' || (globals && *bp == '='))
4708         make_tag (lb.buffer, bp - lb.buffer, TRUE,
4709                   lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4710     }
4711 }
4712
4713 \f
4714 /*
4715  * Pascal parsing
4716  * Original code by Mosur K. Mohan (1989)
4717  *
4718  *  Locates tags for procedures & functions.  Doesn't do any type- or
4719  *  var-definitions.  It does look for the keyword "extern" or
4720  *  "forward" immediately following the procedure statement; if found,
4721  *  the tag is skipped.
4722  */
4723 static void
4724 Pascal_functions (inf)
4725      FILE *inf;
4726 {
4727   linebuffer tline;             /* mostly copied from C_entries */
4728   long save_lcno;
4729   int save_lineno, namelen, taglen;
4730   char c, *name;
4731
4732   bool                          /* each of these flags is TRUE iff: */
4733     incomment,                  /* point is inside a comment */
4734     inquote,                    /* point is inside '..' string */
4735     get_tagname,                /* point is after PROCEDURE/FUNCTION
4736                                    keyword, so next item = potential tag */
4737     found_tag,                  /* point is after a potential tag */
4738     inparms,                    /* point is within parameter-list */
4739     verify_tag;                 /* point has passed the parm-list, so the
4740                                    next token will determine whether this
4741                                    is a FORWARD/EXTERN to be ignored, or
4742                                    whether it is a real tag */
4743
4744   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4745   name = NULL;                  /* keep compiler quiet */
4746   dbp = lb.buffer;
4747   *dbp = '\0';
4748   linebuffer_init (&tline);
4749
4750   incomment = inquote = FALSE;
4751   found_tag = FALSE;            /* have a proc name; check if extern */
4752   get_tagname = FALSE;          /* found "procedure" keyword         */
4753   inparms = FALSE;              /* found '(' after "proc"            */
4754   verify_tag = FALSE;           /* check if "extern" is ahead        */
4755
4756
4757   while (!feof (inf))           /* long main loop to get next char */
4758     {
4759       c = *dbp++;
4760       if (c == '\0')            /* if end of line */
4761         {
4762           readline (&lb, inf);
4763           dbp = lb.buffer;
4764           if (*dbp == '\0')
4765             continue;
4766           if (!((found_tag && verify_tag)
4767                 || get_tagname))
4768             c = *dbp++;         /* only if don't need *dbp pointing
4769                                    to the beginning of the name of
4770                                    the procedure or function */
4771         }
4772       if (incomment)
4773         {
4774           if (c == '}')         /* within { } comments */
4775             incomment = FALSE;
4776           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4777             {
4778               dbp++;
4779               incomment = FALSE;
4780             }
4781           continue;
4782         }
4783       else if (inquote)
4784         {
4785           if (c == '\'')
4786             inquote = FALSE;
4787           continue;
4788         }
4789       else
4790         switch (c)
4791           {
4792           case '\'':
4793             inquote = TRUE;     /* found first quote */
4794             continue;
4795           case '{':             /* found open { comment */
4796             incomment = TRUE;
4797             continue;
4798           case '(':
4799             if (*dbp == '*')    /* found open (* comment */
4800               {
4801                 incomment = TRUE;
4802                 dbp++;
4803               }
4804             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4805               inparms = TRUE;
4806             continue;
4807           case ')':             /* end of parms list */
4808             if (inparms)
4809               inparms = FALSE;
4810             continue;
4811           case ';':
4812             if (found_tag && !inparms) /* end of proc or fn stmt */
4813               {
4814                 verify_tag = TRUE;
4815                 break;
4816               }
4817             continue;
4818           }
4819       if (found_tag && verify_tag && (*dbp != ' '))
4820         {
4821           /* Check if this is an "extern" declaration. */
4822           if (*dbp == '\0')
4823             continue;
4824           if (lowcase (*dbp == 'e'))
4825             {
4826               if (nocase_tail ("extern")) /* superfluous, really! */
4827                 {
4828                   found_tag = FALSE;
4829                   verify_tag = FALSE;
4830                 }
4831             }
4832           else if (lowcase (*dbp) == 'f')
4833             {
4834               if (nocase_tail ("forward")) /* check for forward reference */
4835                 {
4836                   found_tag = FALSE;
4837                   verify_tag = FALSE;
4838                 }
4839             }
4840           if (found_tag && verify_tag) /* not external proc, so make tag */
4841             {
4842               found_tag = FALSE;
4843               verify_tag = FALSE;
4844               make_tag (name, namelen, TRUE,
4845                         tline.buffer, taglen, save_lineno, save_lcno);
4846               continue;
4847             }
4848         }
4849       if (get_tagname)          /* grab name of proc or fn */
4850         {
4851           char *cp;
4852
4853           if (*dbp == '\0')
4854             continue;
4855
4856           /* Find block name. */
4857           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4858             continue;
4859
4860           /* Save all values for later tagging. */
4861           linebuffer_setlen (&tline, lb.len);
4862           strcpy (tline.buffer, lb.buffer);
4863           save_lineno = lineno;
4864           save_lcno = linecharno;
4865           name = tline.buffer + (dbp - lb.buffer);
4866           namelen = cp - dbp;
4867           taglen = cp - lb.buffer + 1;
4868
4869           dbp = cp;             /* set dbp to e-o-token */
4870           get_tagname = FALSE;
4871           found_tag = TRUE;
4872           continue;
4873
4874           /* And proceed to check for "extern". */
4875         }
4876       else if (!incomment && !inquote && !found_tag)
4877         {
4878           /* Check for proc/fn keywords. */
4879           switch (lowcase (c))
4880             {
4881             case 'p':
4882               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4883                 get_tagname = TRUE;
4884               continue;
4885             case 'f':
4886               if (nocase_tail ("unction"))
4887                 get_tagname = TRUE;
4888               continue;
4889             }
4890         }
4891     } /* while not eof */
4892
4893   free (tline.buffer);
4894 }
4895
4896 \f
4897 /*
4898  * Lisp tag functions
4899  *  look for (def or (DEF, quote or QUOTE
4900  */
4901
4902 static void L_getit __P((void));
4903
4904 static void
4905 L_getit ()
4906 {
4907   if (*dbp == '\'')             /* Skip prefix quote */
4908     dbp++;
4909   else if (*dbp == '(')
4910   {
4911     dbp++;
4912     /* Try to skip "(quote " */
4913     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4914       /* Ok, then skip "(" before name in (defstruct (foo)) */
4915       dbp = skip_spaces (dbp);
4916   }
4917   get_tag (dbp, NULL);
4918 }
4919
4920 static void
4921 Lisp_functions (inf)
4922      FILE *inf;
4923 {
4924   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4925     {
4926       if (dbp[0] != '(')
4927         continue;
4928
4929       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4930         {
4931           dbp = skip_non_spaces (dbp);
4932           dbp = skip_spaces (dbp);
4933           L_getit ();
4934         }
4935       else
4936         {
4937           /* Check for (foo::defmumble name-defined ... */
4938           do
4939             dbp++;
4940           while (!notinname (*dbp) && *dbp != ':');
4941           if (*dbp == ':')
4942             {
4943               do
4944                 dbp++;
4945               while (*dbp == ':');
4946
4947               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4948                 {
4949                   dbp = skip_non_spaces (dbp);
4950                   dbp = skip_spaces (dbp);
4951                   L_getit ();
4952                 }
4953             }
4954         }
4955     }
4956 }
4957
4958 \f
4959 /*
4960  * Lua script language parsing
4961  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4962  *
4963  *  "function" and "local function" are tags if they start at column 1.
4964  */
4965 static void
4966 Lua_functions (inf)
4967      FILE *inf;
4968 {
4969   register char *bp;
4970
4971   LOOP_ON_INPUT_LINES (inf, lb, bp)
4972     {
4973       if (bp[0] != 'f' && bp[0] != 'l')
4974         continue;
4975
4976       LOOKING_AT (bp, "local"); /* skip possible "local" */
4977
4978       if (LOOKING_AT (bp, "function"))
4979         get_tag (bp, NULL);
4980     }
4981 }
4982
4983 \f
4984 /*
4985  * Postscript tags
4986  * Just look for lines where the first character is '/'
4987  * Also look at "defineps" for PSWrap
4988  * Ideas by:
4989  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4990  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4991  */
4992 static void
4993 PS_functions (inf)
4994      FILE *inf;
4995 {
4996   register char *bp, *ep;
4997
4998   LOOP_ON_INPUT_LINES (inf, lb, bp)
4999     {
5000       if (bp[0] == '/')
5001         {
5002           for (ep = bp+1;
5003                *ep != '\0' && *ep != ' ' && *ep != '{';
5004                ep++)
5005             continue;
5006           make_tag (bp, ep - bp, TRUE,
5007                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5008         }
5009       else if (LOOKING_AT (bp, "defineps"))
5010         get_tag (bp, NULL);
5011     }
5012 }
5013
5014 \f
5015 /*
5016  * Forth tags
5017  * Ignore anything after \ followed by space or in ( )
5018  * Look for words defined by :
5019  * Look for constant, code, create, defer, value, and variable
5020  * OBP extensions:  Look for buffer:, field,
5021  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5022  */
5023 static void
5024 Forth_words (inf)
5025      FILE *inf;
5026 {
5027   register char *bp;
5028
5029   LOOP_ON_INPUT_LINES (inf, lb, bp)
5030     while ((bp = skip_spaces (bp))[0] != '\0')
5031       if (bp[0] == '\\' && iswhite(bp[1]))
5032         break;                  /* read next line */
5033       else if (bp[0] == '(' && iswhite(bp[1]))
5034         do                      /* skip to ) or eol */
5035           bp++;
5036         while (*bp != ')' && *bp != '\0');
5037       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5038                || LOOKING_AT_NOCASE (bp, "constant")
5039                || LOOKING_AT_NOCASE (bp, "code")
5040                || LOOKING_AT_NOCASE (bp, "create")
5041                || LOOKING_AT_NOCASE (bp, "defer")
5042                || LOOKING_AT_NOCASE (bp, "value")
5043                || LOOKING_AT_NOCASE (bp, "variable")
5044                || LOOKING_AT_NOCASE (bp, "buffer:")
5045                || LOOKING_AT_NOCASE (bp, "field"))
5046         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5047       else
5048         bp = skip_non_spaces (bp);
5049 }
5050
5051 \f
5052 /*
5053  * Scheme tag functions
5054  * look for (def... xyzzy
5055  *          (def... (xyzzy
5056  *          (def ... ((...(xyzzy ....
5057  *          (set! xyzzy
5058  * Original code by Ken Haase (1985?)
5059  */
5060 static void
5061 Scheme_functions (inf)
5062      FILE *inf;
5063 {
5064   register char *bp;
5065
5066   LOOP_ON_INPUT_LINES (inf, lb, bp)
5067     {
5068       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5069         {
5070           bp = skip_non_spaces (bp+4);
5071           /* Skip over open parens and white space */
5072           while (notinname (*bp))
5073             bp++;
5074           get_tag (bp, NULL);
5075         }
5076       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5077         get_tag (bp, NULL);
5078     }
5079 }
5080
5081 \f
5082 /* Find tags in TeX and LaTeX input files.  */
5083
5084 /* TEX_toktab is a table of TeX control sequences that define tags.
5085  * Each entry records one such control sequence.
5086  *
5087  * Original code from who knows whom.
5088  * Ideas by:
5089  *   Stefan Monnier (2002)
5090  */
5091
5092 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5093
5094 /* Default set of control sequences to put into TEX_toktab.
5095    The value of environment var TEXTAGS is prepended to this.  */
5096 static char *TEX_defenv = "\
5097 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5098 :part:appendix:entry:index:def\
5099 :newcommand:renewcommand:newenvironment:renewenvironment";
5100
5101 static void TEX_mode __P((FILE *));
5102 static void TEX_decode_env __P((char *, char *));
5103
5104 static char TEX_esc = '\\';
5105 static char TEX_opgrp = '{';
5106 static char TEX_clgrp = '}';
5107
5108 /*
5109  * TeX/LaTeX scanning loop.
5110  */
5111 static void
5112 TeX_commands (inf)
5113      FILE *inf;
5114 {
5115   char *cp;
5116   linebuffer *key;
5117
5118   /* Select either \ or ! as escape character.  */
5119   TEX_mode (inf);
5120
5121   /* Initialize token table once from environment. */
5122   if (TEX_toktab == NULL)
5123     TEX_decode_env ("TEXTAGS", TEX_defenv);
5124
5125   LOOP_ON_INPUT_LINES (inf, lb, cp)
5126     {
5127       /* Look at each TEX keyword in line. */
5128       for (;;)
5129         {
5130           /* Look for a TEX escape. */
5131           while (*cp++ != TEX_esc)
5132             if (cp[-1] == '\0' || cp[-1] == '%')
5133               goto tex_next_line;
5134
5135           for (key = TEX_toktab; key->buffer != NULL; key++)
5136             if (strneq (cp, key->buffer, key->len))
5137               {
5138                 register char *p;
5139                 int namelen, linelen;
5140                 bool opgrp = FALSE;
5141
5142                 cp = skip_spaces (cp + key->len);
5143                 if (*cp == TEX_opgrp)
5144                   {
5145                     opgrp = TRUE;
5146                     cp++;
5147                   }
5148                 for (p = cp;
5149                      (!iswhite (*p) && *p != '#' &&
5150                       *p != TEX_opgrp && *p != TEX_clgrp);
5151                      p++)
5152                   continue;
5153                 namelen = p - cp;
5154                 linelen = lb.len;
5155                 if (!opgrp || *p == TEX_clgrp)
5156                   {
5157                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5158                       *p++;
5159                     linelen = p - lb.buffer + 1;
5160                   }
5161                 make_tag (cp, namelen, TRUE,
5162                           lb.buffer, linelen, lineno, linecharno);
5163                 goto tex_next_line; /* We only tag a line once */
5164               }
5165         }
5166     tex_next_line:
5167       ;
5168     }
5169 }
5170
5171 #define TEX_LESC '\\'
5172 #define TEX_SESC '!'
5173
5174 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5175    chars accordingly. */
5176 static void
5177 TEX_mode (inf)
5178      FILE *inf;
5179 {
5180   int c;
5181
5182   while ((c = getc (inf)) != EOF)
5183     {
5184       /* Skip to next line if we hit the TeX comment char. */
5185       if (c == '%')
5186         while (c != '\n')
5187           c = getc (inf);
5188       else if (c == TEX_LESC || c == TEX_SESC )
5189         break;
5190     }
5191
5192   if (c == TEX_LESC)
5193     {
5194       TEX_esc = TEX_LESC;
5195       TEX_opgrp = '{';
5196       TEX_clgrp = '}';
5197     }
5198   else
5199     {
5200       TEX_esc = TEX_SESC;
5201       TEX_opgrp = '<';
5202       TEX_clgrp = '>';
5203     }
5204   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5205      No attempt is made to correct the situation. */
5206   rewind (inf);
5207 }
5208
5209 /* Read environment and prepend it to the default string.
5210    Build token table. */
5211 static void
5212 TEX_decode_env (evarname, defenv)
5213      char *evarname;
5214      char *defenv;
5215 {
5216   register char *env, *p;
5217   int i, len;
5218
5219   /* Append default string to environment. */
5220   env = getenv (evarname);
5221   if (!env)
5222     env = defenv;
5223   else
5224     {
5225       char *oldenv = env;
5226       env = concat (oldenv, defenv, "");
5227     }
5228
5229   /* Allocate a token table */
5230   for (len = 1, p = env; p;)
5231     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5232       len++;
5233   TEX_toktab = xnew (len, linebuffer);
5234
5235   /* Unpack environment string into token table. Be careful about */
5236   /* zero-length strings (leading ':', "::" and trailing ':') */
5237   for (i = 0; *env != '\0';)
5238     {
5239       p = etags_strchr (env, ':');
5240       if (!p)                   /* End of environment string. */
5241         p = env + strlen (env);
5242       if (p - env > 0)
5243         {                       /* Only non-zero strings. */
5244           TEX_toktab[i].buffer = savenstr (env, p - env);
5245           TEX_toktab[i].len = p - env;
5246           i++;
5247         }
5248       if (*p)
5249         env = p + 1;
5250       else
5251         {
5252           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5253           TEX_toktab[i].len = 0;
5254           break;
5255         }
5256     }
5257 }
5258
5259 \f
5260 /* Texinfo support.  Dave Love, Mar. 2000.  */
5261 static void
5262 Texinfo_nodes (inf)
5263      FILE * inf;
5264 {
5265   char *cp, *start;
5266   LOOP_ON_INPUT_LINES (inf, lb, cp)
5267     if (LOOKING_AT (cp, "@node"))
5268       {
5269         start = cp;
5270         while (*cp != '\0' && *cp != ',')
5271           cp++;
5272         make_tag (start, cp - start, TRUE,
5273                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5274       }
5275 }
5276
5277 \f
5278 /*
5279  * HTML support.
5280  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5281  * Contents of <a name=xxx> are tags with name xxx.
5282  *
5283  * Francesco Potortì, 2002.
5284  */
/*
 * Scan INF and make tags for HTML labels.
 *
 * The scanner is a small state machine driven by four flags; the state
 * survives from one input line to the next, so constructs may span
 * lines.  For every line, the inner loop keeps working on the same line
 * (`continue') until it runs out of characters (`break').
 *
 * The name collected from NAME= or ID= is kept in the global
 * `token_name' and is handed to make_tag together with the first text
 * found outside of an HTML tag.  For <title>, <h1>, <h2> and <h3>
 * `token_name' is empty when make_tag is called.
 */
static void
HTML_labels (inf)
     FILE * inf;
{
  bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
  bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
  bool intag = FALSE;           /* inside an html tag, looking for ID= */
  bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
  char *end;                    /* end of the span currently being collected */


  linebuffer_setlen (&token_name, 0); /* no name in buffer */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    for (;;)                    /* loop on the same line */
      {
        /* The branches are tested in priority order: SKIPTAG, then
           INTAG, then GETNEXT, and finally the idle search for `<'. */
        if (skiptag)            /* skip HTML tag */
          {
            while (*dbp != '\0' && *dbp != '>')
              dbp++;
            if (*dbp == '>')
              {
                dbp += 1;
                skiptag = FALSE;
                continue;       /* look on the same line */
              }
            break;              /* go to next line */
          }

        else if (intag) /* look for "name=" or "id=" */
          {
            /* Stop at the end of the tag or at a letter that could
               start "name=" or "id=", whatever the case. */
            while (*dbp != '\0' && *dbp != '>'
                   && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '>')
              {
                dbp += 1;
                intag = FALSE;
                continue;       /* look on the same line */
              }
            /* NAME= is only honoured inside an anchor, ID= in any tag. */
            if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
                || LOOKING_AT_NOCASE (dbp, "id="))
              {
                /* The test below only makes sense if LOOKING_AT_NOCASE
                   has moved DBP past the keyword -- presumably it does;
                   confirm against the macro definition. */
                bool quoted = (dbp[0] == '"');

                /* A quoted value runs up to the closing quote or the end
                   of line; an unquoted one is a run of token characters. */
                if (quoted)
                  for (end = ++dbp; *end != '\0' && *end != '"'; end++)
                    continue;
                else
                  for (end = dbp; *end != '\0' && intoken (*end); end++)
                    continue;
                /* Save the value as the name of the next tag. */
                linebuffer_setlen (&token_name, end - dbp);
                strncpy (token_name.buffer, dbp, end - dbp);
                token_name.buffer[end - dbp] = '\0';

                dbp = end;
                intag = FALSE;  /* we found what we looked for */
                skiptag = TRUE; /* skip to the end of the tag */
                getnext = TRUE; /* then grab the text */
                continue;       /* look on the same line */
              }
            /* An `n' or `i' that does not start a keyword: step over it. */
            dbp += 1;
          }

        else if (getnext)       /* grab next tokens and tag them */
          {
            dbp = skip_spaces (dbp);
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '<')
              {
                /* Another HTML tag comes before any text: scan it for
                   NAME= or ID= too.  GETNEXT stays set meanwhile. */
                intag = TRUE;
                inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
                continue;       /* look on the same line */
              }

            /* The text up to the next `<' or the end of line is tagged. */
            for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
              continue;
            make_tag (token_name.buffer, token_name.len, TRUE,
                      dbp, end - dbp, lineno, linecharno);
            linebuffer_setlen (&token_name, 0); /* no name in buffer */
            getnext = FALSE;
            break;              /* go to next line */
          }

        else                    /* look for an interesting HTML tag */
          {
            while (*dbp != '\0' && *dbp != '<')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            intag = TRUE;
            /* NOTE(review): INANCHOR is set here for anchors but is not
               cleared for other tags, so it keeps the value left by a
               previous tag -- confirm whether this is intended. */
            if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
              {
                inanchor = TRUE;
                continue;       /* look on the same line */
              }
            else if (LOOKING_AT_NOCASE (dbp, "<title>")
                     || LOOKING_AT_NOCASE (dbp, "<h1>")
                     || LOOKING_AT_NOCASE (dbp, "<h2>")
                     || LOOKING_AT_NOCASE (dbp, "<h3>"))
              {
                /* The text following these tags is itself the tag. */
                intag = FALSE;
                getnext = TRUE;
                continue;       /* look on the same line */
              }
            /* Any other tag: step over the `<'; INTAG stays set. */
            dbp += 1;
          }
      }
}
5397
5398 \f
5399 /*
5400  * Prolog support
5401  *
5402  * Assumes that the predicate or rule starts at column 0.
5403  * Only the first clause of a predicate or rule is added.
5404  * Original code by Sunichirou Sugou (1989)
5405  * Rewritten by Anders Lindgren (1996)
5406  */
5407 static int prolog_pr __P((char *, char *));
5408 static void prolog_skip_comment __P((linebuffer *, FILE *));
5409 static int prolog_atom __P((char *, int));
5410
5411 static void
5412 Prolog_functions (inf)
5413      FILE *inf;
5414 {
5415   char *cp, *last;
5416   int len;
5417   int allocated;
5418
5419   allocated = 0;
5420   len = 0;
5421   last = NULL;
5422
5423   LOOP_ON_INPUT_LINES (inf, lb, cp)
5424     {
5425       if (cp[0] == '\0')        /* Empty line */
5426         continue;
5427       else if (iswhite (cp[0])) /* Not a predicate */
5428         continue;
5429       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5430         prolog_skip_comment (&lb, inf);
5431       else if ((len = prolog_pr (cp, last)) > 0)
5432         {
5433           /* Predicate or rule.  Store the function name so that we
5434              only generate a tag for the first clause.  */
5435           if (last == NULL)
5436             last = xnew(len + 1, char);
5437           else if (len + 1 > allocated)
5438             xrnew (last, len + 1, char);
5439           allocated = len + 1;
5440           strncpy (last, cp, len);
5441           last[len] = '\0';
5442         }
5443     }
5444 }
5445
5446
5447 static void
5448 prolog_skip_comment (plb, inf)
5449      linebuffer *plb;
5450      FILE *inf;
5451 {
5452   char *cp;
5453
5454   do
5455     {
5456       for (cp = plb->buffer; *cp != '\0'; cp++)
5457         if (cp[0] == '*' && cp[1] == '/')
5458           return;
5459       readline (plb, inf);
5460     }
5461   while (!feof(inf));
5462 }
5463
5464 /*
5465  * A predicate or rule definition is added if it matches:
5466  *     <beginning of line><Prolog Atom><whitespace>(
5467  * or  <beginning of line><Prolog Atom><whitespace>:-
5468  *
5469  * It is added to the tags database if it doesn't match the
5470  * name of the previous clause header.
5471  *
5472  * Return the size of the name of the predicate or rule, or 0 if no
5473  * header was found.
5474  */
5475 static int
5476 prolog_pr (s, last)
5477      char *s;
5478      char *last;                /* Name of last clause. */
5479 {
5480   int pos;
5481   int len;
5482
5483   pos = prolog_atom (s, 0);
5484   if (pos < 1)
5485     return 0;
5486
5487   len = pos;
5488   pos = skip_spaces (s + pos) - s;
5489
5490   if ((s[pos] == '.'
5491        || (s[pos] == '(' && (pos += 1))
5492        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5493       && (last == NULL          /* save only the first clause */
5494           || len != (int)strlen (last)
5495           || !strneq (s, last, len)))
5496         {
5497           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5498           return len;
5499         }
5500   else
5501     return 0;
5502 }
5503
5504 /*
5505  * Consume a Prolog atom.
5506  * Return the number of bytes consumed, or -1 if there was an error.
5507  *
5508  * A prolog atom, in this context, could be one of:
5509  * - An alphanumeric sequence, starting with a lower case letter.
5510  * - A quoted arbitrary string. Single quotes can escape themselves.
5511  *   Backslash quotes everything.
5512  */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;        /* where the atom should begin */
  char *p = start;              /* scanning pointer */

  if (*p == '\'')
    {
      /* Quoted atom: scan for the closing quote. */
      p++;
      while (*p != '\0')
        {
          if (*p == '\\')
            {
              /* A backslash quotes the next character, which must
                 be on this line. */
              if (p[1] == '\0')
                return -1;
              p += 2;
            }
          else if (*p == '\'')
            {
              /* A single quote closes the atom, a doubled one
                 stands for a quote inside it. */
              if (p[1] != '\'')
                return (p + 1) - start;
              p += 2;
            }
          else
            p++;
        }
      /* Multiline quoted atoms are ignored. */
      return -1;
    }

  /* Unquoted atom: a lower case letter or underscore followed by
     any number of alphanumerics and underscores. */
  if (!ISLOWER(*p) && *p != '_')
    return -1;
  for (p++; ISALNUM(*p) || *p == '_'; p++)
    continue;
  return p - start;
}
5562
5563 \f
5564 /*
5565  * Support for Erlang
5566  *
5567  * Generates tags for functions, defines, and records.
5568  * Assumes that Erlang functions start at column 0.
5569  * Original code by Anders Lindgren (1996)
5570  */
5571 static int erlang_func __P((char *, char *));
5572 static void erlang_attribute __P((char *));
5573 static int erlang_atom __P((char *));
5574
5575 static void
5576 Erlang_functions (inf)
5577      FILE *inf;
5578 {
5579   char *cp, *last;
5580   int len;
5581   int allocated;
5582
5583   allocated = 0;
5584   len = 0;
5585   last = NULL;
5586
5587   LOOP_ON_INPUT_LINES (inf, lb, cp)
5588     {
5589       if (cp[0] == '\0')        /* Empty line */
5590         continue;
5591       else if (iswhite (cp[0])) /* Not function nor attribute */
5592         continue;
5593       else if (cp[0] == '%')    /* comment */
5594         continue;
5595       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5596         continue;
5597       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5598         {
5599           erlang_attribute (cp);
5600           last = NULL;
5601         }
5602       else if ((len = erlang_func (cp, last)) > 0)
5603         {
5604           /*
5605            * Function.  Store the function name so that we only
5606            * generates a tag for the first clause.
5607            */
5608           if (last == NULL)
5609             last = xnew (len + 1, char);
5610           else if (len + 1 > allocated)
5611             xrnew (last, len + 1, char);
5612           allocated = len + 1;
5613           strncpy (last, cp, len);
5614           last[len] = '\0';
5615         }
5616     }
5617 }
5618
5619
5620 /*
5621  * A function definition is added if it matches:
5622  *     <beginning of line><Erlang Atom><whitespace>(
5623  *
5624  * It is added to the tags database if it doesn't match the
5625  * name of the previous clause header.
5626  *
5627  * Return the size of the name of the function, or 0 if no function
5628  * was found.
5629  */
5630 static int
5631 erlang_func (s, last)
5632      char *s;
5633      char *last;                /* Name of last clause. */
5634 {
5635   int pos;
5636   int len;
5637
5638   pos = erlang_atom (s);
5639   if (pos < 1)
5640     return 0;
5641
5642   len = pos;
5643   pos = skip_spaces (s + pos) - s;
5644
5645   /* Save only the first clause. */
5646   if (s[pos++] == '('
5647       && (last == NULL
5648           || len != (int)strlen (last)
5649           || !strneq (s, last, len)))
5650         {
5651           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5652           return len;
5653         }
5654
5655   return 0;
5656 }
5657
5658
5659 /*
5660  * Handle attributes.  Currently, tags are generated for defines
5661  * and records.
5662  *
5663  * They are on the form:
5664  * -define(foo, bar).
5665  * -define(Foo(M, N), M+N).
5666  * -record(graph, {vtab = notable, cyclic = true}).
5667  */
5668 static void
5669 erlang_attribute (s)
5670      char *s;
5671 {
5672   char *cp = s;
5673
5674   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5675       && *cp++ == '(')
5676     {
5677       int len = erlang_atom (skip_spaces (cp));
5678       if (len > 0)
5679         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5680     }
5681   return;
5682 }
5683
5684
/*
 * Consume an Erlang atom (or variable) at the beginning of S.
 * Return the number of bytes consumed.  The result is 0 if S does not
 * start with an atom, or if a quoted atom is not closed before the end
 * of the string.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (*p == '\'')
    {
      /* Quoted atom: look for the closing quote.  A backslash
         protects the character that follows it. */
      for (p++; *p != '\''; p++)
        {
          if (*p == '\\')
            p++;
          if (*p == '\0')       /* multiline quoted atoms are ignored */
            return 0;
        }
      return p + 1 - s;         /* include the closing quote */
    }

  /* Unquoted atom: a letter or underscore followed by any number of
     alphanumerics and underscores. */
  if (ISALPHA (*p) || *p == '_')
    for (p++; ISALNUM (*p) || *p == '_'; p++)
      continue;

  return p - s;
}
5713
5714 \f
5715 #ifdef ETAGS_REGEXPS
5716
5717 static char *scan_separators __P((char *));
5718 static void add_regex __P((char *, language *));
5719 static char *substitute __P((char *, char *, struct re_registers *));
5720
5721 /*
5722  * Take a string like "/blah/" and turn it into "blah", verifying
5723  * that the first and last characters are the same, and handling
5724  * quoted separator characters.  Actually, stops on the occurrence of
5725  * an unquoted separator.  Also process \t, \n, etc. and turn into
5726  * appropriate characters. Works in place.  Null terminates name string.
5727  * Returns pointer to terminating separator, or NULL for
5728  * unterminated regexps.
5729  */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];           /* the separator is the first character */
  char *copyto = name;          /* where the next output char is stored */
  char *scan;                   /* where the next input char is read */

  for (scan = name + 1; *scan != '\0' && *scan != sep; scan++)
    {
      if (*scan != '\\')
        {
          /* Ordinary character: copy it down. */
          *copyto++ = *scan;
          continue;
        }

      /* Backslash: the character after it decides what to store. */
      scan++;
      if (*scan == '\0')
        break;                  /* dangling backslash at end of string */
      switch (*scan)
        {
        case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
        case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
        case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
        case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
        case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
        case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
        case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
        case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
        case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
        default:
          /* A quoted separator loses its backslash; anything else
             keeps it, so that the quote is preserved. */
          if (*scan != sep)
            *copyto++ = '\\';
          *copyto++ = *scan;
          break;
        }
    }

  /* Terminate copied string. */
  *copyto = '\0';

  /* Stopping anywhere but on a separator means the regexp was
     unterminated, which is signalled by a null pointer. */
  return (*scan == sep) ? scan : NULL;
}
5780
5781 /* Look at the argument of --regex or --no-regex and do the right
5782    thing.  Same for each line of a regexp file. */
5783 static void
5784 analyse_regex (regex_arg)
5785      char *regex_arg;
5786 {
5787   if (regex_arg == NULL)
5788     {
5789       free_regexps ();          /* --no-regex: remove existing regexps */
5790       return;
5791     }
5792
5793   /* A real --regexp option or a line in a regexp file. */
5794   switch (regex_arg[0])
5795     {
5796       /* Comments in regexp file or null arg to --regex. */
5797     case '\0':
5798     case ' ':
5799     case '\t':
5800       break;
5801
5802       /* Read a regex file.  This is recursive and may result in a
5803          loop, which will stop when the file descriptors are exhausted. */
5804     case '@':
5805       {
5806         FILE *regexfp;
5807         linebuffer regexbuf;
5808         char *regexfile = regex_arg + 1;
5809
5810         /* regexfile is a file containing regexps, one per line. */
5811         regexfp = fopen (regexfile, "r");
5812         if (regexfp == NULL)
5813           {
5814             pfatal (regexfile);
5815             return;
5816           }
5817         linebuffer_init (&regexbuf);
5818         while (readline_internal (&regexbuf, regexfp) > 0)
5819           analyse_regex (regexbuf.buffer);
5820         free (regexbuf.buffer);
5821         fclose (regexfp);
5822       }
5823       break;
5824
5825       /* Regexp to be used for a specific language only. */
5826     case '{':
5827       {
5828         language *lang;
5829         char *lang_name = regex_arg + 1;
5830         char *cp;
5831
5832         for (cp = lang_name; *cp != '}'; cp++)
5833           if (*cp == '\0')
5834             {
5835               error ("unterminated language name in regex: %s", regex_arg);
5836               return;
5837             }
5838         *cp++ = '\0';
5839         lang = get_language_from_langname (lang_name);
5840         if (lang == NULL)
5841           return;
5842         add_regex (cp, lang);
5843       }
5844       break;
5845
5846       /* Regexp to be used for any language. */
5847     default:
5848       add_regex (regex_arg, NULL);
5849       break;
5850     }
5851 }
5852
5853 /* Separate the regexp pattern, compile it,
5854    and care for optional name and modifiers. */
5855 static void
5856 add_regex (regexp_pattern, lang)
5857      char *regexp_pattern;
5858      language *lang;
5859 {
5860   static struct re_pattern_buffer zeropattern;
5861   char sep, *pat, *name, *modifiers;
5862   const char *err;
5863   struct re_pattern_buffer *patbuf;
5864   regexp *rp;
5865   bool
5866     force_explicit_name = TRUE, /* do not use implicit tag names */
5867     ignore_case = FALSE,        /* case is significant */
5868     multi_line = FALSE,         /* matches are done one line at a time */
5869     single_line = FALSE;        /* dot does not match newline */
5870
5871
5872   if (strlen(regexp_pattern) < 3)
5873     {
5874       error ("null regexp", (char *)NULL);
5875       return;
5876     }
5877   sep = regexp_pattern[0];
5878   name = scan_separators (regexp_pattern);
5879   if (name == NULL)
5880     {
5881       error ("%s: unterminated regexp", regexp_pattern);
5882       return;
5883     }
5884   if (name[1] == sep)
5885     {
5886       error ("null name for regexp \"%s\"", regexp_pattern);
5887       return;
5888     }
5889   modifiers = scan_separators (name);
5890   if (modifiers == NULL)        /* no terminating separator --> no name */
5891     {
5892       modifiers = name;
5893       name = "";
5894     }
5895   else
5896     modifiers += 1;             /* skip separator */
5897
5898   /* Parse regex modifiers. */
5899   for (; modifiers[0] != '\0'; modifiers++)
5900     switch (modifiers[0])
5901       {
5902       case 'N':
5903         if (modifiers == name)
5904           error ("forcing explicit tag name but no name, ignoring", NULL);
5905         force_explicit_name = TRUE;
5906         break;
5907       case 'i':
5908         ignore_case = TRUE;
5909         break;
5910       case 's':
5911         single_line = TRUE;
5912         /* FALLTHRU */
5913       case 'm':
5914         multi_line = TRUE;
5915         need_filebuf = TRUE;
5916         break;
5917       default:
5918         {
5919           char wrongmod [2];
5920           wrongmod[0] = modifiers[0];
5921           wrongmod[1] = '\0';
5922           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5923         }
5924         break;
5925       }
5926
5927   patbuf = xnew (1, struct re_pattern_buffer);
5928   *patbuf = zeropattern;
5929   if (ignore_case)
5930     {
5931       static char lc_trans[CHARS];
5932       int i;
5933       for (i = 0; i < CHARS; i++)
5934         lc_trans[i] = lowcase (i);
5935       patbuf->translate = lc_trans;     /* translation table to fold case  */
5936     }
5937
5938   if (multi_line)
5939     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5940   else
5941     pat = regexp_pattern;
5942
5943   if (single_line)
5944     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5945   else
5946     re_set_syntax (RE_SYNTAX_EMACS);
5947
5948   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5949   if (multi_line)
5950     free (pat);
5951   if (err != NULL)
5952     {
5953       error ("%s while compiling pattern", err);
5954       return;
5955     }
5956
5957   rp = p_head;
5958   p_head = xnew (1, regexp);
5959   p_head->pattern = savestr (regexp_pattern);
5960   p_head->p_next = rp;
5961   p_head->lang = lang;
5962   p_head->pat = patbuf;
5963   p_head->name = savestr (name);
5964   p_head->error_signaled = FALSE;
5965   p_head->force_explicit_name = force_explicit_name;
5966   p_head->ignore_case = ignore_case;
5967   p_head->multi_line = multi_line;
5968 }
5969
5970 /*
5971  * Do the substitutions indicated by the regular expression and
5972  * arguments.
5973  */
5974 static char *
5975 substitute (in, out, regs)
5976      char *in, *out;
5977      struct re_registers *regs;
5978 {
5979   char *result, *t;
5980   int size, dig, diglen;
5981
5982   result = NULL;
5983   size = strlen (out);
5984
5985   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5986   if (out[size - 1] == '\\')
5987     fatal ("pattern error in \"%s\"", out);
5988   for (t = etags_strchr (out, '\\');
5989        t != NULL;
5990        t = etags_strchr (t + 2, '\\'))
5991     if (ISDIGIT (t[1]))
5992       {
5993         dig = t[1] - '0';
5994         diglen = regs->end[dig] - regs->start[dig];
5995         size += diglen - 2;
5996       }
5997     else
5998       size -= 1;
5999
6000   /* Allocate space and do the substitutions. */
6001   assert (size >= 0);
6002   result = xnew (size + 1, char);
6003
6004   for (t = result; *out != '\0'; out++)
6005     if (*out == '\\' && ISDIGIT (*++out))
6006       {
6007         dig = *out - '0';
6008         diglen = regs->end[dig] - regs->start[dig];
6009         strncpy (t, in + regs->start[dig], diglen);
6010         t += diglen;
6011       }
6012     else
6013       *t++ = *out;
6014   *t = '\0';
6015
6016   assert (t <= result + size);
6017   assert (t - result == (int)strlen (result));
6018
6019   return result;
6020 }
6021
6022 /* Deallocate all regexps. */
6023 static void
6024 free_regexps ()
6025 {
6026   regexp *rp;
6027   while (p_head != NULL)
6028     {
6029       rp = p_head->p_next;
6030       free (p_head->pattern);
6031       free (p_head->name);
6032       free (p_head);
6033       p_head = rp;
6034     }
6035   return;
6036 }
6037
6038 /*
6039  * Reads the whole file as a single string from `filebuf' and looks for
6040  * multi-line regular expressions, creating tags on matches.
6041  * readline already dealt with normal regexps.
6042  *
6043  * Idea by Ben Wing <ben@666.com> (2002).
6044  */
static void
regex_tag_multiline ()
{
  char *buffer = filebuf.buffer; /* the text to search */
  regexp *rp;                   /* the regexp being tried */
  char *name;                   /* name of the tag being made, or NULL */

  /* Every multi-line regexp gets its own pass over the buffer. */
  for (rp = p_head; rp != NULL; rp = rp->p_next)
    {
      int match = 0;            /* last value returned by re_search */

      if (!rp->multi_line)
        continue;               /* skip normal regexps */

      /* Generic initialisations before parsing file from memory. */
      lineno = 1;               /* reset global line number */
      charno = 0;               /* reset global char number */
      linecharno = 0;           /* reset global char number of line start */

      /* Only use generic regexps or those for the current language. */
      if (rp->lang != NULL && rp->lang != curfdp->lang)
        continue;

      /* Search repeatedly, each time starting at CHARNO, which is left
         just after the previous match.  A negative MATCH ends the loop. */
      while (match >= 0 && match < filebuf.len)
        {
          match = re_search (rp->pat, buffer, filebuf.len, charno,
                             filebuf.len - match, &rp->regs);
          switch (match)
            {
            case -2:
              /* Some error. */
              /* Report it only once per regexp. */
              if (!rp->error_signaled)
                {
                  error ("regexp stack overflow while matching \"%s\"",
                         rp->pattern);
                  rp->error_signaled = TRUE;
                }
              break;
            case -1:
              /* No match. */
              break;
            default:
              /* A match that ends where it starts is empty: going on
                 would not advance, so give up on this regexp. */
              if (match == rp->regs.end[0])
                {
                  if (!rp->error_signaled)
                    {
                      error ("regexp matches the empty string: \"%s\"",
                             rp->pattern);
                      rp->error_signaled = TRUE;
                    }
                  match = -3;   /* exit from while loop */
                  break;
                }

              /* Match occurred.  Construct a tag. */
              /* Move CHARNO to the end of the match, keeping the line
                 number and the line start position up to date. */
              while (charno < rp->regs.end[0])
                if (buffer[charno++] == '\n')
                  lineno++, linecharno = charno;
              name = rp->name;
              if (name[0] == '\0')
                name = NULL;
              else /* make a named tag */
                name = substitute (buffer, rp->name, &rp->regs);
              if (rp->force_explicit_name)
                /* Force explicit tag name, if a name is there. */
                pfnote (name, TRUE, buffer + linecharno,
                        charno - linecharno + 1, lineno, linecharno);
              else
                /* NOTE(review): NAME may be NULL here, in which case
                   strlen would be given a null pointer -- confirm that
                   this branch cannot be reached without a name. */
                make_tag (name, strlen (name), TRUE, buffer + linecharno,
                          charno - linecharno + 1, lineno, linecharno);
              break;
            }
        }
    }
}
6120
6121 #endif /* ETAGS_REGEXPS */
6122
6123 \f
6124 static bool
6125 nocase_tail (cp)
6126      char *cp;
6127 {
6128   register int len = 0;
6129
6130   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6131     cp++, len++;
6132   if (*cp == '\0' && !intoken (dbp[len]))
6133     {
6134       dbp += len;
6135       return TRUE;
6136     }
6137   return FALSE;
6138 }
6139
6140 static void
6141 get_tag (bp, namepp)
6142      register char *bp;
6143      char **namepp;
6144 {
6145   register char *cp = bp;
6146
6147   if (*bp != '\0')
6148     {
6149       /* Go till you get to white space or a syntactic break */
6150       for (cp = bp + 1; !notinname (*cp); cp++)
6151         continue;
6152       make_tag (bp, cp - bp, TRUE,
6153                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6154     }
6155
6156   if (namepp != NULL)
6157     *namepp = savenstr (bp, cp - bp);
6158 }
6159
6160 /*
6161  * Read a line of text from `stream' into `lbp', excluding the
6162  * newline or CR-NL, if any.  Return the number of characters read from
6163  * `stream', which is the length of the line including the newline.
6164  *
6165  * On DOS or Windows we do not count the CR character, if any before the
6166  * NL, in the returned length; this mirrors the behavior of Emacs on those
6167  * platforms (for text files, it translates CR-NL to NL as it reads in the
6168  * file).
6169  *
6170  * If multi-line regular expressions are requested, each line read is
6171  * appended to `filebuf'.
6172  */
6173 static long
6174 readline_internal (lbp, stream)
6175      linebuffer *lbp;
6176      register FILE *stream;
6177 {
6178   char *buffer = lbp->buffer;
6179   register char *p = lbp->buffer;
6180   register char *pend;
6181   int chars_deleted;
6182
6183   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6184
6185   for (;;)
6186     {
6187       register int c = getc (stream);
6188       if (p == pend)
6189         {
6190           /* We're at the end of linebuffer: expand it. */
6191           lbp->size *= 2;
6192           xrnew (buffer, lbp->size, char);
6193           p += buffer - lbp->buffer;
6194           pend = buffer + lbp->size;
6195           lbp->buffer = buffer;
6196         }
6197       if (c == EOF)
6198         {
6199           *p = '\0';
6200           chars_deleted = 0;
6201           break;
6202         }
6203       if (c == '\n')
6204         {
6205           if (p > buffer && p[-1] == '\r')
6206             {
6207               p -= 1;
6208 #ifdef DOS_NT
6209              /* Assume CRLF->LF translation will be performed by Emacs
6210                 when loading this file, so CRs won't appear in the buffer.
6211                 It would be cleaner to compensate within Emacs;
6212                 however, Emacs does not know how many CRs were deleted
6213                 before any given point in the file.  */
6214               chars_deleted = 1;
6215 #else
6216               chars_deleted = 2;
6217 #endif
6218             }
6219           else
6220             {
6221               chars_deleted = 1;
6222             }
6223           *p = '\0';
6224           break;
6225         }
6226       *p++ = c;
6227     }
6228   lbp->len = p - buffer;
6229
6230   if (need_filebuf              /* we need filebuf for multi-line regexps */
6231       && chars_deleted > 0)     /* not at EOF */
6232     {
6233       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6234         {
6235           /* Expand filebuf. */
6236           filebuf.size *= 2;
6237           xrnew (filebuf.buffer, filebuf.size, char);
6238         }
6239       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6240       filebuf.len += lbp->len;
6241       filebuf.buffer[filebuf.len++] = '\n';
6242       filebuf.buffer[filebuf.len] = '\0';
6243     }
6244
6245   return lbp->len + chars_deleted;
6246 }
6247
6248 /*
6249  * Like readline_internal, above, but in addition try to match the
6250  * input line against relevant regular expressions and manage #line
6251  * directives.
6252  */
6253 static void
6254 readline (lbp, stream)
6255      linebuffer *lbp;
6256      FILE *stream;
6257 {
6258   long result;
6259
6260   linecharno = charno;          /* update global char number of line start */
6261   result = readline_internal (lbp, stream); /* read line */
6262   lineno += 1;                  /* increment global line number */
6263   charno += result;             /* increment global char number */
6264
6265   /* Honour #line directives. */
6266   if (!no_line_directive)
6267     {
6268       static bool discard_until_line_directive;
6269
6270       /* Check whether this is a #line directive. */
6271       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6272         {
6273           int start, lno;
6274
6275           if (DEBUG) start = 0; /* shut up the compiler */
6276           if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
6277             {
6278               char *endp = lbp->buffer + start;
6279
6280               assert (start > 0);
6281               while ((endp = etags_strchr (endp, '"')) != NULL
6282                      && endp[-1] == '\\')
6283                 endp++;
6284               if (endp != NULL)
6285                 /* Ok, this is a real #line directive.  Let's deal with it. */
6286                 {
6287                   char *taggedabsname;  /* absolute name of original file */
6288                   char *taggedfname;    /* name of original file as given */
6289                   char *name;           /* temp var */
6290
6291                   discard_until_line_directive = FALSE; /* found it */
6292                   name = lbp->buffer + start;
6293                   *endp = '\0';
6294                   canonicalize_filename (name); /* for DOS */
6295                   taggedabsname = absolute_filename (name, curfdp->infabsdir);
6296                   if (filename_is_absolute (name)
6297                       || filename_is_absolute (curfdp->infname))
6298                     taggedfname = savestr (taggedabsname);
6299                   else
6300                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6301
6302                   if (streq (curfdp->taggedfname, taggedfname))
6303                     /* The #line directive is only a line number change.  We
6304                        deal with this afterwards. */
6305                     free (taggedfname);
6306                   else
6307                     /* The tags following this #line directive should be
6308                        attributed to taggedfname.  In order to do this, set
6309                        curfdp accordingly. */
6310                     {
6311                       fdesc *fdp; /* file description pointer */
6312
6313                       /* Go look for a file description already set up for the
6314                          file indicated in the #line directive.  If there is
6315                          one, use it from now until the next #line
6316                          directive. */
6317                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6318                         if (streq (fdp->infname, curfdp->infname)
6319                             && streq (fdp->taggedfname, taggedfname))
6320                           /* If we remove the second test above (after the &&)
6321                              then all entries pertaining to the same file are
6322                              coalesced in the tags file.  If we use it, then
6323                              entries pertaining to the same file but generated
6324                              from different files (via #line directives) will
6325                              go into separate sections in the tags file.  These
6326                              alternatives look equivalent.  The first one
6327                              destroys some apparently useless information. */
6328                           {
6329                             curfdp = fdp;
6330                             free (taggedfname);
6331                             break;
6332                           }
6333                       /* Else, if we already tagged the real file, skip all
6334                          input lines until the next #line directive. */
6335                       if (fdp == NULL) /* not found */
6336                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6337                           if (streq (fdp->infabsname, taggedabsname))
6338                             {
6339                               discard_until_line_directive = TRUE;
6340                               free (taggedfname);
6341                               break;
6342                             }
6343                       /* Else create a new file description and use that from
6344                          now on, until the next #line directive. */
6345                       if (fdp == NULL) /* not found */
6346                         {
6347                           fdp = fdhead;
6348                           fdhead = xnew (1, fdesc);
6349                           *fdhead = *curfdp; /* copy curr. file description */
6350                           fdhead->next = fdp;
6351                           fdhead->infname = savestr (curfdp->infname);
6352                           fdhead->infabsname = savestr (curfdp->infabsname);
6353                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6354                           fdhead->taggedfname = taggedfname;
6355                           fdhead->usecharno = FALSE;
6356                           fdhead->prop = NULL;
6357                           fdhead->written = FALSE;
6358                           curfdp = fdhead;
6359                         }
6360                     }
6361                   free (taggedabsname);
6362                   lineno = lno - 1;
6363                   readline (lbp, stream);
6364                   return;
6365                 } /* if a real #line directive */
6366             } /* if #line is followed by a a number */
6367         } /* if line begins with "#line " */
6368
6369       /* If we are here, no #line directive was found. */
6370       if (discard_until_line_directive)
6371         {
6372           if (result > 0)
6373             {
6374               /* Do a tail recursion on ourselves, thus discarding the contents
6375                  of the line buffer. */
6376               readline (lbp, stream);
6377               return;
6378             }
6379           /* End of file. */
6380           discard_until_line_directive = FALSE;
6381           return;
6382         }
6383     } /* if #line directives should be considered */
6384
6385 #ifdef ETAGS_REGEXPS
6386   {
6387     int match;
6388     regexp *rp;
6389     char *name;
6390
6391     /* Match against relevant regexps. */
6392     if (lbp->len > 0)
6393       for (rp = p_head; rp != NULL; rp = rp->p_next)
6394         {
6395           /* Only use generic regexps or those for the current language.
6396              Also do not use multiline regexps, which is the job of
6397              regex_tag_multiline. */
6398           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6399               || rp->multi_line)
6400             continue;
6401
6402           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6403           switch (match)
6404             {
6405             case -2:
6406               /* Some error. */
6407               if (!rp->error_signaled)
6408                 {
6409                   error ("regexp stack overflow while matching \"%s\"",
6410                          rp->pattern);
6411                   rp->error_signaled = TRUE;
6412                 }
6413               break;
6414             case -1:
6415               /* No match. */
6416               break;
6417             case 0:
6418               /* Empty string matched. */
6419               if (!rp->error_signaled)
6420                 {
6421                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6422                   rp->error_signaled = TRUE;
6423                 }
6424               break;
6425             default:
6426               /* Match occurred.  Construct a tag. */
6427               name = rp->name;
6428               if (name[0] == '\0')
6429                 name = NULL;
6430               else /* make a named tag */
6431                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6432               if (rp->force_explicit_name)
6433                 /* Force explicit tag name, if a name is there. */
6434                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6435               else
6436                 make_tag (name, strlen (name), TRUE,
6437                           lbp->buffer, match, lineno, linecharno);
6438               break;
6439             }
6440         }
6441   }
6442 #endif /* ETAGS_REGEXPS */
6443 }
6444
6445 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy occupies
 * strlen(cp)+1 bytes obtained through savenstr.
 */
static char *
savestr (cp)
     char *cp;
{
  int length = strlen (cp);

  return savenstr (cp, length);
}
6456
6457 /*
6458  * Return a pointer to a space of size LEN+1 allocated with xnew where
6459  * the string CP has been copied for at most the first LEN characters.
6460  */
6461 static char *
6462 savenstr (cp, len)
6463      char *cp;
6464      int len;
6465 {
6466   register char *dp;
6467
6468   dp = xnew (len + 1, char);
6469   strncpy (dp, cp, len);
6470   dp[len] = '\0';
6471   return dp;
6472 }
6473
/*
 * Find the last occurrence of the character C in the string SP.
 * Return a pointer to it, or NULL when C does not occur.  The
 * terminating NUL takes part in the search.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;              /* remember the rightmost hit so far */
      if (*sp == '\0')
        break;
    }

  return (char *) last;
}
6495
/*
 * Find the first occurrence of the character C in the string SP.
 * Return a pointer to it, or NULL when C does not occur.  The
 * terminating NUL takes part in the search.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;            /* reached the end without a hit */
    }
}
6514
/*
 * Case-insensitive comparison of S1 and S2.  Two characters are
 * compared through lowcase only when both are alphabetic; otherwise
 * they are compared as they are.  The result is the difference of the
 * first pair of characters that do not compare equal, or 0.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Advance over the common prefix. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6535
/*
 * Case-insensitive comparison of at most the first N characters of S1
 * and S2.  Two characters are compared through lowcase only when both
 * are alphabetic; otherwise they are compared as they are.
 *
 * Return 0 when the compared characters are all equal (in particular
 * whenever N is zero or negative), otherwise the difference of the
 * first pair of characters that do not compare equal.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* Nothing to compare.  Without this test, N == 0 together with an
     empty S1 would fall through to the final comparison and report a
     difference against a non-empty S2. */
  if (n <= 0)
    return 0;

  while (*s1 != '\0' && n-- > 0
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++;

  /* N drops below zero only when all N characters matched. */
  if (n < 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
6561
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  The loop relies on the string terminator not
   counting as white space. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6571
/* Return a pointer to the first character at or after CP that is either
   white space (per iswhite) or the string terminator. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  while (!(*cp == '\0' || iswhite (*cp)))
    cp++;
  return cp;
}
6581
/* Report an error through `error' (S1 is the printf control string,
   S2 its argument), then terminate the program with EXIT_FAILURE.
   Does not return.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);               /* message goes out first */
  exit (EXIT_FAILURE);
}
6590
/* Print S1 followed by the system message for the current errno (see
   perror), then terminate the program with EXIT_FAILURE.  Does not
   return.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);                  /* uses errno as left by the failed call */
  exit (EXIT_FAILURE);
}
6598
6599 static void
6600 suggest_asking_for_help ()
6601 {
6602   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6603            progname, LONG_OPTIONS ? "--help" : "-h");
6604   exit (EXIT_FAILURE);
6605 }
6606
6607 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6608 static void
6609 error (s1, s2)
6610      const char *s1, *s2;
6611 {
6612   fprintf (stderr, "%s: ", progname);
6613   fprintf (stderr, s1, s2);
6614   fprintf (stderr, "\n");
6615 }
6616
6617 /* Return a newly-allocated string whose contents
6618    concatenate those of s1, s2, s3.  */
6619 static char *
6620 concat (s1, s2, s3)
6621      char *s1, *s2, *s3;
6622 {
6623   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6624   char *result = xnew (len1 + len2 + len3 + 1, char);
6625
6626   strcpy (result, s1);
6627   strcpy (result + len1, s2);
6628   strcpy (result + len1 + len2, s3);
6629   result[len1 + len2 + len3] = '\0';
6630
6631   return result;
6632 }
6633
6634 \f
6635 /* Does the same work as the system V getcwd, but does not need to
6636    guess the buffer size in advance. */
6637 static char *
6638 etags_getcwd ()
6639 {
6640 #ifdef HAVE_GETCWD
6641   int bufsize = 200;
6642   char *path = xnew (bufsize, char);
6643
6644   while (getcwd (path, bufsize) == NULL)
6645     {
6646       if (errno != ERANGE)
6647         pfatal ("getcwd");
6648       bufsize *= 2;
6649       free (path);
6650       path = xnew (bufsize, char);
6651     }
6652
6653   canonicalize_filename (path);
6654   return path;
6655
6656 #else /* not HAVE_GETCWD */
6657 #if MSDOS
6658
6659   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6660
6661   getwd (path);
6662
6663   for (p = path; *p != '\0'; p++)
6664     if (*p == '\\')
6665       *p = '/';
6666     else
6667       *p = lowcase (*p);
6668
6669   return strdup (path);
6670 #else /* not MSDOS */
6671   linebuffer path;
6672   FILE *pipe;
6673
6674   linebuffer_init (&path);
6675   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6676   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6677     pfatal ("pwd");
6678   pclose (pipe);
6679
6680   return path.buffer;
6681 #endif /* not MSDOS */
6682 #endif /* not HAVE_GETCWD */
6683 }
6684
6685 /* Return a newly allocated string containing the file name of FILE
6686    relative to the absolute directory DIR (which should end with a slash). */
6687 static char *
6688 relative_filename (file, dir)
6689      char *file, *dir;
6690 {
6691   char *fp, *dp, *afn, *res;
6692   int i;
6693
6694   /* Find the common root of file and dir (with a trailing slash). */
6695   afn = absolute_filename (file, cwd);
6696   fp = afn;
6697   dp = dir;
6698   while (*fp++ == *dp++)
6699     continue;
6700   fp--, dp--;                   /* back to the first differing char */
6701 #ifdef DOS_NT
6702   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6703     return afn;
6704 #endif
6705   do                            /* look at the equal chars until '/' */
6706     fp--, dp--;
6707   while (*fp != '/');
6708
6709   /* Build a sequence of "../" strings for the resulting relative file name. */
6710   i = 0;
6711   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6712     i += 1;
6713   res = xnew (3*i + strlen (fp + 1) + 1, char);
6714   res[0] = '\0';
6715   while (i-- > 0)
6716     strcat (res, "../");
6717
6718   /* Add the file name relative to the common root of file and dir. */
6719   strcat (res, fp + 1);
6720   free (afn);
6721
6722   return res;
6723 }
6724
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied, otherwise it is appended
   to DIR.  The "/dirname/.." and "/." components are then removed in
   place.  If nothing is left, "/" is returned.  Under DOS_NT a
   relative FILE carrying a drive letter is a fatal error. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* "/..": walk CP back to the start of the previous
                 component, i.e. to where the rest reads as an absolute
                 name. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so strcpy would be
                 undefined here; memmove handles it.  The +1 moves the
                 terminator too. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* "/.": drop it.  Overlapping move, see above. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the name is the root directory.
         Release the now empty buffer instead of leaking it. */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6785
/* Return a newly allocated string with the absolute name of the
   directory that contains FILE, resolving FILE against DIR (which
   should end with a slash).  FILE is canonicalized in place; when it
   contains no slash, the result is a copy of DIR. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash != NULL)
    {
      /* Cut FILE right after its last slash for the duration of the
         call, then put the overwritten character back. */
      saved = last_slash[1];
      last_slash[1] = '\0';
      dirname = absolute_filename (file, dir);
      last_slash[1] = saved;
    }
  else
    dirname = savestr (dir);

  return dirname;
}
6807
/* Tell whether FN is an absolute file name: one that starts with a
   slash or, under DOS_NT, with a drive letter followed by ":/".  FN
   must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  const bool rooted = (fn[0] == '/');

#ifdef DOS_NT
  if (!rooted)
    return ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/';
#endif
  return rooted;
}
6820
/* Put the file name FN into canonical form, in place.  Under DOS_NT
   this upcases a lower case drive letter and turns every backslash
   into a slash; on other systems FN is left untouched. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
6839
6840 \f
6841 /* Initialize a linebuffer for use */
6842 static void
6843 linebuffer_init (lbp)
6844      linebuffer *lbp;
6845 {
6846   lbp->size = (DEBUG) ? 3 : 200;
6847   lbp->buffer = xnew (lbp->size, char);
6848   lbp->buffer[0] = '\0';
6849   lbp->len = 0;
6850 }
6851
6852 /* Set the minimum size of a string contained in a linebuffer. */
6853 static void
6854 linebuffer_setlen (lbp, toksize)
6855      linebuffer *lbp;
6856      int toksize;
6857 {
6858   while (lbp->size <= toksize)
6859     {
6860       lbp->size *= 2;
6861       xrnew (lbp->buffer, lbp->size, char);
6862     }
6863   lbp->len = toksize;
6864 }
6865
6866 /* Like malloc but get fatal error if memory is exhausted. */
6867 static PTR
6868 xmalloc (size)
6869      unsigned int size;
6870 {
6871   PTR result = (PTR) malloc (size);
6872   if (result == NULL)
6873     fatal ("virtual memory exhausted", (char *)NULL);
6874   return result;
6875 }
6876
6877 static PTR
6878 xrealloc (ptr, size)
6879      char *ptr;
6880      unsigned int size;
6881 {
6882   PTR result = (PTR) realloc (ptr, size);
6883   if (result == NULL)
6884     fatal ("virtual memory exhausted", (char *)NULL);
6885   return result;
6886 }
6887
6888 /*
6889  * Local Variables:
6890  * indent-tabs-mode: t
6891  * tab-width: 8
6892  * fill-column: 79
6893  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6894  * End:
6895  */
6896
6897 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6898    (do not change this comment) */
6899
6900 /* etags.c ends here */