]> code.delx.au - gnu-emacs/blob - lib-src/make-docfile.c
Minor tweaks of copying text properties when padding strings
[gnu-emacs] / lib-src / make-docfile.c
1 /* Generate doc-string file for GNU Emacs from source files.
2
3 Copyright (C) 1985-1986, 1992-1994, 1997, 1999-2016 Free Software
4 Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or (at
11 your option) any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20
21
22 /* The arguments given to this program are all the C and Lisp source files
23 of GNU Emacs. .elc and .el and .c files are allowed.
24 A .o file can also be specified; the .c file it was made from is used.
25 This helps the makefile pass the correct list of files.
26 Option -d DIR means change to DIR before looking for files.
27
28 The results, which go to standard output or to a file
29 specified with -a or -o (-a to append, -o to start from nothing),
30 are entries containing function or variable names and their documentation.
31 Each entry starts with a ^_ character.
32 Then comes F for a function or V for a variable.
33 Then comes the function or variable name, terminated with a newline.
34 Then comes the documentation for that function or variable.
35 */
36
37 #include <config.h>
38
39 #include <stdarg.h>
40 #include <stdbool.h>
41 #include <stddef.h>
42 #include <stdint.h>
43 #include <stdio.h>
44 #include <stdlib.h> /* config.h unconditionally includes this anyway */
45
46 #ifdef WINDOWSNT
47 /* Defined to be sys_fopen in ms-w32.h, but only #ifdef emacs, so this
48 is really just insurance. */
49 #undef fopen
50 #include <direct.h>
51 #endif /* WINDOWSNT */
52
53 #include <binary-io.h>
54 #include <intprops.h>
55 #include <min-max.h>
56
57 #ifdef DOS_NT
58 /* Defined to be sys_chdir in ms-w32.h, but only #ifdef emacs, so this
59 is really just insurance.
60
61 Similarly, msdos defines this as sys_chdir, but we're not linking with the
62 file where that function is defined. */
63 #undef chdir
64 #define IS_SLASH(c) ((c) == '/' || (c) == '\\' || (c) == ':')
65 #else /* not DOS_NT */
66 #define IS_SLASH(c) ((c) == '/')
67 #endif /* not DOS_NT */
68
69 static void scan_file (char *filename);
70 static void scan_lisp_file (const char *filename, const char *mode);
71 static void scan_c_file (char *filename, const char *mode);
72 static void scan_c_stream (FILE *infile);
73 static void start_globals (void);
74 static void write_globals (void);
75
76 #include <unistd.h>
77
78 /* Name this program was invoked with. */
79 static char *progname;
80
81 /* True if this invocation is generating globals.h. */
82 static bool generate_globals;
83
84 /* Print error message. Args are like vprintf. */
85
86 static void ATTRIBUTE_FORMAT_PRINTF (1, 0)
87 verror (char const *m, va_list ap)
88 {
89 fprintf (stderr, "%s: ", progname);
90 vfprintf (stderr, m, ap);
91 fprintf (stderr, "\n");
92 }
93
94 /* Print error message. Args are like printf. */
95
96 static void ATTRIBUTE_FORMAT_PRINTF (1, 2)
97 error (char const *m, ...)
98 {
99 va_list ap;
100 va_start (ap, m);
101 verror (m, ap);
102 va_end (ap);
103 }
104
105 /* Print error message and exit. Args are like printf. */
106
107 static _Noreturn void ATTRIBUTE_FORMAT_PRINTF (1, 2)
108 fatal (char const *m, ...)
109 {
110 va_list ap;
111 va_start (ap, m);
112 verror (m, ap);
113 va_end (ap);
114 exit (EXIT_FAILURE);
115 }
116
/* Terminate the program with an out-of-memory diagnostic.  */

static _Noreturn void
memory_exhausted (void)
{
  fatal ("virtual memory exhausted");
}
122
/* Allocate SIZE bytes as malloc does, but never return a null
   pointer: if malloc fails, exit via memory_exhausted.  */

static void *
xmalloc (ptrdiff_t size)
{
  void *block = malloc (size);

  if (!block)
    memory_exhausted ();
  return block;
}
133
/* Resize the block ARG to SIZE bytes as realloc does, but never
   return a null pointer: if realloc fails, exit via
   memory_exhausted.  */

static void *
xrealloc (void *arg, ptrdiff_t size)
{
  void *block = realloc (arg, size);

  if (!block)
    memory_exhausted ();
  return block;
}
144
145 \f
146 int
147 main (int argc, char **argv)
148 {
149 int i;
150
151 progname = argv[0];
152
153 /* If first two args are -o FILE, output to FILE. */
154 i = 1;
155 if (argc > i + 1 && !strcmp (argv[i], "-o"))
156 {
157 if (! freopen (argv[i + 1], "w", stdout))
158 {
159 perror (argv[i + 1]);
160 return EXIT_FAILURE;
161 }
162 i += 2;
163 }
164 if (argc > i + 1 && !strcmp (argv[i], "-a"))
165 {
166 if (! freopen (argv[i + 1], "a", stdout))
167 {
168 perror (argv[i + 1]);
169 return EXIT_FAILURE;
170 }
171 i += 2;
172 }
173 if (argc > i + 1 && !strcmp (argv[i], "-d"))
174 {
175 if (chdir (argv[i + 1]) != 0)
176 {
177 perror (argv[i + 1]);
178 return EXIT_FAILURE;
179 }
180 i += 2;
181 }
182 if (argc > i && !strcmp (argv[i], "-g"))
183 {
184 generate_globals = true;
185 ++i;
186 }
187
188 set_binary_mode (fileno (stdout), O_BINARY);
189
190 if (generate_globals)
191 start_globals ();
192
193 if (argc <= i)
194 scan_c_stream (stdin);
195 else
196 {
197 int first_infile = i;
198 for (; i < argc; i++)
199 {
200 int j;
201 /* Don't process one file twice. */
202 for (j = first_infile; j < i; j++)
203 if (strcmp (argv[i], argv[j]) == 0)
204 break;
205 if (j == i)
206 scan_file (argv[i]);
207 }
208 }
209
210 if (generate_globals)
211 write_globals ();
212
213 if (ferror (stdout) || fclose (stdout) != 0)
214 fatal ("write error");
215
216 return EXIT_SUCCESS;
217 }
218
/* Emit a source-file boundary marker to stdout: "\037S", the part of
   FILENAME that follows its last directory separator, and a
   newline.  */
static void
put_filename (char *filename)
{
  char *base = filename;

  for (char *p = filename; *p != '\0'; p++)
    if (IS_DIRECTORY_SEP (*p))
      base = p + 1;

  printf ("\037S%s\n", base);
}
233
234 /* Read file FILENAME and output its doc strings to stdout.
235 Return true if file is found, false otherwise. */
236
237 static void
238 scan_file (char *filename)
239 {
240 ptrdiff_t len = strlen (filename);
241
242 if (!generate_globals)
243 put_filename (filename);
244 if (len > 4 && !strcmp (filename + len - 4, ".elc"))
245 scan_lisp_file (filename, "rb");
246 else if (len > 3 && !strcmp (filename + len - 3, ".el"))
247 scan_lisp_file (filename, "r");
248 else
249 scan_c_file (filename, "r");
250 }
251
/* Write to stdout the opening lines of the generated globals file:
   two banner comments and the start of struct emacs_globals.  */
static void
start_globals (void)
{
  static char const *const header[] =
    {
      "/* This file was auto-generated by make-docfile. */",
      "/* DO NOT EDIT. */",
      "struct emacs_globals {"
    };

  for (size_t k = 0; k < sizeof header / sizeof header[0]; k++)
    puts (header[k]);
}
259 \f
/* Scratch buffer shared by the scanners in this file.  */
static char input_buffer[128];

/* State carried through one call of `read_c_string_or_comment'.  */
struct rcsoc_state
{
  /* Number of spaces read from the input but not yet output.  */
  intmax_t pending_spaces;
  /* Number of newlines read from the input but not yet output.  */
  intmax_t pending_newlines;

  /* Stream being read.  */
  FILE *in_file;

  /* If non-null, the next position in a buffer into which output
     characters are copied.  */
  char *buf_ptr;
  /* If non-null, a stream to which output characters are written.  */
  FILE *out_file;

  /* Keyword searched for at the beginning of lines.  An occurrence is
     not copied to the output; instead SAW_KEYWORD is set to true.  */
  const char *keyword;
  /* Position reached within KEYWORD while matching the input.  */
  const char *cur_keyword_ptr;
  /* True once an occurrence of KEYWORD has been seen.  */
  bool saw_keyword;
};
285
286 /* Output CH to the file or buffer in STATE. Any pending newlines or
287 spaces are output first. */
288
289 static void
290 put_char (char ch, struct rcsoc_state *state)
291 {
292 char out_ch;
293 do
294 {
295 if (state->pending_newlines > 0)
296 {
297 state->pending_newlines--;
298 out_ch = '\n';
299 }
300 else if (state->pending_spaces > 0)
301 {
302 state->pending_spaces--;
303 out_ch = ' ';
304 }
305 else
306 out_ch = ch;
307
308 if (state->out_file)
309 putc (out_ch, state->out_file);
310 if (state->buf_ptr)
311 *state->buf_ptr++ = out_ch;
312 }
313 while (out_ch != ch);
314 }
315
316 /* If in the middle of scanning a keyword, continue scanning with
317 character CH, otherwise output CH to the file or buffer in STATE.
318 Any pending newlines or spaces are output first, as well as any
319 previously scanned characters that were thought to be part of a
320 keyword, but were in fact not. */
321
322 static void
323 scan_keyword_or_put_char (char ch, struct rcsoc_state *state)
324 {
325 if (state->keyword
326 && *state->cur_keyword_ptr == ch
327 && (state->cur_keyword_ptr > state->keyword
328 || state->pending_newlines > 0))
329 /* We might be looking at STATE->keyword at some point.
330 Keep looking until we know for sure. */
331 {
332 if (*++state->cur_keyword_ptr == '\0')
333 /* Saw the whole keyword. Set SAW_KEYWORD flag to true. */
334 {
335 state->saw_keyword = true;
336
337 /* Reset the scanning pointer. */
338 state->cur_keyword_ptr = state->keyword;
339
340 /* Canonicalize whitespace preceding a usage string. */
341 state->pending_newlines = 2;
342 state->pending_spaces = 0;
343
344 /* Skip any whitespace between the keyword and the
345 usage string. */
346 int c;
347 do
348 c = getc (state->in_file);
349 while (c == ' ' || c == '\n');
350
351 /* Output the open-paren we just read. */
352 if (c != '(')
353 fatal ("Missing '(' after keyword");
354 put_char (c, state);
355
356 /* Skip the function name and replace it with `fn'. */
357 do
358 {
359 c = getc (state->in_file);
360 if (c == EOF)
361 fatal ("Unexpected EOF after keyword");
362 }
363 while (c != ' ' && c != ')');
364 put_char ('f', state);
365 put_char ('n', state);
366
367 /* Put back the last character. */
368 ungetc (c, state->in_file);
369 }
370 }
371 else
372 {
373 if (state->keyword && state->cur_keyword_ptr > state->keyword)
374 /* We scanned the beginning of a potential usage
375 keyword, but it was a false alarm. Output the
376 part we scanned. */
377 {
378 const char *p;
379
380 for (p = state->keyword; p < state->cur_keyword_ptr; p++)
381 put_char (*p, state);
382
383 state->cur_keyword_ptr = state->keyword;
384 }
385
386 put_char (ch, state);
387 }
388 }
389
390
391 /* Skip a C string or C-style comment from INFILE, and return the
392 byte that follows, or EOF. COMMENT means skip a comment. If
393 PRINTFLAG is positive, output string contents to stdout. If it is
394 negative, store contents in buf. Convert escape sequences \n and
395 \t to newline and tab; discard \ followed by newline.
396 If SAW_USAGE is non-null, then any occurrences of the string "usage:"
397 at the beginning of a line will be removed, and *SAW_USAGE set to
398 true if any were encountered. */
399
400 static int
401 read_c_string_or_comment (FILE *infile, int printflag, bool comment,
402 bool *saw_usage)
403 {
404 int c;
405 struct rcsoc_state state;
406
407 state.in_file = infile;
408 state.buf_ptr = (printflag < 0 ? input_buffer : 0);
409 state.out_file = (printflag > 0 ? stdout : 0);
410 state.pending_spaces = 0;
411 state.pending_newlines = 0;
412 state.keyword = (saw_usage ? "usage:" : 0);
413 state.cur_keyword_ptr = state.keyword;
414 state.saw_keyword = false;
415
416 c = getc (infile);
417 if (comment)
418 while (c == '\n' || c == '\r' || c == '\t' || c == ' ')
419 c = getc (infile);
420
421 while (c != EOF)
422 {
423 while (c != EOF && (comment ? c != '*' : c != '"'))
424 {
425 if (c == '\\')
426 {
427 c = getc (infile);
428 if (c == '\n' || c == '\r')
429 {
430 c = getc (infile);
431 continue;
432 }
433 if (c == 'n')
434 c = '\n';
435 if (c == 't')
436 c = '\t';
437 }
438
439 if (c == ' ')
440 state.pending_spaces++;
441 else if (c == '\n')
442 {
443 state.pending_newlines++;
444 state.pending_spaces = 0;
445 }
446 else
447 scan_keyword_or_put_char (c, &state);
448
449 c = getc (infile);
450 }
451
452 if (c != EOF)
453 c = getc (infile);
454
455 if (comment)
456 {
457 if (c == '/')
458 {
459 c = getc (infile);
460 break;
461 }
462
463 scan_keyword_or_put_char ('*', &state);
464 }
465 else
466 {
467 if (c != '"')
468 break;
469
470 /* If we had a "", concatenate the two strings. */
471 c = getc (infile);
472 }
473 }
474
475 if (printflag < 0)
476 *state.buf_ptr = 0;
477
478 if (saw_usage)
479 *saw_usage = state.saw_keyword;
480
481 return c;
482 }
483
484
485 \f
/* Write to stdout the Lisp-style argument list "(fn ARG...)" of
   function FUNC, derived from the C parameter list text in BUF.

   BUF may start with an open paren, which is skipped.  For every
   parameter (terminated by `,' or `)') the last identifier seen is
   taken as the argument name; it is printed upcased with `_' turned
   into `-', and the C spelling `defalt' is printed as DEFAULT.  A
   parameter list consisting of `void' prints no arguments.

   MINARGS and MAXARGS are the minimum and maximum number of
   arguments; "&optional" is printed before the first argument past
   MINARGS when further arguments are allowed.  FUNC is used only in
   the diagnostic for an empty `()' list.  */

static void
write_c_args (char *func, char *buf, int minargs, int maxargs)
{
  char *p;
  bool in_ident = false;
  char *ident_start = NULL;
  ptrdiff_t ident_length = 0;

  fputs ("(fn", stdout);

  if (*buf == '(')
    ++buf;

  for (p = buf; *p; p++)
    {
      char c = *p;

      /* Notice when an identifier starts or ends.  */
      if ((('A' <= c && c <= 'Z')
           || ('a' <= c && c <= 'z')
           || ('0' <= c && c <= '9')
           || c == '_')
          != in_ident)
        {
          if (!in_ident)
            {
              in_ident = true;
              ident_start = p;
            }
          else
            {
              in_ident = false;
              ident_length = p - ident_start;
            }
        }

      /* Found the end of an argument, write out the last seen
         identifier.  */
      if (c == ',' || c == ')')
        {
          if (ident_length == 0)
            {
              error ("empty arg list for '%s' should be (void), not ()", func);
              continue;
            }

          /* Skip the `void' of a `(void)' list.  The identifier must be
             exactly `void': comparing only IDENT_LENGTH characters
             would also discard arguments named `v', `vo' or `voi'.  */
          if (ident_length == 4 && memcmp (ident_start, "void", 4) == 0)
            continue;

          putchar (' ');

          if (minargs == 0 && maxargs > 0)
            fputs ("&optional ", stdout);

          minargs--;
          maxargs--;

          /* In C code, `default' is a reserved word, so we spell it
             `defalt'; demangle that here.  */
          if (ident_length == 6 && memcmp (ident_start, "defalt", 6) == 0)
            fputs ("DEFAULT", stdout);
          else
            while (ident_length-- > 0)
              {
                c = *ident_start++;
                if (c >= 'a' && c <= 'z')
                  /* Upcase the letter.  */
                  c += 'A' - 'a';
                else if (c == '_')
                  /* Print underscore as hyphen.  */
                  c = '-';
                putchar (c);
              }
        }
    }

  putchar (')');
}
567 \f
/* Kinds of globals.  The numeric order is significant: globals are
   sorted by it, so the variable kinds come first (roughly in
   decreasing alignment order, to avoid allocation gaps) and symbols
   and functions come last.  */
enum global_type
{
  INVALID = 0,
  LISP_OBJECT = 1,
  EMACS_INTEGER = 2,
  BOOLEAN = 3,
  SYMBOL = 4,
  FUNCTION = 5
};
580
581 /* A single global. */
582 struct global
583 {
584 enum global_type type;
585 char *name;
586 int flags;
587 union
588 {
589 int value;
590 char const *svalue;
591 } v;
592 };
593
/* Bits for the FLAGS member of struct global.  They are set for
   DEFUNs only.  */
enum
{
  DEFUN_noreturn = 1 << 0,
  DEFUN_const = 1 << 1
};
596
597 /* All the variable names we saw while scanning C sources in `-g'
598 mode. */
599 static ptrdiff_t num_globals;
600 static ptrdiff_t num_globals_allocated;
601 static struct global *globals;
602
603 static struct global *
604 add_global (enum global_type type, char const *name, int value,
605 char const *svalue)
606 {
607 /* Ignore the one non-symbol that can occur. */
608 if (strcmp (name, "..."))
609 {
610 if (num_globals == num_globals_allocated)
611 {
612 ptrdiff_t num_globals_max = (min (PTRDIFF_MAX, SIZE_MAX)
613 / sizeof *globals);
614 if (num_globals_allocated == num_globals_max)
615 memory_exhausted ();
616 if (num_globals_allocated < num_globals_max / 2)
617 num_globals_allocated = 2 * num_globals_allocated + 1;
618 else
619 num_globals_allocated = num_globals_max;
620 globals = xrealloc (globals, num_globals_allocated * sizeof *globals);
621 }
622
623 ++num_globals;
624
625 ptrdiff_t namesize = strlen (name) + 1;
626 char *buf = xmalloc (namesize + (svalue ? strlen (svalue) + 1 : 0));
627 globals[num_globals - 1].type = type;
628 globals[num_globals - 1].name = strcpy (buf, name);
629 if (svalue)
630 globals[num_globals - 1].v.svalue = strcpy (buf + namesize, svalue);
631 else
632 globals[num_globals - 1].v.value = value;
633 globals[num_globals - 1].flags = 0;
634 return globals + num_globals - 1;
635 }
636 return NULL;
637 }
638
639 static int
640 compare_globals (const void *a, const void *b)
641 {
642 const struct global *ga = a;
643 const struct global *gb = b;
644
645 if (ga->type != gb->type)
646 return ga->type - gb->type;
647
648 /* Consider "nil" to be the least, so that iQnil is zero. That
649 way, Qnil's internal representation is zero, which is a bit faster. */
650 if (ga->type == SYMBOL)
651 {
652 bool a_nil = strcmp (ga->name, "Qnil") == 0;
653 bool b_nil = strcmp (gb->name, "Qnil") == 0;
654 if (a_nil | b_nil)
655 return b_nil - a_nil;
656 }
657
658 return strcmp (ga->name, gb->name);
659 }
660
/* Write to stdout the text that ends struct emacs_globals and
   declares the `globals' variable and the `lispsym' array, the
   latter with NUM_SYMBOLS elements.  */
static void
close_emacs_globals (ptrdiff_t num_symbols)
{
  fputs ("};\n"
         "extern struct emacs_globals globals;\n"
         "\n"
         "#ifndef DEFINE_SYMBOLS\n"
         "extern\n"
         "#endif\n",
         stdout);
  printf ("struct Lisp_Symbol alignas (GCALIGNMENT) lispsym[%td];\n",
          num_symbols);
}
673
674 static void
675 write_globals (void)
676 {
677 ptrdiff_t i, j;
678 bool seen_defun = false;
679 ptrdiff_t symnum = 0;
680 ptrdiff_t num_symbols = 0;
681 qsort (globals, num_globals, sizeof (struct global), compare_globals);
682
683 j = 0;
684 for (i = 0; i < num_globals; i++)
685 {
686 while (i + 1 < num_globals
687 && strcmp (globals[i].name, globals[i + 1].name) == 0)
688 {
689 if (globals[i].type == FUNCTION
690 && globals[i].v.value != globals[i + 1].v.value)
691 error ("function '%s' defined twice with differing signatures",
692 globals[i].name);
693 free (globals[i].name);
694 i++;
695 }
696 num_symbols += globals[i].type == SYMBOL;
697 globals[j++] = globals[i];
698 }
699 num_globals = j;
700
701 for (i = 0; i < num_globals; ++i)
702 {
703 char const *type = 0;
704
705 switch (globals[i].type)
706 {
707 case EMACS_INTEGER:
708 type = "EMACS_INT";
709 break;
710 case BOOLEAN:
711 type = "bool";
712 break;
713 case LISP_OBJECT:
714 type = "Lisp_Object";
715 break;
716 case SYMBOL:
717 case FUNCTION:
718 if (!seen_defun)
719 {
720 close_emacs_globals (num_symbols);
721 putchar ('\n');
722 seen_defun = true;
723 }
724 break;
725 default:
726 fatal ("not a recognized DEFVAR_");
727 }
728
729 if (type)
730 {
731 printf (" %s f_%s;\n", type, globals[i].name);
732 printf ("#define %s globals.f_%s\n",
733 globals[i].name, globals[i].name);
734 }
735 else if (globals[i].type == SYMBOL)
736 printf (("#define i%s %td\n"
737 "DEFINE_LISP_SYMBOL (%s)\n"),
738 globals[i].name, symnum++, globals[i].name);
739 else
740 {
741 if (globals[i].flags & DEFUN_noreturn)
742 fputs ("_Noreturn ", stdout);
743
744 printf ("EXFUN (%s, ", globals[i].name);
745 if (globals[i].v.value == -1)
746 fputs ("MANY", stdout);
747 else if (globals[i].v.value == -2)
748 fputs ("UNEVALLED", stdout);
749 else
750 printf ("%d", globals[i].v.value);
751 putchar (')');
752
753 if (globals[i].flags & DEFUN_const)
754 fputs (" ATTRIBUTE_CONST", stdout);
755
756 puts (";");
757 }
758 }
759
760 if (!seen_defun)
761 close_emacs_globals (num_symbols);
762
763 puts ("#ifdef DEFINE_SYMBOLS");
764 puts ("static char const *const defsym_name[] = {");
765 for (ptrdiff_t i = 0; i < num_globals; i++)
766 if (globals[i].type == SYMBOL)
767 printf ("\t\"%s\",\n", globals[i].v.svalue);
768 puts ("};");
769 puts ("#endif");
770
771 puts ("#define Qnil builtin_lisp_symbol (0)");
772 puts ("#if DEFINE_NON_NIL_Q_SYMBOL_MACROS");
773 num_symbols = 0;
774 for (ptrdiff_t i = 0; i < num_globals; i++)
775 if (globals[i].type == SYMBOL && num_symbols++ != 0)
776 printf ("# define %s builtin_lisp_symbol (%td)\n",
777 globals[i].name, num_symbols - 1);
778 puts ("#endif");
779 }
780
781 \f
/* Open the C source named by FILENAME with fopen mode MODE and scan
   it with scan_c_stream (which also closes it).

   If FILENAME ends in `o', as in a .o file name, that last character
   is replaced by `c' and, failing that, by `m', to find the source
   the object was made from.  FILENAME is modified in place while
   doing so and restored to its original spelling before scanning, so
   that callers can still detect duplicate file names.

   If no file can be opened, the error is reported with perror and
   the program exits with EXIT_FAILURE.  */

static void
scan_c_file (char *filename, const char *mode)
{
  size_t len = strlen (filename);
  /* An empty name has no last character; do not index before the
     start of the string.  fopen then fails and is reported below.  */
  char *last = len > 0 ? &filename[len - 1] : NULL;
  char extension = last ? *last : '\0';
  FILE *infile;

  if (extension == 'o')
    *last = 'c';

  infile = fopen (filename, mode);

  if (infile == NULL && extension == 'o')
    {
      /* Try .m.  */
      *last = 'm';
      infile = fopen (filename, mode);
      if (infile == NULL)
        *last = 'c'; /* Don't confuse people.  */
    }

  if (infile == NULL)
    {
      perror (filename);
      exit (EXIT_FAILURE);
    }

  /* Reset extension to be able to detect duplicate files.  */
  if (last)
    *last = extension;
  scan_c_stream (infile);
}
817
/* Compare the upcoming input of INFILE with the string P, consuming
   characters as they are compared.  Return 1 if all of P was matched,
   0 at the first character that differs (that character has been
   consumed), and -1 if the input ended before P did.  An empty P
   matches without reading anything.  */

static int
stream_match (FILE *infile, const char *p)
{
  while (*p != '\0')
    {
      int c = getc (infile);

      if (c == EOF)
        return -1;
      if (c != *p)
        return 0;
      p++;
    }
  return 1;
}
834
835 static void
836 scan_c_stream (FILE *infile)
837 {
838 int commas, minargs, maxargs;
839 int c = '\n';
840
841 while (!feof (infile))
842 {
843 bool doc_keyword = false;
844 bool defunflag = false;
845 bool defvarperbufferflag = false;
846 bool defvarflag = false;
847 enum global_type type = INVALID;
848 static char *name;
849 static ptrdiff_t name_size;
850
851 if (c != '\n' && c != '\r')
852 {
853 c = getc (infile);
854 continue;
855 }
856 c = getc (infile);
857 if (c == ' ')
858 {
859 while (c == ' ')
860 c = getc (infile);
861 if (c != 'D')
862 continue;
863 c = getc (infile);
864 if (c != 'E')
865 continue;
866 c = getc (infile);
867 if (c != 'F')
868 continue;
869 c = getc (infile);
870 if (c == 'S')
871 {
872 c = getc (infile);
873 if (c != 'Y')
874 continue;
875 c = getc (infile);
876 if (c != 'M')
877 continue;
878 c = getc (infile);
879 if (c != ' ' && c != '\t' && c != '(')
880 continue;
881 type = SYMBOL;
882 }
883 else if (c == 'V')
884 {
885 c = getc (infile);
886 if (c != 'A')
887 continue;
888 c = getc (infile);
889 if (c != 'R')
890 continue;
891 c = getc (infile);
892 if (c != '_')
893 continue;
894
895 defvarflag = true;
896
897 c = getc (infile);
898 defvarperbufferflag = (c == 'P');
899 if (generate_globals)
900 {
901 if (c == 'I')
902 type = EMACS_INTEGER;
903 else if (c == 'L')
904 type = LISP_OBJECT;
905 else if (c == 'B')
906 type = BOOLEAN;
907 }
908
909 c = getc (infile);
910 /* We need to distinguish between DEFVAR_BOOL and
911 DEFVAR_BUFFER_DEFAULTS. */
912 if (generate_globals && type == BOOLEAN && c != 'O')
913 type = INVALID;
914 }
915 else
916 continue;
917 }
918 else if (c == 'D')
919 {
920 c = getc (infile);
921 if (c != 'E')
922 continue;
923 c = getc (infile);
924 if (c != 'F')
925 continue;
926 c = getc (infile);
927 defunflag = c == 'U';
928 }
929 else continue;
930
931 if (generate_globals
932 && (!defvarflag || defvarperbufferflag || type == INVALID)
933 && !defunflag && type != SYMBOL)
934 continue;
935
936 while (c != '(')
937 {
938 if (c < 0)
939 goto eof;
940 c = getc (infile);
941 }
942
943 if (type != SYMBOL)
944 {
945 /* Lisp variable or function name. */
946 c = getc (infile);
947 if (c != '"')
948 continue;
949 c = read_c_string_or_comment (infile, -1, false, 0);
950 }
951
952 if (generate_globals)
953 {
954 ptrdiff_t i = 0;
955 char const *svalue = 0;
956
957 /* Skip "," and whitespace. */
958 do
959 {
960 c = getc (infile);
961 }
962 while (c == ',' || c == ' ' || c == '\t' || c == '\n' || c == '\r');
963
964 /* Read in the identifier. */
965 do
966 {
967 if (c < 0)
968 goto eof;
969 input_buffer[i++] = c;
970 c = getc (infile);
971 }
972 while (! (c == ',' || c == ' ' || c == '\t'
973 || c == '\n' || c == '\r'));
974 input_buffer[i] = '\0';
975
976 if (name_size <= i)
977 {
978 free (name);
979 name_size = i + 1;
980 ptrdiff_t doubled;
981 if (! INT_MULTIPLY_WRAPV (name_size, 2, &doubled)
982 && doubled <= SIZE_MAX)
983 name_size = doubled;
984 name = xmalloc (name_size);
985 }
986 memcpy (name, input_buffer, i + 1);
987
988 if (type == SYMBOL)
989 {
990 do
991 c = getc (infile);
992 while (c == ' ' || c == '\t' || c == '\n' || c == '\r');
993 if (c != '"')
994 continue;
995 c = read_c_string_or_comment (infile, -1, false, 0);
996 svalue = input_buffer;
997 }
998
999 if (!defunflag)
1000 {
1001 add_global (type, name, 0, svalue);
1002 continue;
1003 }
1004 }
1005
1006 if (type == SYMBOL)
1007 continue;
1008
1009 /* DEFVAR_LISP ("name", addr, "doc")
1010 DEFVAR_LISP ("name", addr /\* doc *\/)
1011 DEFVAR_LISP ("name", addr, doc: /\* doc *\/) */
1012
1013 if (defunflag)
1014 commas = generate_globals ? 4 : 5;
1015 else if (defvarperbufferflag)
1016 commas = 3;
1017 else if (defvarflag)
1018 commas = 1;
1019 else /* For DEFSIMPLE and DEFPRED. */
1020 commas = 2;
1021
1022 while (commas)
1023 {
1024 if (c == ',')
1025 {
1026 commas--;
1027
1028 if (defunflag && (commas == 1 || commas == 2))
1029 {
1030 int scanned = 0;
1031 do
1032 c = getc (infile);
1033 while (c == ' ' || c == '\n' || c == '\r' || c == '\t');
1034 if (c < 0)
1035 goto eof;
1036 ungetc (c, infile);
1037 if (commas == 2) /* Pick up minargs. */
1038 scanned = fscanf (infile, "%d", &minargs);
1039 else /* Pick up maxargs. */
1040 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
1041 {
1042 if (generate_globals)
1043 maxargs = (c == 'M') ? -1 : -2;
1044 else
1045 maxargs = -1;
1046 }
1047 else
1048 scanned = fscanf (infile, "%d", &maxargs);
1049 if (scanned < 0)
1050 goto eof;
1051 }
1052 }
1053
1054 if (c == EOF)
1055 goto eof;
1056 c = getc (infile);
1057 }
1058
1059 if (generate_globals)
1060 {
1061 struct global *g = add_global (FUNCTION, name, maxargs, 0);
1062 if (!g)
1063 continue;
1064
1065 /* The following code tries to recognize function attributes
1066 specified after the docstring, e.g.:
1067
1068 DEFUN ("foo", Ffoo, Sfoo, X, Y, Z,
1069 doc: /\* doc *\/
1070 attributes: attribute1 attribute2 ...)
1071 (Lisp_Object arg...)
1072
1073 Now only 'noreturn' and 'const' attributes are used. */
1074
1075 /* Advance to the end of docstring. */
1076 c = getc (infile);
1077 if (c == EOF)
1078 goto eof;
1079 int d = getc (infile);
1080 if (d == EOF)
1081 goto eof;
1082 while (1)
1083 {
1084 if (c == '*' && d == '/')
1085 break;
1086 c = d, d = getc (infile);
1087 if (d == EOF)
1088 goto eof;
1089 }
1090 /* Skip spaces, if any. */
1091 do
1092 {
1093 c = getc (infile);
1094 if (c == EOF)
1095 goto eof;
1096 }
1097 while (c == ' ' || c == '\n' || c == '\r' || c == '\t');
1098 /* Check for 'attributes:' token. */
1099 if (c == 'a' && stream_match (infile, "ttributes:"))
1100 {
1101 char *p = input_buffer;
1102 /* Collect attributes up to ')'. */
1103 while (1)
1104 {
1105 c = getc (infile);
1106 if (c == EOF)
1107 goto eof;
1108 if (c == ')')
1109 break;
1110 if (p - input_buffer > sizeof (input_buffer))
1111 abort ();
1112 *p++ = c;
1113 }
1114 *p = 0;
1115 if (strstr (input_buffer, "noreturn"))
1116 g->flags |= DEFUN_noreturn;
1117 if (strstr (input_buffer, "const"))
1118 g->flags |= DEFUN_const;
1119 }
1120 continue;
1121 }
1122
1123 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
1124 c = getc (infile);
1125
1126 if (c == '"')
1127 c = read_c_string_or_comment (infile, 0, false, 0);
1128
1129 while (c != EOF && c != ',' && c != '/')
1130 c = getc (infile);
1131 if (c == ',')
1132 {
1133 c = getc (infile);
1134 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
1135 c = getc (infile);
1136 while ((c >= 'a' && c <= 'z') || (c >= 'Z' && c <= 'Z'))
1137 c = getc (infile);
1138 if (c == ':')
1139 {
1140 doc_keyword = true;
1141 c = getc (infile);
1142 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
1143 c = getc (infile);
1144 }
1145 }
1146
1147 if (c == '"'
1148 || (c == '/'
1149 && (c = getc (infile),
1150 ungetc (c, infile),
1151 c == '*')))
1152 {
1153 bool comment = c != '"';
1154 bool saw_usage;
1155
1156 printf ("\037%c%s\n", defvarflag ? 'V' : 'F', input_buffer);
1157
1158 if (comment)
1159 getc (infile); /* Skip past `*'. */
1160 c = read_c_string_or_comment (infile, 1, comment, &saw_usage);
1161
1162 /* If this is a defun, find the arguments and print them. If
1163 this function takes MANY or UNEVALLED args, then the C source
1164 won't give the names of the arguments, so we shouldn't bother
1165 trying to find them.
1166
1167 Various doc-string styles:
1168 0: DEFUN (..., "DOC") (args) [!comment]
1169 1: DEFUN (..., /\* DOC *\/ (args)) [comment && !doc_keyword]
1170 2: DEFUN (..., doc: /\* DOC *\/) (args) [comment && doc_keyword]
1171 */
1172 if (defunflag && maxargs != -1 && !saw_usage)
1173 {
1174 char argbuf[1024], *p = argbuf;
1175
1176 if (!comment || doc_keyword)
1177 while (c != ')')
1178 {
1179 if (c < 0)
1180 goto eof;
1181 c = getc (infile);
1182 }
1183
1184 /* Skip into arguments. */
1185 while (c != '(')
1186 {
1187 if (c < 0)
1188 goto eof;
1189 c = getc (infile);
1190 }
1191 /* Copy arguments into ARGBUF. */
1192 *p++ = c;
1193 do
1194 *p++ = c = getc (infile);
1195 while (c != ')');
1196 *p = '\0';
1197 /* Output them. */
1198 fputs ("\n\n", stdout);
1199 write_c_args (input_buffer, argbuf, minargs, maxargs);
1200 }
1201 else if (defunflag && maxargs == -1 && !saw_usage)
1202 /* The DOC should provide the usage form. */
1203 fprintf (stderr, "Missing 'usage' for function '%s'.\n",
1204 input_buffer);
1205 }
1206 }
1207 eof:
1208 if (ferror (infile) || fclose (infile) != 0)
1209 fatal ("read error");
1210 }
1211 \f
1212 /* Read a file of Lisp code, compiled or interpreted.
1213 Looks for
1214 (defun NAME ARGS DOCSTRING ...)
1215 (defmacro NAME ARGS DOCSTRING ...)
1216 (defsubst NAME ARGS DOCSTRING ...)
1217 (autoload (quote NAME) FILE DOCSTRING ...)
1218 (defvar NAME VALUE DOCSTRING)
1219 (defconst NAME VALUE DOCSTRING)
1220 (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
1221 (fset (quote NAME) #[... DOCSTRING ...])
1222 (defalias (quote NAME) #[... DOCSTRING ...])
1223 (custom-declare-variable (quote NAME) VALUE DOCSTRING ...)
1224 starting in column zero.
1225 (quote NAME) may appear as 'NAME as well.
1226
1227 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
1228 When we find that, we save it for the following defining-form,
1229 and we use that instead of reading a doc string within that defining-form.
1230
1231 For defvar, defconst, and fset we skip to the docstring with a kludgy
1232 formatting convention: all docstrings must appear on the same line as the
1233 initial open-paren (the one in column zero) and must contain a backslash
1234 and a newline immediately after the initial double-quote. No newlines
1235 must appear between the beginning of the form and the first double-quote.
1236 For defun, defmacro, and autoload, we know how to skip over the
1237 arglist, but the doc string must still have a backslash and newline
1238 immediately after the double quote.
1239 The only source files that must follow this convention are preloaded
1240 uncompiled ones like loaddefs.el; aside from that, it is always the .elc
1241 file that we should look at, and they are no problem because byte-compiler
1242 output follows this convention.
1243 The NAME and DOCSTRING are output.
1244 NAME is preceded by `F' for a function or `V' for a variable.
1245 An entry is output only if DOCSTRING has \ newline just after the opening ".
1246 */
1247
/* Consume spaces, tabs, newlines and carriage returns from INFILE.
   The first other character is pushed back so the next read returns
   it; at end of input nothing is pushed back.  */
static void
skip_white (FILE *infile)
{
  /* getc's result must stay an int: narrowing it to char makes the
     byte 0xFF indistinguishable from EOF, so that byte would be
     dropped or a bogus byte pushed back, depending on whether char
     is signed.  */
  int c;

  do
    c = getc (infile);
  while (c == ' ' || c == '\t' || c == '\n' || c == '\r');

  ungetc (c, infile);
}
1256
/* Read a Lisp symbol from INFILE into BUFFER as a NUL-terminated
   string, skipping whitespace before and after it.  The symbol ends
   at whitespace, a parenthesis, or end of input; the terminating
   character is left unread.  A backslash quotes the character that
   follows it, which is stored without the backslash.  A diagnostic is
   printed on stderr if the symbol is empty.

   NOTE(review): the capacity of BUFFER is not passed in, so the
   length of the symbol cannot be checked here; callers must supply a
   buffer large enough for any symbol in the input.  */
static void
read_lisp_symbol (FILE *infile, char *buffer)
{
  /* An int, so that EOF is distinguishable from every byte value;
     with a char the end of input was never noticed and the loop kept
     storing characters without bound.  */
  int c;
  char *fillp = buffer;

  skip_white (infile);
  for (;;)
    {
      c = getc (infile);
      if (c == '\\')
        {
          /* Store the quoted character at the current position (not
             one past it, which would leave a gap that the following
             character then overwrites).  */
          c = getc (infile);
          if (c == EOF)
            break;
          *fillp++ = c;
        }
      else if (c == EOF
               || c == ' ' || c == '\t' || c == '\n' || c == '\r'
               || c == '(' || c == ')')
        {
          /* Pushing back EOF is a no-op.  */
          ungetc (c, infile);
          break;
        }
      else
        *fillp++ = c;
    }
  *fillp = 0;

  if (! buffer[0])
    fprintf (stderr, "## expected a symbol, got '%c'\n", c);

  skip_white (infile);
}
1284
/* Consume the remainder of the current line of INFILE and report
   whether a doc string opens at its end, i.e. whether the two
   characters just before the line terminator are `"' and `\'.

   Return true in that case; the line terminator has been consumed.
   Otherwise push the terminating character (newline, carriage return,
   or EOF) back onto INFILE and return false.  */

static bool
search_lisp_doc_at_eol (FILE *infile)
{
  int before_last = 0;
  int last = 0;
  int cur = 0;

  /* Advance to the line terminator, sliding a window over the two
     characters that precede the one most recently read.  */
  do
    {
      before_last = last;
      last = cur;
      cur = getc (infile);
    }
  while (! (cur == '\n' || cur == '\r' || cur == EOF));

  /* Only a line ending in dquote-backslash introduces a doc string.  */
  if (before_last == '"' && last == '\\')
    return true;

#ifdef DEBUG
  fprintf (stderr, "## non-docstring found\n");
#endif
  ungetc (cur, infile);
  return false;
}
1310
1311 #define DEF_ELISP_FILE(fn) { #fn, sizeof(#fn) - 1 }
1312
1313 static void
1314 scan_lisp_file (const char *filename, const char *mode)
1315 {
1316 FILE *infile;
1317 int c;
1318 char *saved_string = 0;
1319 /* These are the only files that are loaded uncompiled, and must
1320 follow the conventions of the doc strings expected by this
1321 function. These conventions are automatically followed by the
1322 byte compiler when it produces the .elc files. */
1323 static struct {
1324 const char *fn;
1325 int fl;
1326 } const uncompiled[] = {
1327 DEF_ELISP_FILE (loaddefs.el),
1328 DEF_ELISP_FILE (loadup.el),
1329 DEF_ELISP_FILE (charprop.el),
1330 DEF_ELISP_FILE (cp51932.el),
1331 DEF_ELISP_FILE (eucjp-ms.el)
1332 };
1333 int i;
1334 int flen = strlen (filename);
1335
1336 if (generate_globals)
1337 fatal ("scanning lisp file when -g specified");
1338 if (flen > 3 && !strcmp (filename + flen - 3, ".el"))
1339 {
1340 bool match = false;
1341 for (i = 0; i < sizeof (uncompiled) / sizeof (uncompiled[0]); i++)
1342 {
1343 if (uncompiled[i].fl <= flen
1344 && !strcmp (filename + flen - uncompiled[i].fl, uncompiled[i].fn)
1345 && (flen == uncompiled[i].fl
1346 || IS_SLASH (filename[flen - uncompiled[i].fl - 1])))
1347 {
1348 match = true;
1349 break;
1350 }
1351 }
1352 if (!match)
1353 fatal ("uncompiled lisp file %s is not supported", filename);
1354 }
1355
1356 infile = fopen (filename, mode);
1357 if (infile == NULL)
1358 {
1359 perror (filename);
1360 exit (EXIT_FAILURE);
1361 }
1362
1363 c = '\n';
1364 while (!feof (infile))
1365 {
1366 char buffer[BUFSIZ];
1367 char type;
1368
1369 /* If not at end of line, skip till we get to one. */
1370 if (c != '\n' && c != '\r')
1371 {
1372 c = getc (infile);
1373 continue;
1374 }
1375 /* Skip the line break. */
1376 while (c == '\n' || c == '\r')
1377 c = getc (infile);
1378 /* Detect a dynamic doc string and save it for the next expression. */
1379 if (c == '#')
1380 {
1381 c = getc (infile);
1382 if (c == '@')
1383 {
1384 ptrdiff_t length = 0;
1385 ptrdiff_t i;
1386
1387 /* Read the length. */
1388 while ((c = getc (infile),
1389 c >= '0' && c <= '9'))
1390 {
1391 if (INT_MULTIPLY_WRAPV (length, 10, &length)
1392 || INT_ADD_WRAPV (length, c - '0', &length)
1393 || SIZE_MAX < length)
1394 memory_exhausted ();
1395 }
1396
1397 if (length <= 1)
1398 fatal ("invalid dynamic doc string length");
1399
1400 if (c != ' ')
1401 fatal ("space not found after dynamic doc string length");
1402
1403 /* The next character is a space that is counted in the length
1404 but not part of the doc string.
1405 We already read it, so just ignore it. */
1406 length--;
1407
1408 /* Read in the contents. */
1409 free (saved_string);
1410 saved_string = xmalloc (length);
1411 for (i = 0; i < length; i++)
1412 saved_string[i] = getc (infile);
1413 /* The last character is a ^_.
1414 That is needed in the .elc file
1415 but it is redundant in DOC. So get rid of it here. */
1416 saved_string[length - 1] = 0;
1417 /* Skip the line break. */
1418 while (c == '\n' || c == '\r')
1419 c = getc (infile);
1420 /* Skip the following line. */
1421 while (c != '\n' && c != '\r')
1422 c = getc (infile);
1423 }
1424 continue;
1425 }
1426
1427 if (c != '(')
1428 continue;
1429
1430 read_lisp_symbol (infile, buffer);
1431
1432 if (! strcmp (buffer, "defun")
1433 || ! strcmp (buffer, "defmacro")
1434 || ! strcmp (buffer, "defsubst"))
1435 {
1436 type = 'F';
1437 read_lisp_symbol (infile, buffer);
1438
1439 /* Skip the arguments: either "nil" or a list in parens. */
1440
1441 c = getc (infile);
1442 if (c == 'n') /* nil */
1443 {
1444 if ((c = getc (infile)) != 'i'
1445 || (c = getc (infile)) != 'l')
1446 {
1447 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1448 buffer, filename);
1449 continue;
1450 }
1451 }
1452 else if (c != '(')
1453 {
1454 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1455 buffer, filename);
1456 continue;
1457 }
1458 else
1459 while (c != ')')
1460 c = getc (infile);
1461 skip_white (infile);
1462
1463 /* If the next three characters aren't `dquote bslash newline'
1464 then we're not reading a docstring.
1465 */
1466 if ((c = getc (infile)) != '"'
1467 || (c = getc (infile)) != '\\'
1468 || ((c = getc (infile)) != '\n' && c != '\r'))
1469 {
1470 #ifdef DEBUG
1471 fprintf (stderr, "## non-docstring in %s (%s)\n",
1472 buffer, filename);
1473 #endif
1474 continue;
1475 }
1476 }
1477
1478 /* defcustom can only occur in uncompiled Lisp files. */
1479 else if (! strcmp (buffer, "defvar")
1480 || ! strcmp (buffer, "defconst")
1481 || ! strcmp (buffer, "defcustom"))
1482 {
1483 type = 'V';
1484 read_lisp_symbol (infile, buffer);
1485
1486 if (saved_string == 0)
1487 if (!search_lisp_doc_at_eol (infile))
1488 continue;
1489 }
1490
1491 else if (! strcmp (buffer, "custom-declare-variable")
1492 || ! strcmp (buffer, "defvaralias")
1493 )
1494 {
1495 type = 'V';
1496
1497 c = getc (infile);
1498 if (c == '\'')
1499 read_lisp_symbol (infile, buffer);
1500 else
1501 {
1502 if (c != '(')
1503 {
1504 fprintf (stderr,
1505 "## unparsable name in custom-declare-variable in %s\n",
1506 filename);
1507 continue;
1508 }
1509 read_lisp_symbol (infile, buffer);
1510 if (strcmp (buffer, "quote"))
1511 {
1512 fprintf (stderr,
1513 "## unparsable name in custom-declare-variable in %s\n",
1514 filename);
1515 continue;
1516 }
1517 read_lisp_symbol (infile, buffer);
1518 c = getc (infile);
1519 if (c != ')')
1520 {
1521 fprintf (stderr,
1522 "## unparsable quoted name in custom-declare-variable in %s\n",
1523 filename);
1524 continue;
1525 }
1526 }
1527
1528 if (saved_string == 0)
1529 if (!search_lisp_doc_at_eol (infile))
1530 continue;
1531 }
1532
1533 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
1534 {
1535 type = 'F';
1536
1537 c = getc (infile);
1538 if (c == '\'')
1539 read_lisp_symbol (infile, buffer);
1540 else
1541 {
1542 if (c != '(')
1543 {
1544 fprintf (stderr, "## unparsable name in fset in %s\n",
1545 filename);
1546 continue;
1547 }
1548 read_lisp_symbol (infile, buffer);
1549 if (strcmp (buffer, "quote"))
1550 {
1551 fprintf (stderr, "## unparsable name in fset in %s\n",
1552 filename);
1553 continue;
1554 }
1555 read_lisp_symbol (infile, buffer);
1556 c = getc (infile);
1557 if (c != ')')
1558 {
1559 fprintf (stderr,
1560 "## unparsable quoted name in fset in %s\n",
1561 filename);
1562 continue;
1563 }
1564 }
1565
1566 if (saved_string == 0)
1567 if (!search_lisp_doc_at_eol (infile))
1568 continue;
1569 }
1570
1571 else if (! strcmp (buffer, "autoload"))
1572 {
1573 type = 'F';
1574 c = getc (infile);
1575 if (c == '\'')
1576 read_lisp_symbol (infile, buffer);
1577 else
1578 {
1579 if (c != '(')
1580 {
1581 fprintf (stderr, "## unparsable name in autoload in %s\n",
1582 filename);
1583 continue;
1584 }
1585 read_lisp_symbol (infile, buffer);
1586 if (strcmp (buffer, "quote"))
1587 {
1588 fprintf (stderr, "## unparsable name in autoload in %s\n",
1589 filename);
1590 continue;
1591 }
1592 read_lisp_symbol (infile, buffer);
1593 c = getc (infile);
1594 if (c != ')')
1595 {
1596 fprintf (stderr,
1597 "## unparsable quoted name in autoload in %s\n",
1598 filename);
1599 continue;
1600 }
1601 }
1602 skip_white (infile);
1603 if ((c = getc (infile)) != '\"')
1604 {
1605 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
1606 buffer, filename);
1607 continue;
1608 }
1609 read_c_string_or_comment (infile, 0, false, 0);
1610
1611 if (saved_string == 0)
1612 if (!search_lisp_doc_at_eol (infile))
1613 continue;
1614 }
1615
1616 #ifdef DEBUG
1617 else if (! strcmp (buffer, "if")
1618 || ! strcmp (buffer, "byte-code"))
1619 continue;
1620 #endif
1621
1622 else
1623 {
1624 #ifdef DEBUG
1625 fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
1626 buffer, filename);
1627 #endif
1628 continue;
1629 }
1630
1631 /* At this point, we should either use the previous dynamic doc string in
1632 saved_string or gobble a doc string from the input file.
1633 In the latter case, the opening quote (and leading backslash-newline)
1634 have already been read. */
1635
1636 printf ("\037%c%s\n", type, buffer);
1637 if (saved_string)
1638 {
1639 fputs (saved_string, stdout);
1640 /* Don't use one dynamic doc string twice. */
1641 free (saved_string);
1642 saved_string = 0;
1643 }
1644 else
1645 read_c_string_or_comment (infile, 1, false, 0);
1646 }
1647 free (saved_string);
1648 if (ferror (infile) || fclose (infile) != 0)
1649 fatal ("%s: read error", filename);
1650 }
1651
1652
1653 /* make-docfile.c ends here */