]> code.delx.au - gnu-emacs/blob - lib-src/make-docfile.c
Merge from origin/emacs-24
[gnu-emacs] / lib-src / make-docfile.c
1 /* Generate doc-string file for GNU Emacs from source files.
2
3 Copyright (C) 1985-1986, 1992-1994, 1997, 1999-2015 Free Software
4 Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20
21
22 /* The arguments given to this program are all the C and Lisp source files
23 of GNU Emacs. .elc and .el and .c files are allowed.
24 A .o file can also be specified; the .c file it was made from is used.
25 This helps the makefile pass the correct list of files.
26 Option -d DIR means change to DIR before looking for files.
27
28 The results, which go to standard output or to a file
29 specified with -a or -o (-a to append, -o to start from nothing),
30 are entries containing function or variable names and their documentation.
31 Each entry starts with a ^_ character.
32 Then comes F for a function or V for a variable.
33 Then comes the function or variable name, terminated with a newline.
34 Then comes the documentation for that function or variable.
35 */
36
37 #include <config.h>
38
39 #include <stdbool.h>
40 #include <stdio.h>
41 #include <stdlib.h> /* config.h unconditionally includes this anyway */
42
43 #ifdef WINDOWSNT
44 /* Defined to be sys_fopen in ms-w32.h, but only #ifdef emacs, so this
45 is really just insurance. */
46 #undef fopen
47 #include <direct.h>
48 #endif /* WINDOWSNT */
49
50 #include <binary-io.h>
51
52 #ifdef DOS_NT
53 /* Defined to be sys_chdir in ms-w32.h, but only #ifdef emacs, so this
54 is really just insurance.
55
56 Similarly, msdos defines this as sys_chdir, but we're not linking with the
57 file where that function is defined. */
58 #undef chdir
59 #define IS_SLASH(c) ((c) == '/' || (c) == '\\' || (c) == ':')
60 #else /* not DOS_NT */
61 #define IS_SLASH(c) ((c) == '/')
62 #endif /* not DOS_NT */
63
64 static int scan_file (char *filename);
65 static int scan_lisp_file (const char *filename, const char *mode);
66 static int scan_c_file (char *filename, const char *mode);
67 static int scan_c_stream (FILE *infile);
68 static void start_globals (void);
69 static void write_globals (void);
70
71 #include <unistd.h>
72
73 /* Name this program was invoked with. */
74 char *progname;
75
76 /* Nonzero if this invocation is generating globals.h. */
77 int generate_globals;
78
79 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
80
81 /* VARARGS1 */
82 static void
83 error (const char *s1, const char *s2)
84 {
85 fprintf (stderr, "%s: ", progname);
86 fprintf (stderr, s1, s2);
87 fprintf (stderr, "\n");
88 }
89
/* Report a problem exactly as `error' does (same S1/S2 convention),
   then terminate the program with EXIT_FAILURE.  Never returns.  */

/* VARARGS1 */
static _Noreturn void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
99
/* Allocate SIZE bytes with malloc and return the block.  If malloc
   fails, report "virtual memory exhausted" via `fatal' and exit, so
   the caller never receives a null pointer.  */

static void *
xmalloc (unsigned int size)
{
  void *block = malloc (size);

  if (!block)
    fatal ("virtual memory exhausted", 0);
  return block;
}
110
/* Return a freshly allocated copy of the string S, made with strdup.
   If strdup fails, report "virtual memory exhausted" via `fatal' and
   exit instead of returning a null pointer.  */

static char *
xstrdup (char *s)
{
  char *copy = strdup (s);

  if (copy == NULL)
    fatal ("virtual memory exhausted", 0);
  return copy;
}
121
/* Resize the block ARG to SIZE bytes with realloc and return the
   (possibly moved) block.  If realloc fails, report "virtual memory
   exhausted" via `fatal' and exit instead of returning a null
   pointer.  */

static void *
xrealloc (void *arg, unsigned int size)
{
  void *block = realloc (arg, size);

  if (!block)
    fatal ("virtual memory exhausted", 0);
  return block;
}
132
133 \f
134 int
135 main (int argc, char **argv)
136 {
137 int i;
138 int err_count = 0;
139
140 progname = argv[0];
141
142 /* If first two args are -o FILE, output to FILE. */
143 i = 1;
144 if (argc > i + 1 && !strcmp (argv[i], "-o"))
145 {
146 if (! freopen (argv[i + 1], "w", stdout))
147 {
148 perror (argv[i + 1]);
149 return EXIT_FAILURE;
150 }
151 i += 2;
152 }
153 if (argc > i + 1 && !strcmp (argv[i], "-a"))
154 {
155 if (! freopen (argv[i + 1], "a", stdout))
156 {
157 perror (argv[i + 1]);
158 return EXIT_FAILURE;
159 }
160 i += 2;
161 }
162 if (argc > i + 1 && !strcmp (argv[i], "-d"))
163 {
164 if (chdir (argv[i + 1]) != 0)
165 {
166 perror (argv[i + 1]);
167 return EXIT_FAILURE;
168 }
169 i += 2;
170 }
171 if (argc > i && !strcmp (argv[i], "-g"))
172 {
173 generate_globals = 1;
174 ++i;
175 }
176
177 set_binary_mode (fileno (stdout), O_BINARY);
178
179 if (generate_globals)
180 start_globals ();
181
182 if (argc <= i)
183 scan_c_stream (stdin);
184 else
185 {
186 int first_infile = i;
187 for (; i < argc; i++)
188 {
189 int j;
190 /* Don't process one file twice. */
191 for (j = first_infile; j < i; j++)
192 if (strcmp (argv[i], argv[j]) == 0)
193 break;
194 if (j == i)
195 err_count += scan_file (argv[i]);
196 }
197 }
198
199 if (err_count == 0 && generate_globals)
200 write_globals ();
201
202 return (err_count > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
203 }
204
/* Write a source-file boundary marker to stdout: a ^_ character, the
   letter `S', the part of FILENAME that follows its last directory
   separator (all of FILENAME if it has none), and a newline.  */
static void
put_filename (char *filename)
{
  const char *base = filename;

  for (const char *p = filename; *p != '\0'; p++)
    if (IS_DIRECTORY_SEP (*p))
      base = p + 1;

  printf ("\037S%s\n", base);
}
219
220 /* Read file FILENAME and output its doc strings to stdout.
221 Return 1 if file is not found, 0 if it is found. */
222
223 static int
224 scan_file (char *filename)
225 {
226
227 size_t len = strlen (filename);
228
229 if (!generate_globals)
230 put_filename (filename);
231 if (len > 4 && !strcmp (filename + len - 4, ".elc"))
232 return scan_lisp_file (filename, "rb");
233 else if (len > 3 && !strcmp (filename + len - 3, ".el"))
234 return scan_lisp_file (filename, "r");
235 else
236 return scan_c_file (filename, "r");
237 }
238
/* Write the fixed prologue of the generated globals file to stdout:
   two banner comments and the opening of `struct emacs_globals'.  */
static void
start_globals (void)
{
  fputs ("/* This file was auto-generated by make-docfile.  */\n"
         "/* DO NOT EDIT.  */\n"
         "struct emacs_globals {\n",
         stdout);
}
246 \f
/* Scratch buffer shared by the C-source scanner.  It receives string
   contents when `read_c_string_or_comment' is asked to store rather
   than print, and `scan_c_stream' also uses it for identifiers and
   attribute text.  */
static char input_buffer[128];

/* Bookkeeping for one run of `read_c_string_or_comment'.  */
struct rcsoc_state
{
  /* Spaces that have been read but not yet output.  */
  unsigned pending_spaces;
  /* Newlines that have been read but not yet output.  */
  unsigned pending_newlines;

  /* The stream being read.  */
  FILE *in_file;

  /* If non-null, the next free position in a buffer that receives a
     copy of every output character.  */
  char *buf_ptr;
  /* If non-null, a stream that receives a copy of every output
     character.  */
  FILE *out_file;

  /* A keyword to look for at the beginning of lines, or null for
     none.  An occurrence is not copied to the output; instead
     SAW_KEYWORD is set.  */
  const char *keyword;
  /* How far into KEYWORD the input currently being scanned has
     matched.  */
  const char *cur_keyword_ptr;
  /* Nonzero once a complete occurrence of KEYWORD has been seen.  */
  int saw_keyword;
};
272
273 /* Output CH to the file or buffer in STATE. Any pending newlines or
274 spaces are output first. */
275
276 static void
277 put_char (int ch, struct rcsoc_state *state)
278 {
279 int out_ch;
280 do
281 {
282 if (state->pending_newlines > 0)
283 {
284 state->pending_newlines--;
285 out_ch = '\n';
286 }
287 else if (state->pending_spaces > 0)
288 {
289 state->pending_spaces--;
290 out_ch = ' ';
291 }
292 else
293 out_ch = ch;
294
295 if (state->out_file)
296 putc (out_ch, state->out_file);
297 if (state->buf_ptr)
298 *state->buf_ptr++ = out_ch;
299 }
300 while (out_ch != ch);
301 }
302
303 /* If in the middle of scanning a keyword, continue scanning with
304 character CH, otherwise output CH to the file or buffer in STATE.
305 Any pending newlines or spaces are output first, as well as any
306 previously scanned characters that were thought to be part of a
307 keyword, but were in fact not. */
308
309 static void
310 scan_keyword_or_put_char (int ch, struct rcsoc_state *state)
311 {
312 if (state->keyword
313 && *state->cur_keyword_ptr == ch
314 && (state->cur_keyword_ptr > state->keyword
315 || state->pending_newlines > 0))
316 /* We might be looking at STATE->keyword at some point.
317 Keep looking until we know for sure. */
318 {
319 if (*++state->cur_keyword_ptr == '\0')
320 /* Saw the whole keyword. Set SAW_KEYWORD flag to true. */
321 {
322 state->saw_keyword = 1;
323
324 /* Reset the scanning pointer. */
325 state->cur_keyword_ptr = state->keyword;
326
327 /* Canonicalize whitespace preceding a usage string. */
328 state->pending_newlines = 2;
329 state->pending_spaces = 0;
330
331 /* Skip any whitespace between the keyword and the
332 usage string. */
333 do
334 ch = getc (state->in_file);
335 while (ch == ' ' || ch == '\n');
336
337 /* Output the open-paren we just read. */
338 put_char (ch, state);
339
340 /* Skip the function name and replace it with `fn'. */
341 do
342 ch = getc (state->in_file);
343 while (ch != ' ' && ch != ')');
344 put_char ('f', state);
345 put_char ('n', state);
346
347 /* Put back the last character. */
348 ungetc (ch, state->in_file);
349 }
350 }
351 else
352 {
353 if (state->keyword && state->cur_keyword_ptr > state->keyword)
354 /* We scanned the beginning of a potential usage
355 keyword, but it was a false alarm. Output the
356 part we scanned. */
357 {
358 const char *p;
359
360 for (p = state->keyword; p < state->cur_keyword_ptr; p++)
361 put_char (*p, state);
362
363 state->cur_keyword_ptr = state->keyword;
364 }
365
366 put_char (ch, state);
367 }
368 }
369
370
371 /* Skip a C string or C-style comment from INFILE, and return the
372 character that follows. COMMENT non-zero means skip a comment. If
373 PRINTFLAG is positive, output string contents to stdout. If it is
374 negative, store contents in buf. Convert escape sequences \n and
375 \t to newline and tab; discard \ followed by newline.
376 If SAW_USAGE is non-zero, then any occurrences of the string `usage:'
377 at the beginning of a line will be removed, and *SAW_USAGE set to
378 true if any were encountered. */
379
380 static int
381 read_c_string_or_comment (FILE *infile, int printflag, int comment, int *saw_usage)
382 {
383 register int c;
384 struct rcsoc_state state;
385
386 state.in_file = infile;
387 state.buf_ptr = (printflag < 0 ? input_buffer : 0);
388 state.out_file = (printflag > 0 ? stdout : 0);
389 state.pending_spaces = 0;
390 state.pending_newlines = 0;
391 state.keyword = (saw_usage ? "usage:" : 0);
392 state.cur_keyword_ptr = state.keyword;
393 state.saw_keyword = 0;
394
395 c = getc (infile);
396 if (comment)
397 while (c == '\n' || c == '\r' || c == '\t' || c == ' ')
398 c = getc (infile);
399
400 while (c != EOF)
401 {
402 while (c != EOF && (comment ? c != '*' : c != '"'))
403 {
404 if (c == '\\')
405 {
406 c = getc (infile);
407 if (c == '\n' || c == '\r')
408 {
409 c = getc (infile);
410 continue;
411 }
412 if (c == 'n')
413 c = '\n';
414 if (c == 't')
415 c = '\t';
416 }
417
418 if (c == ' ')
419 state.pending_spaces++;
420 else if (c == '\n')
421 {
422 state.pending_newlines++;
423 state.pending_spaces = 0;
424 }
425 else
426 scan_keyword_or_put_char (c, &state);
427
428 c = getc (infile);
429 }
430
431 if (c != EOF)
432 c = getc (infile);
433
434 if (comment)
435 {
436 if (c == '/')
437 {
438 c = getc (infile);
439 break;
440 }
441
442 scan_keyword_or_put_char ('*', &state);
443 }
444 else
445 {
446 if (c != '"')
447 break;
448
449 /* If we had a "", concatenate the two strings. */
450 c = getc (infile);
451 }
452 }
453
454 if (printflag < 0)
455 *state.buf_ptr = 0;
456
457 if (saw_usage)
458 *saw_usage = state.saw_keyword;
459
460 return c;
461 }
462
463
464 \f
/* Write to stdout the argument names of function FUNC, whose C
   parameter list is in BUF (optionally starting with its open-paren
   and ending with its close-paren), in the form `(fn ARG...)'.

   The name written for each parameter is the last identifier seen
   before its `,' or `)', upcased and with `_' turned into `-';
   `defalt' is written as `DEFAULT'.  MINARGS and MAXARGS are the
   minimum and maximum number of arguments: `&optional' is written in
   front of the first parameter past the MINARGS required ones.

   A parameter list consisting of exactly `void' produces no
   arguments.  An empty parameter slot is reported with `error', using
   FUNC in the message.  */

static void
write_c_args (char *func, char *buf, int minargs, int maxargs)
{
  char *p;
  int in_ident = 0;
  char *ident_start = NULL;
  size_t ident_length = 0;

  fputs ("(fn", stdout);

  if (*buf == '(')
    ++buf;

  for (p = buf; *p; p++)
    {
      char c = *p;

      /* Notice when a new identifier starts or the current one ends.  */
      if ((('A' <= c && c <= 'Z')
           || ('a' <= c && c <= 'z')
           || ('0' <= c && c <= '9')
           || c == '_')
          != in_ident)
        {
          if (!in_ident)
            {
              in_ident = 1;
              ident_start = p;
            }
          else
            {
              in_ident = 0;
              ident_length = p - ident_start;
            }
        }

      /* Found the end of an argument, write out the last seen
         identifier.  */
      if (c == ',' || c == ')')
        {
          if (ident_length == 0)
            {
              error ("empty arg list for `%s' should be (void), not ()", func);
              continue;
            }

          /* Skip only the exact word `void'.  Comparing just
             IDENT_LENGTH characters would also match its prefixes and
             silently drop parameters named `v', `vo' or `voi'.  */
          if (ident_length == 4 && memcmp (ident_start, "void", 4) == 0)
            continue;

          putchar (' ');

          if (minargs == 0 && maxargs > 0)
            fputs ("&optional ", stdout);

          minargs--;
          maxargs--;

          /* In C code, `default' is a reserved word, so we spell it
             `defalt'; demangle that here.  */
          if (ident_length == 6 && memcmp (ident_start, "defalt", 6) == 0)
            fputs ("DEFAULT", stdout);
          else
            for (size_t k = 0; k < ident_length; k++)
              {
                c = ident_start[k];
                if (c >= 'a' && c <= 'z')
                  /* Upcase the letter.  */
                  c += 'A' - 'a';
                else if (c == '_')
                  /* Print underscore as hyphen.  */
                  c = '-';
                putchar (c);
              }
        }
    }

  putchar (')');
}
546 \f
/* The kinds of global this program records in `-g' mode.  They are
   listed roughly in decreasing alignment order to avoid allocation
   gaps, except that symbols and functions come last; the generated
   file is ordered by these values.  */
enum global_type
{
  INVALID,        /* Not a recognized kind.  */
  LISP_OBJECT,    /* Written as a `Lisp_Object' field.  */
  EMACS_INTEGER,  /* Written as an `EMACS_INT' field.  */
  BOOLEAN,        /* Written as a `bool' field.  */
  SYMBOL,         /* A DEFSYM symbol.  */
  FUNCTION        /* A DEFUN function.  */
};
559
560 /* A single global. */
561 struct global
562 {
563 enum global_type type;
564 char *name;
565 int flags;
566 union
567 {
568 int value;
569 char const *svalue;
570 } v;
571 };
572
/* Bit values for the FLAGS field of `struct global'.  They are applied
   to DEFUNs only.  */
enum
{
  DEFUN_noreturn = 1,  /* Declaration gets a `_Noreturn' prefix.  */
  DEFUN_const = 2      /* Declaration gets `ATTRIBUTE_CONST'.  */
};
575
576 /* All the variable names we saw while scanning C sources in `-g'
577 mode. */
578 int num_globals;
579 int num_globals_allocated;
580 struct global *globals;
581
582 static struct global *
583 add_global (enum global_type type, char *name, int value, char const *svalue)
584 {
585 /* Ignore the one non-symbol that can occur. */
586 if (strcmp (name, "..."))
587 {
588 ++num_globals;
589
590 if (num_globals_allocated == 0)
591 {
592 num_globals_allocated = 100;
593 globals = xmalloc (num_globals_allocated * sizeof (struct global));
594 }
595 else if (num_globals == num_globals_allocated)
596 {
597 num_globals_allocated *= 2;
598 globals = xrealloc (globals,
599 num_globals_allocated * sizeof (struct global));
600 }
601
602 globals[num_globals - 1].type = type;
603 globals[num_globals - 1].name = name;
604 if (svalue)
605 globals[num_globals - 1].v.svalue = svalue;
606 else
607 globals[num_globals - 1].v.value = value;
608 globals[num_globals - 1].flags = 0;
609 return globals + num_globals - 1;
610 }
611 return NULL;
612 }
613
614 static int
615 compare_globals (const void *a, const void *b)
616 {
617 const struct global *ga = a;
618 const struct global *gb = b;
619
620 if (ga->type != gb->type)
621 return ga->type - gb->type;
622
623 /* Consider "nil" to be the least, so that iQnil is zero. That
624 way, Qnil's internal representation is zero, which is a bit faster. */
625 if (ga->type == SYMBOL)
626 {
627 bool a_nil = strcmp (ga->name, "Qnil") == 0;
628 bool b_nil = strcmp (gb->name, "Qnil") == 0;
629 if (a_nil | b_nil)
630 return b_nil - a_nil;
631 }
632
633 return strcmp (ga->name, gb->name);
634 }
635
/* Write to stdout the text that ends `struct emacs_globals': the
   closing brace, the extern declaration of `globals', and the
   declaration of the `lispsym' array with NUM_SYMBOLS elements
   (declared `extern' unless DEFINE_SYMBOLS is defined).  */
static void
close_emacs_globals (int num_symbols)
{
  fputs ("};\n"
         "extern struct emacs_globals globals;\n"
         "\n"
         "#ifndef DEFINE_SYMBOLS\n"
         "extern\n"
         "#endif\n",
         stdout);
  printf ("struct Lisp_Symbol alignas (GCALIGNMENT) lispsym[%d];\n",
          num_symbols);
}
648
649 static void
650 write_globals (void)
651 {
652 int i, j;
653 bool seen_defun = false;
654 int symnum = 0;
655 int num_symbols = 0;
656 qsort (globals, num_globals, sizeof (struct global), compare_globals);
657
658 j = 0;
659 for (i = 0; i < num_globals; i++)
660 {
661 while (i + 1 < num_globals
662 && strcmp (globals[i].name, globals[i + 1].name) == 0)
663 {
664 if (globals[i].type == FUNCTION
665 && globals[i].v.value != globals[i + 1].v.value)
666 error ("function '%s' defined twice with differing signatures",
667 globals[i].name);
668 i++;
669 }
670 num_symbols += globals[i].type == SYMBOL;
671 globals[j++] = globals[i];
672 }
673 num_globals = j;
674
675 for (i = 0; i < num_globals; ++i)
676 {
677 char const *type = 0;
678
679 switch (globals[i].type)
680 {
681 case EMACS_INTEGER:
682 type = "EMACS_INT";
683 break;
684 case BOOLEAN:
685 type = "bool";
686 break;
687 case LISP_OBJECT:
688 type = "Lisp_Object";
689 break;
690 case SYMBOL:
691 case FUNCTION:
692 if (!seen_defun)
693 {
694 close_emacs_globals (num_symbols);
695 putchar ('\n');
696 seen_defun = true;
697 }
698 break;
699 default:
700 fatal ("not a recognized DEFVAR_", 0);
701 }
702
703 if (type)
704 {
705 printf (" %s f_%s;\n", type, globals[i].name);
706 printf ("#define %s globals.f_%s\n",
707 globals[i].name, globals[i].name);
708 }
709 else if (globals[i].type == SYMBOL)
710 printf (("DEFINE_LISP_SYMBOL_BEGIN (%s)\n"
711 "#define i%s %d\n"
712 "#define %s builtin_lisp_symbol (i%s)\n"
713 "DEFINE_LISP_SYMBOL_END (%s)\n\n"),
714 globals[i].name, globals[i].name, symnum++,
715 globals[i].name, globals[i].name, globals[i].name);
716 else
717 {
718 if (globals[i].flags & DEFUN_noreturn)
719 fputs ("_Noreturn ", stdout);
720
721 printf ("EXFUN (%s, ", globals[i].name);
722 if (globals[i].v.value == -1)
723 fputs ("MANY", stdout);
724 else if (globals[i].v.value == -2)
725 fputs ("UNEVALLED", stdout);
726 else
727 printf ("%d", globals[i].v.value);
728 putchar (')');
729
730 if (globals[i].flags & DEFUN_const)
731 fputs (" ATTRIBUTE_CONST", stdout);
732
733 puts (";");
734 }
735 }
736
737 if (!seen_defun)
738 close_emacs_globals (num_symbols);
739
740 puts ("#ifdef DEFINE_SYMBOLS");
741 puts ("static char const *const defsym_name[] = {");
742 for (int i = 0; i < num_globals; i++)
743 {
744 if (globals[i].type == SYMBOL)
745 printf ("\t\"%s\",\n", globals[i].v.svalue);
746 while (i + 1 < num_globals
747 && strcmp (globals[i].name, globals[i + 1].name) == 0)
748 i++;
749 }
750 puts ("};");
751 puts ("#endif");
752 }
753
754 \f
/* Open the C source file FILENAME with fopen mode MODE and scan it
   with `scan_c_stream', returning that function's value.

   If FILENAME ends in `o' (a .o file was named), the corresponding
   .c file is read instead, or the .m file if the .c file cannot be
   opened.  FILENAME is modified in place while doing so, but always
   holds its original text again on return, so that `main' can still
   detect duplicate file names.

   A file that cannot be opened is reported with perror and is not
   counted as an error: 0 is returned.  An empty FILENAME is handled
   the same way, without indexing before the start of the string.  */

static int
scan_c_file (char *filename, const char *mode)
{
  size_t len = strlen (filename);
  /* The final character of FILENAME, or null if FILENAME is empty.  */
  char *last = len > 0 ? &filename[len - 1] : NULL;
  int extension = last ? *last : '\0';
  FILE *infile;

  if (extension == 'o')
    *last = 'c';

  infile = fopen (filename, mode);

  if (infile == NULL && extension == 'o')
    {
      /* Try .m.  */
      *last = 'm';
      infile = fopen (filename, mode);
      if (infile == NULL)
        *last = 'c';            /* Don't confuse people.  */
    }

  /* No error if the input file does not exist.  */
  if (infile == NULL)
    {
      perror (filename);
      /* Put the original name back only after it has been reported.  */
      if (last)
        *last = extension;
      return 0;
    }

  /* Reset extension to be able to detect duplicate files.  */
  if (last)
    *last = extension;
  return scan_c_stream (infile);
}
791
/* Compare the upcoming input from INFILE with the string P, one
   character at a time.  Return 1 if all of P was matched (which is
   trivially the case for an empty P), 0 at the first character that
   differs, and -1 if the input ends first.  The characters examined,
   including a mismatching one, are consumed.  */

static int
stream_match (FILE *infile, const char *p)
{
  while (*p != '\0')
    {
      int c = getc (infile);

      if (c == EOF)
        return -1;
      if (c != *p)
        return 0;
      p++;
    }
  return 1;
}
808
809 static int
810 scan_c_stream (FILE *infile)
811 {
812 int commas, minargs, maxargs;
813 int c = '\n';
814
815 while (!feof (infile))
816 {
817 int doc_keyword = 0;
818 int defunflag = 0;
819 int defvarperbufferflag = 0;
820 int defvarflag = 0;
821 enum global_type type = INVALID;
822 char *name IF_LINT (= 0);
823
824 if (c != '\n' && c != '\r')
825 {
826 c = getc (infile);
827 continue;
828 }
829 c = getc (infile);
830 if (c == ' ')
831 {
832 while (c == ' ')
833 c = getc (infile);
834 if (c != 'D')
835 continue;
836 c = getc (infile);
837 if (c != 'E')
838 continue;
839 c = getc (infile);
840 if (c != 'F')
841 continue;
842 c = getc (infile);
843 if (c == 'S')
844 {
845 c = getc (infile);
846 if (c != 'Y')
847 continue;
848 c = getc (infile);
849 if (c != 'M')
850 continue;
851 c = getc (infile);
852 if (c != ' ' && c != '\t' && c != '(')
853 continue;
854 type = SYMBOL;
855 }
856 else if (c == 'V')
857 {
858 c = getc (infile);
859 if (c != 'A')
860 continue;
861 c = getc (infile);
862 if (c != 'R')
863 continue;
864 c = getc (infile);
865 if (c != '_')
866 continue;
867
868 defvarflag = 1;
869
870 c = getc (infile);
871 defvarperbufferflag = (c == 'P');
872 if (generate_globals)
873 {
874 if (c == 'I')
875 type = EMACS_INTEGER;
876 else if (c == 'L')
877 type = LISP_OBJECT;
878 else if (c == 'B')
879 type = BOOLEAN;
880 }
881
882 c = getc (infile);
883 /* We need to distinguish between DEFVAR_BOOL and
884 DEFVAR_BUFFER_DEFAULTS. */
885 if (generate_globals && type == BOOLEAN && c != 'O')
886 type = INVALID;
887 }
888 else
889 continue;
890 }
891 else if (c == 'D')
892 {
893 c = getc (infile);
894 if (c != 'E')
895 continue;
896 c = getc (infile);
897 if (c != 'F')
898 continue;
899 c = getc (infile);
900 defunflag = c == 'U';
901 }
902 else continue;
903
904 if (generate_globals
905 && (!defvarflag || defvarperbufferflag || type == INVALID)
906 && !defunflag && type != SYMBOL)
907 continue;
908
909 while (c != '(')
910 {
911 if (c < 0)
912 goto eof;
913 c = getc (infile);
914 }
915
916 if (type != SYMBOL)
917 {
918 /* Lisp variable or function name. */
919 c = getc (infile);
920 if (c != '"')
921 continue;
922 c = read_c_string_or_comment (infile, -1, 0, 0);
923 }
924
925 if (generate_globals)
926 {
927 int i = 0;
928 char const *svalue = 0;
929
930 /* Skip "," and whitespace. */
931 do
932 {
933 c = getc (infile);
934 }
935 while (c == ',' || c == ' ' || c == '\t' || c == '\n' || c == '\r');
936
937 /* Read in the identifier. */
938 do
939 {
940 if (c < 0)
941 goto eof;
942 input_buffer[i++] = c;
943 c = getc (infile);
944 }
945 while (! (c == ',' || c == ' ' || c == '\t'
946 || c == '\n' || c == '\r'));
947 input_buffer[i] = '\0';
948
949 name = xmalloc (i + 1);
950 memcpy (name, input_buffer, i + 1);
951
952 if (type == SYMBOL)
953 {
954 do
955 c = getc (infile);
956 while (c == ' ' || c == '\t' || c == '\n' || c == '\r');
957 if (c != '"')
958 continue;
959 c = read_c_string_or_comment (infile, -1, 0, 0);
960 svalue = xstrdup (input_buffer);
961 }
962
963 if (!defunflag)
964 {
965 add_global (type, name, 0, svalue);
966 continue;
967 }
968 }
969
970 if (type == SYMBOL)
971 continue;
972
973 /* DEFVAR_LISP ("name", addr, "doc")
974 DEFVAR_LISP ("name", addr /\* doc *\/)
975 DEFVAR_LISP ("name", addr, doc: /\* doc *\/) */
976
977 if (defunflag)
978 commas = generate_globals ? 4 : 5;
979 else if (defvarperbufferflag)
980 commas = 3;
981 else if (defvarflag)
982 commas = 1;
983 else /* For DEFSIMPLE and DEFPRED. */
984 commas = 2;
985
986 while (commas)
987 {
988 if (c == ',')
989 {
990 commas--;
991
992 if (defunflag && (commas == 1 || commas == 2))
993 {
994 int scanned = 0;
995 do
996 c = getc (infile);
997 while (c == ' ' || c == '\n' || c == '\r' || c == '\t');
998 if (c < 0)
999 goto eof;
1000 ungetc (c, infile);
1001 if (commas == 2) /* Pick up minargs. */
1002 scanned = fscanf (infile, "%d", &minargs);
1003 else /* Pick up maxargs. */
1004 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
1005 {
1006 if (generate_globals)
1007 maxargs = (c == 'M') ? -1 : -2;
1008 else
1009 maxargs = -1;
1010 }
1011 else
1012 scanned = fscanf (infile, "%d", &maxargs);
1013 if (scanned < 0)
1014 goto eof;
1015 }
1016 }
1017
1018 if (c == EOF)
1019 goto eof;
1020 c = getc (infile);
1021 }
1022
1023 if (generate_globals)
1024 {
1025 struct global *g = add_global (FUNCTION, name, maxargs, 0);
1026
1027 /* The following code tries to recognize function attributes
1028 specified after the docstring, e.g.:
1029
1030 DEFUN ("foo", Ffoo, Sfoo, X, Y, Z,
1031 doc: /\* doc *\/
1032 attributes: attribute1 attribute2 ...)
1033 (Lisp_Object arg...)
1034
1035 Now only 'noreturn' and 'const' attributes are used. */
1036
1037 /* Advance to the end of docstring. */
1038 c = getc (infile);
1039 if (c == EOF)
1040 goto eof;
1041 int d = getc (infile);
1042 if (d == EOF)
1043 goto eof;
1044 while (1)
1045 {
1046 if (c == '*' && d == '/')
1047 break;
1048 c = d, d = getc (infile);
1049 if (d == EOF)
1050 goto eof;
1051 }
1052 /* Skip spaces, if any. */
1053 do
1054 {
1055 c = getc (infile);
1056 if (c == EOF)
1057 goto eof;
1058 }
1059 while (c == ' ' || c == '\n' || c == '\r' || c == '\t');
1060 /* Check for 'attributes:' token. */
1061 if (c == 'a' && stream_match (infile, "ttributes:"))
1062 {
1063 char *p = input_buffer;
1064 /* Collect attributes up to ')'. */
1065 while (1)
1066 {
1067 c = getc (infile);
1068 if (c == EOF)
1069 goto eof;
1070 if (c == ')')
1071 break;
1072 if (p - input_buffer > sizeof (input_buffer))
1073 abort ();
1074 *p++ = c;
1075 }
1076 *p = 0;
1077 if (strstr (input_buffer, "noreturn"))
1078 g->flags |= DEFUN_noreturn;
1079 if (strstr (input_buffer, "const"))
1080 g->flags |= DEFUN_const;
1081 }
1082 continue;
1083 }
1084
1085 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
1086 c = getc (infile);
1087
1088 if (c == '"')
1089 c = read_c_string_or_comment (infile, 0, 0, 0);
1090
1091 while (c != EOF && c != ',' && c != '/')
1092 c = getc (infile);
1093 if (c == ',')
1094 {
1095 c = getc (infile);
1096 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
1097 c = getc (infile);
1098 while ((c >= 'a' && c <= 'z') || (c >= 'Z' && c <= 'Z'))
1099 c = getc (infile);
1100 if (c == ':')
1101 {
1102 doc_keyword = 1;
1103 c = getc (infile);
1104 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
1105 c = getc (infile);
1106 }
1107 }
1108
1109 if (c == '"'
1110 || (c == '/'
1111 && (c = getc (infile),
1112 ungetc (c, infile),
1113 c == '*')))
1114 {
1115 int comment = c != '"';
1116 int saw_usage;
1117
1118 printf ("\037%c%s\n", defvarflag ? 'V' : 'F', input_buffer);
1119
1120 if (comment)
1121 getc (infile); /* Skip past `*'. */
1122 c = read_c_string_or_comment (infile, 1, comment, &saw_usage);
1123
1124 /* If this is a defun, find the arguments and print them. If
1125 this function takes MANY or UNEVALLED args, then the C source
1126 won't give the names of the arguments, so we shouldn't bother
1127 trying to find them.
1128
1129 Various doc-string styles:
1130 0: DEFUN (..., "DOC") (args) [!comment]
1131 1: DEFUN (..., /\* DOC *\/ (args)) [comment && !doc_keyword]
1132 2: DEFUN (..., doc: /\* DOC *\/) (args) [comment && doc_keyword]
1133 */
1134 if (defunflag && maxargs != -1 && !saw_usage)
1135 {
1136 char argbuf[1024], *p = argbuf;
1137
1138 if (!comment || doc_keyword)
1139 while (c != ')')
1140 {
1141 if (c < 0)
1142 goto eof;
1143 c = getc (infile);
1144 }
1145
1146 /* Skip into arguments. */
1147 while (c != '(')
1148 {
1149 if (c < 0)
1150 goto eof;
1151 c = getc (infile);
1152 }
1153 /* Copy arguments into ARGBUF. */
1154 *p++ = c;
1155 do
1156 *p++ = c = getc (infile);
1157 while (c != ')');
1158 *p = '\0';
1159 /* Output them. */
1160 fputs ("\n\n", stdout);
1161 write_c_args (input_buffer, argbuf, minargs, maxargs);
1162 }
1163 else if (defunflag && maxargs == -1 && !saw_usage)
1164 /* The DOC should provide the usage form. */
1165 fprintf (stderr, "Missing `usage' for function `%s'.\n",
1166 input_buffer);
1167 }
1168 }
1169 eof:
1170 fclose (infile);
1171 return 0;
1172 }
1173 \f
1174 /* Read a file of Lisp code, compiled or interpreted.
1175 Looks for
1176 (defun NAME ARGS DOCSTRING ...)
1177 (defmacro NAME ARGS DOCSTRING ...)
1178 (defsubst NAME ARGS DOCSTRING ...)
1179 (autoload (quote NAME) FILE DOCSTRING ...)
1180 (defvar NAME VALUE DOCSTRING)
1181 (defconst NAME VALUE DOCSTRING)
1182 (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
1183 (fset (quote NAME) #[... DOCSTRING ...])
1184 (defalias (quote NAME) #[... DOCSTRING ...])
1185 (custom-declare-variable (quote NAME) VALUE DOCSTRING ...)
1186 starting in column zero.
1187 (quote NAME) may appear as 'NAME as well.
1188
1189 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
1190 When we find that, we save it for the following defining-form,
1191 and we use that instead of reading a doc string within that defining-form.
1192
1193 For defvar, defconst, and fset we skip to the docstring with a kludgy
1194 formatting convention: all docstrings must appear on the same line as the
1195 initial open-paren (the one in column zero) and must contain a backslash
1196 and a newline immediately after the initial double-quote. No newlines
1197 must appear between the beginning of the form and the first double-quote.
1198 For defun, defmacro, and autoload, we know how to skip over the
1199 arglist, but the doc string must still have a backslash and newline
1200 immediately after the double quote.
1201 The only source files that must follow this convention are preloaded
1202 uncompiled ones like loaddefs.el; aside from that, it is always the .elc
1203 file that we should look at, and they are no problem because byte-compiler
1204 output follows this convention.
1205 The NAME and DOCSTRING are output.
1206 NAME is preceded by `F' for a function or `V' for a variable.
1207 An entry is output only if DOCSTRING has \ newline just after the opening ".
1208 */
1209
/* Consume spaces, tabs, newlines and carriage returns from INFILE, and
   leave the stream positioned at the first other character (or at end
   of input).

   The character is kept in an `int' so that EOF stays distinct from
   every byte value: a 0xFF byte is pushed back intact, and nothing is
   pushed back at end of input.  */
static void
skip_white (FILE *infile)
{
  int c;

  do
    c = getc (infile);
  while (c == ' ' || c == '\t' || c == '\n' || c == '\r');

  if (c != EOF)
    ungetc (c, infile);
}
1218
/* Read a Lisp symbol from INFILE into BUFFER as a null-terminated
   string.  Whitespace before and after the symbol is skipped.  The
   symbol ends at whitespace, `(' or `)' (which is left in the stream)
   or at end of input.  A backslash quotes the character after it,
   which is stored literally without the backslash.  If the symbol is
   empty, a complaint is written to stderr.

   NOTE(review): BUFFER's size is not passed in, so an over-long
   symbol is not detected here; the visible caller provides a BUFSIZ
   buffer.  */
static void
read_lisp_symbol (FILE *infile, char *buffer)
{
  /* An `int', so that EOF is distinct from every character.  */
  int c;
  char *fillp = buffer;

  skip_white (infile);
  while (1)
    {
      c = getc (infile);
      if (c == '\\')
        {
          c = getc (infile);
          if (c == EOF)
            {
              *fillp = 0;
              break;
            }
          /* Store the quoted character in the next free slot.  */
          *fillp++ = c;
        }
      else if (c == ' ' || c == '\t' || c == '\n' || c == '\r'
               || c == '(' || c == ')' || c == EOF)
        {
          /* Pushing back EOF is a no-op.  */
          ungetc (c, infile);
          *fillp = 0;
          break;
        }
      else
        *fillp++ = c;
    }

  if (! buffer[0])
    fprintf (stderr, "## expected a symbol, got '%c'\n", c);

  skip_white (infile);
}
1246
/* Read INFILE up to and including the next line break (newline or
   carriage return) or up to end of input.  Return 1 if the two
   characters just before the line break were `"' and `\', which is
   how a doc string begins; in that case the line break has been
   consumed.  Otherwise return 0, with the line break (if there was
   one) pushed back onto INFILE.  */
static int
search_lisp_doc_at_eol (FILE *infile)
{
  int before_last = 0;
  int last = 0;
  int cur = 0;

  /* Skip until the end of line; remember two previous chars.  */
  for (;;)
    {
      if (cur == '\n' || cur == '\r' || cur == EOF)
        break;
      before_last = last;
      last = cur;
      cur = getc (infile);
    }

  /* If two previous characters were " and \,
     this is a doc string.  Otherwise, there is none.  */
  if (before_last == '"' && last == '\\')
    return 1;

#ifdef DEBUG
  fprintf (stderr, "## non-docstring found\n");
#endif
  if (cur != EOF)
    ungetc (cur, infile);
  return 0;
}
1273
1274 #define DEF_ELISP_FILE(fn) { #fn, sizeof(#fn) - 1 }
1275
1276 static int
1277 scan_lisp_file (const char *filename, const char *mode)
1278 {
1279 FILE *infile;
1280 register int c;
1281 char *saved_string = 0;
1282 /* These are the only files that are loaded uncompiled, and must
1283 follow the conventions of the doc strings expected by this
1284 function. These conventions are automatically followed by the
1285 byte compiler when it produces the .elc files. */
1286 static struct {
1287 const char *fn;
1288 size_t fl;
1289 } const uncompiled[] = {
1290 DEF_ELISP_FILE (loaddefs.el),
1291 DEF_ELISP_FILE (loadup.el),
1292 DEF_ELISP_FILE (charprop.el),
1293 DEF_ELISP_FILE (cp51932.el),
1294 DEF_ELISP_FILE (eucjp-ms.el)
1295 };
1296 int i, match;
1297 size_t flen = strlen (filename);
1298
1299 if (generate_globals)
1300 fatal ("scanning lisp file when -g specified", 0);
1301 if (flen > 3 && !strcmp (filename + flen - 3, ".el"))
1302 {
1303 for (i = 0, match = 0; i < sizeof (uncompiled) / sizeof (uncompiled[0]);
1304 i++)
1305 {
1306 if (uncompiled[i].fl <= flen
1307 && !strcmp (filename + flen - uncompiled[i].fl, uncompiled[i].fn)
1308 && (flen == uncompiled[i].fl
1309 || IS_SLASH (filename[flen - uncompiled[i].fl - 1])))
1310 {
1311 match = 1;
1312 break;
1313 }
1314 }
1315 if (!match)
1316 fatal ("uncompiled lisp file %s is not supported", filename);
1317 }
1318
1319 infile = fopen (filename, mode);
1320 if (infile == NULL)
1321 {
1322 perror (filename);
1323 return 0; /* No error. */
1324 }
1325
1326 c = '\n';
1327 while (!feof (infile))
1328 {
1329 char buffer[BUFSIZ];
1330 char type;
1331
1332 /* If not at end of line, skip till we get to one. */
1333 if (c != '\n' && c != '\r')
1334 {
1335 c = getc (infile);
1336 continue;
1337 }
1338 /* Skip the line break. */
1339 while (c == '\n' || c == '\r')
1340 c = getc (infile);
1341 /* Detect a dynamic doc string and save it for the next expression. */
1342 if (c == '#')
1343 {
1344 c = getc (infile);
1345 if (c == '@')
1346 {
1347 size_t length = 0;
1348 size_t i;
1349
1350 /* Read the length. */
1351 while ((c = getc (infile),
1352 c >= '0' && c <= '9'))
1353 {
1354 length *= 10;
1355 length += c - '0';
1356 }
1357
1358 if (length <= 1)
1359 fatal ("invalid dynamic doc string length", "");
1360
1361 if (c != ' ')
1362 fatal ("space not found after dynamic doc string length", "");
1363
1364 /* The next character is a space that is counted in the length
1365 but not part of the doc string.
1366 We already read it, so just ignore it. */
1367 length--;
1368
1369 /* Read in the contents. */
1370 free (saved_string);
1371 saved_string = (char *) xmalloc (length);
1372 for (i = 0; i < length; i++)
1373 saved_string[i] = getc (infile);
1374 /* The last character is a ^_.
1375 That is needed in the .elc file
1376 but it is redundant in DOC. So get rid of it here. */
1377 saved_string[length - 1] = 0;
1378 /* Skip the line break. */
1379 while (c == '\n' || c == '\r')
1380 c = getc (infile);
1381 /* Skip the following line. */
1382 while (c != '\n' && c != '\r')
1383 c = getc (infile);
1384 }
1385 continue;
1386 }
1387
1388 if (c != '(')
1389 continue;
1390
1391 read_lisp_symbol (infile, buffer);
1392
1393 if (! strcmp (buffer, "defun")
1394 || ! strcmp (buffer, "defmacro")
1395 || ! strcmp (buffer, "defsubst"))
1396 {
1397 type = 'F';
1398 read_lisp_symbol (infile, buffer);
1399
1400 /* Skip the arguments: either "nil" or a list in parens. */
1401
1402 c = getc (infile);
1403 if (c == 'n') /* nil */
1404 {
1405 if ((c = getc (infile)) != 'i'
1406 || (c = getc (infile)) != 'l')
1407 {
1408 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1409 buffer, filename);
1410 continue;
1411 }
1412 }
1413 else if (c != '(')
1414 {
1415 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1416 buffer, filename);
1417 continue;
1418 }
1419 else
1420 while (c != ')')
1421 c = getc (infile);
1422 skip_white (infile);
1423
1424 /* If the next three characters aren't `dquote bslash newline'
1425 then we're not reading a docstring.
1426 */
1427 if ((c = getc (infile)) != '"'
1428 || (c = getc (infile)) != '\\'
1429 || ((c = getc (infile)) != '\n' && c != '\r'))
1430 {
1431 #ifdef DEBUG
1432 fprintf (stderr, "## non-docstring in %s (%s)\n",
1433 buffer, filename);
1434 #endif
1435 continue;
1436 }
1437 }
1438
1439 /* defcustom can only occur in uncompiled Lisp files. */
1440 else if (! strcmp (buffer, "defvar")
1441 || ! strcmp (buffer, "defconst")
1442 || ! strcmp (buffer, "defcustom"))
1443 {
1444 type = 'V';
1445 read_lisp_symbol (infile, buffer);
1446
1447 if (saved_string == 0)
1448 if (!search_lisp_doc_at_eol (infile))
1449 continue;
1450 }
1451
1452 else if (! strcmp (buffer, "custom-declare-variable")
1453 || ! strcmp (buffer, "defvaralias")
1454 )
1455 {
1456 type = 'V';
1457
1458 c = getc (infile);
1459 if (c == '\'')
1460 read_lisp_symbol (infile, buffer);
1461 else
1462 {
1463 if (c != '(')
1464 {
1465 fprintf (stderr,
1466 "## unparsable name in custom-declare-variable in %s\n",
1467 filename);
1468 continue;
1469 }
1470 read_lisp_symbol (infile, buffer);
1471 if (strcmp (buffer, "quote"))
1472 {
1473 fprintf (stderr,
1474 "## unparsable name in custom-declare-variable in %s\n",
1475 filename);
1476 continue;
1477 }
1478 read_lisp_symbol (infile, buffer);
1479 c = getc (infile);
1480 if (c != ')')
1481 {
1482 fprintf (stderr,
1483 "## unparsable quoted name in custom-declare-variable in %s\n",
1484 filename);
1485 continue;
1486 }
1487 }
1488
1489 if (saved_string == 0)
1490 if (!search_lisp_doc_at_eol (infile))
1491 continue;
1492 }
1493
1494 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
1495 {
1496 type = 'F';
1497
1498 c = getc (infile);
1499 if (c == '\'')
1500 read_lisp_symbol (infile, buffer);
1501 else
1502 {
1503 if (c != '(')
1504 {
1505 fprintf (stderr, "## unparsable name in fset in %s\n",
1506 filename);
1507 continue;
1508 }
1509 read_lisp_symbol (infile, buffer);
1510 if (strcmp (buffer, "quote"))
1511 {
1512 fprintf (stderr, "## unparsable name in fset in %s\n",
1513 filename);
1514 continue;
1515 }
1516 read_lisp_symbol (infile, buffer);
1517 c = getc (infile);
1518 if (c != ')')
1519 {
1520 fprintf (stderr,
1521 "## unparsable quoted name in fset in %s\n",
1522 filename);
1523 continue;
1524 }
1525 }
1526
1527 if (saved_string == 0)
1528 if (!search_lisp_doc_at_eol (infile))
1529 continue;
1530 }
1531
1532 else if (! strcmp (buffer, "autoload"))
1533 {
1534 type = 'F';
1535 c = getc (infile);
1536 if (c == '\'')
1537 read_lisp_symbol (infile, buffer);
1538 else
1539 {
1540 if (c != '(')
1541 {
1542 fprintf (stderr, "## unparsable name in autoload in %s\n",
1543 filename);
1544 continue;
1545 }
1546 read_lisp_symbol (infile, buffer);
1547 if (strcmp (buffer, "quote"))
1548 {
1549 fprintf (stderr, "## unparsable name in autoload in %s\n",
1550 filename);
1551 continue;
1552 }
1553 read_lisp_symbol (infile, buffer);
1554 c = getc (infile);
1555 if (c != ')')
1556 {
1557 fprintf (stderr,
1558 "## unparsable quoted name in autoload in %s\n",
1559 filename);
1560 continue;
1561 }
1562 }
1563 skip_white (infile);
1564 if ((c = getc (infile)) != '\"')
1565 {
1566 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
1567 buffer, filename);
1568 continue;
1569 }
1570 read_c_string_or_comment (infile, 0, 0, 0);
1571
1572 if (saved_string == 0)
1573 if (!search_lisp_doc_at_eol (infile))
1574 continue;
1575 }
1576
1577 #ifdef DEBUG
1578 else if (! strcmp (buffer, "if")
1579 || ! strcmp (buffer, "byte-code"))
1580 continue;
1581 #endif
1582
1583 else
1584 {
1585 #ifdef DEBUG
1586 fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
1587 buffer, filename);
1588 #endif
1589 continue;
1590 }
1591
1592 /* At this point, we should either use the previous dynamic doc string in
1593 saved_string or gobble a doc string from the input file.
1594 In the latter case, the opening quote (and leading backslash-newline)
1595 have already been read. */
1596
1597 printf ("\037%c%s\n", type, buffer);
1598 if (saved_string)
1599 {
1600 fputs (saved_string, stdout);
1601 /* Don't use one dynamic doc string twice. */
1602 free (saved_string);
1603 saved_string = 0;
1604 }
1605 else
1606 read_c_string_or_comment (infile, 1, 0, 0);
1607 }
1608 fclose (infile);
1609 return 0;
1610 }
1611
1612
1613 /* make-docfile.c ends here */