]> code.delx.au - gnu-emacs/blob - lib-src/make-docfile.c
merge master, fix conflicts
[gnu-emacs] / lib-src / make-docfile.c
1 /* Generate doc-string file for GNU Emacs from source files.
2
3 Copyright (C) 1985-1986, 1992-1994, 1997, 1999-2015 Free Software
4 Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20
21
22 /* The arguments given to this program are all the C and Lisp source files
23 of GNU Emacs. .elc and .el and .c files are allowed.
24 A .o file can also be specified; the .c file it was made from is used.
25 This helps the makefile pass the correct list of files.
26 Option -d DIR means change to DIR before looking for files.
27
28 The results, which go to standard output or to a file
29 specified with -a or -o (-a to append, -o to start from nothing),
30 are entries containing function or variable names and their documentation.
31 Each entry starts with a ^_ character.
32 Then comes F for a function or V for a variable.
33 Then comes the function or variable name, terminated with a newline.
34 Then comes the documentation for that function or variable.
35 */
36
37 #include <config.h>
38
39 #include <stdbool.h>
40 #include <stdio.h>
41 #include <stdlib.h> /* config.h unconditionally includes this anyway */
42
43 #ifdef WINDOWSNT
44 /* Defined to be sys_fopen in ms-w32.h, but only #ifdef emacs, so this
45 is really just insurance. */
46 #undef fopen
47 #include <direct.h>
48 #endif /* WINDOWSNT */
49
50 #include <binary-io.h>
51
52 #ifdef DOS_NT
53 /* Defined to be sys_chdir in ms-w32.h, but only #ifdef emacs, so this
54 is really just insurance.
55
56 Similarly, msdos defines this as sys_chdir, but we're not linking with the
57 file where that function is defined. */
58 #undef chdir
59 #define IS_SLASH(c) ((c) == '/' || (c) == '\\' || (c) == ':')
60 #else /* not DOS_NT */
61 #define IS_SLASH(c) ((c) == '/')
62 #endif /* not DOS_NT */
63
64 static int scan_file (char *filename);
65 static int scan_lisp_file (const char *filename, const char *mode);
66 static int scan_c_file (char *filename, const char *mode);
67 static int scan_c_stream (FILE *infile);
68 static void start_globals (void);
69 static void write_globals (void);
70
71 #include <unistd.h>
72
73 /* Name this program was invoked with. */
74 char *progname;
75
76 /* Nonzero if this invocation is generating globals.h. */
77 int generate_globals;
78
79 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
80
81 /* VARARGS1 */
82 static void
83 error (const char *s1, const char *s2)
84 {
85 fprintf (stderr, "%s: ", progname);
86 fprintf (stderr, s1, s2);
87 fprintf (stderr, "\n");
88 }
89
/* Report an error the same way `error' does (S1 is the control
   string, S2 its argument), then terminate the program with
   EXIT_FAILURE.  This function never returns.  */

/* VARARGS1 */
static _Noreturn void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
99
/* Allocate SIZE bytes with malloc.  If the allocation fails, report
   "virtual memory exhausted" and exit instead of returning NULL.  */

static void *
xmalloc (unsigned int size)
{
  void *block = malloc (size);

  if (!block)
    fatal ("virtual memory exhausted", 0);
  return block;
}
110
/* Return a freshly allocated copy of the string S, made with strdup.
   If the allocation fails, report "virtual memory exhausted" and exit
   instead of returning NULL.  S is only read, so it is accepted as a
   pointer to const.  */

static char *
xstrdup (const char *s)
{
  char *result = strdup (s);
  if (! result)
    fatal ("virtual memory exhausted", 0);
  return result;
}
121
/* Resize the block ARG to SIZE bytes with realloc.  If the
   reallocation fails, report "virtual memory exhausted" and exit
   instead of returning NULL.  */

static void *
xrealloc (void *arg, unsigned int size)
{
  void *block = realloc (arg, size);

  if (!block)
    fatal ("virtual memory exhausted", 0);
  return block;
}
132
133 \f
134 int
135 main (int argc, char **argv)
136 {
137 int i;
138 int err_count = 0;
139
140 progname = argv[0];
141
142 /* If first two args are -o FILE, output to FILE. */
143 i = 1;
144 if (argc > i + 1 && !strcmp (argv[i], "-o"))
145 {
146 if (! freopen (argv[i + 1], "w", stdout))
147 {
148 perror (argv[i + 1]);
149 return EXIT_FAILURE;
150 }
151 i += 2;
152 }
153 if (argc > i + 1 && !strcmp (argv[i], "-a"))
154 {
155 if (! freopen (argv[i + 1], "a", stdout))
156 {
157 perror (argv[i + 1]);
158 return EXIT_FAILURE;
159 }
160 i += 2;
161 }
162 if (argc > i + 1 && !strcmp (argv[i], "-d"))
163 {
164 if (chdir (argv[i + 1]) != 0)
165 {
166 perror (argv[i + 1]);
167 return EXIT_FAILURE;
168 }
169 i += 2;
170 }
171 if (argc > i && !strcmp (argv[i], "-g"))
172 {
173 generate_globals = 1;
174 ++i;
175 }
176
177 set_binary_mode (fileno (stdout), O_BINARY);
178
179 if (generate_globals)
180 start_globals ();
181
182 if (argc <= i)
183 scan_c_stream (stdin);
184 else
185 {
186 int first_infile = i;
187 for (; i < argc; i++)
188 {
189 int j;
190 /* Don't process one file twice. */
191 for (j = first_infile; j < i; j++)
192 if (strcmp (argv[i], argv[j]) == 0)
193 break;
194 if (j == i)
195 err_count += scan_file (argv[i]);
196 }
197 }
198
199 if (err_count == 0 && generate_globals)
200 write_globals ();
201
202 return (err_count > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
203 }
204
/* Write a source-file boundary marker to stdout: the character \037,
   the letter S, the part of FILENAME after its last directory
   separator, and a newline.  */
static void
put_filename (char *filename)
{
  char *base = filename;
  char *p;

  for (p = filename; *p != '\0'; p++)
    if (IS_DIRECTORY_SEP (*p))
      base = p + 1;

  printf ("\037S%s\n", base);
}
219
220 /* Read file FILENAME and output its doc strings to stdout.
221 Return 1 if file is not found, 0 if it is found. */
222
223 static int
224 scan_file (char *filename)
225 {
226
227 size_t len = strlen (filename);
228
229 if (!generate_globals)
230 put_filename (filename);
231 if (len > 4 && !strcmp (filename + len - 4, ".elc"))
232 return scan_lisp_file (filename, "rb");
233 else if (len > 3 && !strcmp (filename + len - 3, ".el"))
234 return scan_lisp_file (filename, "r");
235 else
236 return scan_c_file (filename, "r");
237 }
238
/* Write to stdout the banner comments and the opening line of
   `struct emacs_globals'.  */
static void
start_globals (void)
{
  fputs ("/* This file was auto-generated by make-docfile.  */\n"
         "/* DO NOT EDIT.  */\n"
         "struct emacs_globals {\n",
         stdout);
}
246 \f
247 static char input_buffer[128];
248
/* Working state shared by `read_c_string_or_comment' and its
   helpers while one string or comment is being read.  */
struct rcsoc_state
{
  /* Number of spaces read but not yet output.  */
  unsigned int pending_spaces;
  /* Number of newlines read but not yet output.  */
  unsigned int pending_newlines;

  /* The stream being read.  */
  FILE *in_file;

  /* If non-null, the next position in a buffer that receives a copy
     of the characters.  */
  char *buf_ptr;
  /* If non-null, a stream that receives a copy of the characters.  */
  FILE *out_file;

  /* If non-null, a keyword to look for at the beginning of lines.
     An occurrence is not copied; SAW_KEYWORD is set instead.  */
  const char *keyword;
  /* How far into KEYWORD the input has matched so far.  */
  const char *cur_keyword_ptr;
  /* Nonzero once an occurrence of KEYWORD has been seen.  */
  int saw_keyword;
};
272
273 /* Output CH to the file or buffer in STATE. Any pending newlines or
274 spaces are output first. */
275
276 static void
277 put_char (int ch, struct rcsoc_state *state)
278 {
279 int out_ch;
280 do
281 {
282 if (state->pending_newlines > 0)
283 {
284 state->pending_newlines--;
285 out_ch = '\n';
286 }
287 else if (state->pending_spaces > 0)
288 {
289 state->pending_spaces--;
290 out_ch = ' ';
291 }
292 else
293 out_ch = ch;
294
295 if (state->out_file)
296 putc (out_ch, state->out_file);
297 if (state->buf_ptr)
298 *state->buf_ptr++ = out_ch;
299 }
300 while (out_ch != ch);
301 }
302
303 /* If in the middle of scanning a keyword, continue scanning with
304 character CH, otherwise output CH to the file or buffer in STATE.
305 Any pending newlines or spaces are output first, as well as any
306 previously scanned characters that were thought to be part of a
307 keyword, but were in fact not. */
308
309 static void
310 scan_keyword_or_put_char (int ch, struct rcsoc_state *state)
311 {
312 if (state->keyword
313 && *state->cur_keyword_ptr == ch
314 && (state->cur_keyword_ptr > state->keyword
315 || state->pending_newlines > 0))
316 /* We might be looking at STATE->keyword at some point.
317 Keep looking until we know for sure. */
318 {
319 if (*++state->cur_keyword_ptr == '\0')
320 /* Saw the whole keyword. Set SAW_KEYWORD flag to true. */
321 {
322 state->saw_keyword = 1;
323
324 /* Reset the scanning pointer. */
325 state->cur_keyword_ptr = state->keyword;
326
327 /* Canonicalize whitespace preceding a usage string. */
328 state->pending_newlines = 2;
329 state->pending_spaces = 0;
330
331 /* Skip any whitespace between the keyword and the
332 usage string. */
333 do
334 ch = getc (state->in_file);
335 while (ch == ' ' || ch == '\n');
336
337 /* Output the open-paren we just read. */
338 put_char (ch, state);
339
340 /* Skip the function name and replace it with `fn'. */
341 do
342 ch = getc (state->in_file);
343 while (ch != ' ' && ch != ')');
344 put_char ('f', state);
345 put_char ('n', state);
346
347 /* Put back the last character. */
348 ungetc (ch, state->in_file);
349 }
350 }
351 else
352 {
353 if (state->keyword && state->cur_keyword_ptr > state->keyword)
354 /* We scanned the beginning of a potential usage
355 keyword, but it was a false alarm. Output the
356 part we scanned. */
357 {
358 const char *p;
359
360 for (p = state->keyword; p < state->cur_keyword_ptr; p++)
361 put_char (*p, state);
362
363 state->cur_keyword_ptr = state->keyword;
364 }
365
366 put_char (ch, state);
367 }
368 }
369
370
371 /* Skip a C string or C-style comment from INFILE, and return the
372 character that follows. COMMENT non-zero means skip a comment. If
373 PRINTFLAG is positive, output string contents to stdout. If it is
374 negative, store contents in buf. Convert escape sequences \n and
375 \t to newline and tab; discard \ followed by newline.
376 If SAW_USAGE is non-zero, then any occurrences of the string `usage:'
377 at the beginning of a line will be removed, and *SAW_USAGE set to
378 true if any were encountered. */
379
380 static int
381 read_c_string_or_comment (FILE *infile, int printflag, int comment, int *saw_usage)
382 {
383 register int c;
384 struct rcsoc_state state;
385
386 state.in_file = infile;
387 state.buf_ptr = (printflag < 0 ? input_buffer : 0);
388 state.out_file = (printflag > 0 ? stdout : 0);
389 state.pending_spaces = 0;
390 state.pending_newlines = 0;
391 state.keyword = (saw_usage ? "usage:" : 0);
392 state.cur_keyword_ptr = state.keyword;
393 state.saw_keyword = 0;
394
395 c = getc (infile);
396 if (comment)
397 while (c == '\n' || c == '\r' || c == '\t' || c == ' ')
398 c = getc (infile);
399
400 while (c != EOF)
401 {
402 while (c != EOF && (comment ? c != '*' : c != '"'))
403 {
404 if (c == '\\')
405 {
406 c = getc (infile);
407 if (c == '\n' || c == '\r')
408 {
409 c = getc (infile);
410 continue;
411 }
412 if (c == 'n')
413 c = '\n';
414 if (c == 't')
415 c = '\t';
416 }
417
418 if (c == ' ')
419 state.pending_spaces++;
420 else if (c == '\n')
421 {
422 state.pending_newlines++;
423 state.pending_spaces = 0;
424 }
425 else
426 scan_keyword_or_put_char (c, &state);
427
428 c = getc (infile);
429 }
430
431 if (c != EOF)
432 c = getc (infile);
433
434 if (comment)
435 {
436 if (c == '/')
437 {
438 c = getc (infile);
439 break;
440 }
441
442 scan_keyword_or_put_char ('*', &state);
443 }
444 else
445 {
446 if (c != '"')
447 break;
448
449 /* If we had a "", concatenate the two strings. */
450 c = getc (infile);
451 }
452 }
453
454 if (printflag < 0)
455 *state.buf_ptr = 0;
456
457 if (saw_usage)
458 *saw_usage = state.saw_keyword;
459
460 return c;
461 }
462
463
464 \f
/* Write to stdout the argument names of function FUNC, whose C
   argument list text is in BUF, in the form "(fn ARG1 ARG2 ...)".
   MINARGS and MAXARGS are the minimum and maximum number of
   arguments; "&optional" is written before the first argument past
   MINARGS.

   The name of each argument is the last identifier seen before the
   `,' or `)' that ends it.  Names are upcased and `_' becomes `-';
   `defalt' is written as DEFAULT.  An argument that is exactly
   `void' is skipped.  An argument with no identifier at all is
   reported through `error' and skipped.  */

static void
write_c_args (char *func, char *buf, int minargs, int maxargs)
{
  char *p;
  int in_ident = 0;
  char *ident_start = NULL;
  size_t ident_length = 0;

  fputs ("(fn", stdout);

  if (*buf == '(')
    ++buf;

  for (p = buf; *p; p++)
    {
      char c = *p;
      int ident_char = (('A' <= c && c <= 'Z')
                        || ('a' <= c && c <= 'z')
                        || ('0' <= c && c <= '9')
                        || c == '_');

      /* Notice when an identifier starts or ends.  */
      if (ident_char != in_ident)
        {
          if (!in_ident)
            {
              in_ident = 1;
              ident_start = p;
            }
          else
            {
              in_ident = 0;
              ident_length = p - ident_start;
            }
        }

      /* Found the end of an argument, write out the last seen
         identifier.  */
      if (c == ',' || c == ')')
        {
          size_t k;

          if (ident_length == 0)
            {
              error ("empty arg list for `%s' should be (void), not ()", func);
              continue;
            }

          /* Only the exact word `void' means "no arguments".  A
             prefix comparison would also discard arguments named
             `v', `vo' or `voi'.  */
          if (ident_length == 4 && memcmp (ident_start, "void", 4) == 0)
            {
              ident_length = 0;
              continue;
            }

          putchar (' ');

          if (minargs == 0 && maxargs > 0)
            fputs ("&optional ", stdout);

          minargs--;
          maxargs--;

          /* In C code, `default' is a reserved word, so we spell it
             `defalt'; demangle that here.  */
          if (ident_length == 6 && memcmp (ident_start, "defalt", 6) == 0)
            fputs ("DEFAULT", stdout);
          else
            for (k = 0; k < ident_length; k++)
              {
                char out = ident_start[k];

                if (out >= 'a' && out <= 'z')
                  /* Upcase the letter.  */
                  out += 'A' - 'a';
                else if (out == '_')
                  /* Print underscore as hyphen.  */
                  out = '-';
                putchar (out);
              }

          /* Forget this identifier so that a following argument
             with no identifier is diagnosed instead of reusing a
             stale start and length.  */
          ident_length = 0;
        }
    }

  putchar (')');
}
546 \f
/* The kinds of globals.  The numeric order is the order in which
   `compare_globals' sorts them: roughly decreasing alignment, to
   avoid allocation gaps, with symbols and functions last.  */
enum global_type
{
  INVALID = 0,
  LISP_OBJECT = 1,
  EMACS_INTEGER = 2,
  BOOLEAN = 3,
  SYMBOL = 4,
  FUNCTION = 5
};
559
560 /* A single global. */
561 struct global
562 {
563 enum global_type type;
564 char *name;
565 union
566 {
567 int value;
568 char const *svalue;
569 } v;
570 };
571
572 /* All the variable names we saw while scanning C sources in `-g'
573 mode. */
574 int num_globals;
575 int num_globals_allocated;
576 struct global *globals;
577
578 static void
579 add_global (enum global_type type, char *name, int value, char const *svalue)
580 {
581 /* Ignore the one non-symbol that can occur. */
582 if (strcmp (name, "..."))
583 {
584 ++num_globals;
585
586 if (num_globals_allocated == 0)
587 {
588 num_globals_allocated = 100;
589 globals = xmalloc (num_globals_allocated * sizeof (struct global));
590 }
591 else if (num_globals == num_globals_allocated)
592 {
593 num_globals_allocated *= 2;
594 globals = xrealloc (globals,
595 num_globals_allocated * sizeof (struct global));
596 }
597
598 globals[num_globals - 1].type = type;
599 globals[num_globals - 1].name = name;
600 if (svalue)
601 globals[num_globals - 1].v.svalue = svalue;
602 else
603 globals[num_globals - 1].v.value = value;
604 }
605 }
606
607 static int
608 compare_globals (const void *a, const void *b)
609 {
610 const struct global *ga = a;
611 const struct global *gb = b;
612
613 if (ga->type != gb->type)
614 return ga->type - gb->type;
615
616 /* Consider "nil" to be the least, so that iQnil is zero. That
617 way, Qnil's internal representation is zero, which is a bit faster. */
618 if (ga->type == SYMBOL)
619 {
620 bool a_nil = strcmp (ga->name, "Qnil") == 0;
621 bool b_nil = strcmp (gb->name, "Qnil") == 0;
622 if (a_nil | b_nil)
623 return b_nil - a_nil;
624 }
625
626 return strcmp (ga->name, gb->name);
627 }
628
/* Write to stdout the closing brace of `struct emacs_globals', the
   extern declaration of `globals', and the declaration of the
   `lispsym' array with NUM_SYMBOLS elements.  */
static void
close_emacs_globals (int num_symbols)
{
  fputs ("};\n"
         "extern struct emacs_globals globals;\n"
         "\n"
         "#ifndef DEFINE_SYMBOLS\n"
         "extern\n"
         "#endif\n",
         stdout);
  printf ("struct Lisp_Symbol alignas (GCALIGNMENT) lispsym[%d];\n",
          num_symbols);
}
641
642 static void
643 write_globals (void)
644 {
645 int i, j;
646 bool seen_defun = false;
647 int symnum = 0;
648 int num_symbols = 0;
649 qsort (globals, num_globals, sizeof (struct global), compare_globals);
650
651 j = 0;
652 for (i = 0; i < num_globals; i++)
653 {
654 while (i + 1 < num_globals
655 && strcmp (globals[i].name, globals[i + 1].name) == 0)
656 {
657 if (globals[i].type == FUNCTION
658 && globals[i].v.value != globals[i + 1].v.value)
659 error ("function '%s' defined twice with differing signatures",
660 globals[i].name);
661 i++;
662 }
663 num_symbols += globals[i].type == SYMBOL;
664 globals[j++] = globals[i];
665 }
666 num_globals = j;
667
668 for (i = 0; i < num_globals; ++i)
669 {
670 char const *type = 0;
671
672 switch (globals[i].type)
673 {
674 case EMACS_INTEGER:
675 type = "EMACS_INT";
676 break;
677 case BOOLEAN:
678 type = "bool";
679 break;
680 case LISP_OBJECT:
681 type = "Lisp_Object";
682 break;
683 case SYMBOL:
684 case FUNCTION:
685 if (!seen_defun)
686 {
687 close_emacs_globals (num_symbols);
688 putchar ('\n');
689 seen_defun = true;
690 }
691 break;
692 default:
693 fatal ("not a recognized DEFVAR_", 0);
694 }
695
696 if (type)
697 {
698 printf (" %s f_%s;\n", type, globals[i].name);
699 printf ("#define %s globals.f_%s\n",
700 globals[i].name, globals[i].name);
701 }
702 else if (globals[i].type == SYMBOL)
703 printf (("DEFINE_LISP_SYMBOL_BEGIN (%s)\n"
704 "#define i%s %d\n"
705 "#define %s builtin_lisp_symbol (i%s)\n"
706 "DEFINE_LISP_SYMBOL_END (%s)\n\n"),
707 globals[i].name, globals[i].name, symnum++,
708 globals[i].name, globals[i].name, globals[i].name);
709 else
710 {
711 /* It would be nice to have a cleaner way to deal with these
712 special hacks. */
713 if (strcmp (globals[i].name, "Fthrow") == 0
714 || strcmp (globals[i].name, "Ftop_level") == 0
715 || strcmp (globals[i].name, "Fkill_emacs") == 0
716 || strcmp (globals[i].name, "Fexit_recursive_edit") == 0
717 || strcmp (globals[i].name, "Fabort_recursive_edit") == 0)
718 fputs ("_Noreturn ", stdout);
719
720 printf ("EXFUN (%s, ", globals[i].name);
721 if (globals[i].v.value == -1)
722 fputs ("MANY", stdout);
723 else if (globals[i].v.value == -2)
724 fputs ("UNEVALLED", stdout);
725 else
726 printf ("%d", globals[i].v.value);
727 putchar (')');
728
729 /* It would be nice to have a cleaner way to deal with these
730 special hacks, too. */
731 if (strcmp (globals[i].name, "Fatom") == 0
732 || strcmp (globals[i].name, "Fbyteorder") == 0
733 || strcmp (globals[i].name, "Fcharacterp") == 0
734 || strcmp (globals[i].name, "Fchar_or_string_p") == 0
735 || strcmp (globals[i].name, "Fconsp") == 0
736 || strcmp (globals[i].name, "Feq") == 0
737 || strcmp (globals[i].name, "Fface_attribute_relative_p") == 0
738 || strcmp (globals[i].name, "Fframe_windows_min_size") == 0
739 || strcmp (globals[i].name, "Fgnutls_errorp") == 0
740 || strcmp (globals[i].name, "Fidentity") == 0
741 || strcmp (globals[i].name, "Fintegerp") == 0
742 || strcmp (globals[i].name, "Finteractive") == 0
743 || strcmp (globals[i].name, "Ffloatp") == 0
744 || strcmp (globals[i].name, "Flistp") == 0
745 || strcmp (globals[i].name, "Fmax_char") == 0
746 || strcmp (globals[i].name, "Fnatnump") == 0
747 || strcmp (globals[i].name, "Fnlistp") == 0
748 || strcmp (globals[i].name, "Fnull") == 0
749 || strcmp (globals[i].name, "Fnumberp") == 0
750 || strcmp (globals[i].name, "Fstringp") == 0
751 || strcmp (globals[i].name, "Fsymbolp") == 0
752 || strcmp (globals[i].name, "Ftool_bar_height") == 0
753 || strcmp (globals[i].name, "Fwindow__sanitize_window_sizes") == 0
754 #ifndef WINDOWSNT
755 || strcmp (globals[i].name, "Fgnutls_available_p") == 0
756 || strcmp (globals[i].name, "Fzlib_available_p") == 0
757 #endif
758 || 0)
759 fputs (" ATTRIBUTE_CONST", stdout);
760
761 puts (";");
762 }
763 }
764
765 if (!seen_defun)
766 close_emacs_globals (num_symbols);
767
768 puts ("#ifdef DEFINE_SYMBOLS");
769 puts ("static char const *const defsym_name[] = {");
770 for (int i = 0; i < num_globals; i++)
771 {
772 if (globals[i].type == SYMBOL)
773 printf ("\t\"%s\",\n", globals[i].v.svalue);
774 while (i + 1 < num_globals
775 && strcmp (globals[i].name, globals[i + 1].name) == 0)
776 i++;
777 }
778 puts ("};");
779 puts ("#endif");
780 }
781
782 \f
/* Read through a C file opened with MODE and scan it with
   `scan_c_stream'.  If FILENAME names a .o file, the corresponding
   .c file is read instead, or the .m file if the .c file cannot be
   opened.

   FILENAME is modified in place while the substitute name is tried
   and is always restored before returning, so that the caller can
   still detect duplicate file names.  A file that cannot be opened
   is reported with perror and is not an error: 0 is returned.
   Otherwise the value of `scan_c_stream' is returned.  */

static int
scan_c_file (char *filename, const char *mode)
{
  FILE *infile;
  size_t len = strlen (filename);
  /* Only a real ".o" suffix is rewritten.  Testing just the last
     character would mangle names such as "demo", and would index
     before the start of an empty name.  */
  bool is_object = (len >= 2
                    && filename[len - 2] == '.'
                    && filename[len - 1] == 'o');

  if (is_object)
    filename[len - 1] = 'c';

  infile = fopen (filename, mode);

  if (infile == NULL && is_object)
    {
      /* Try .m.  */
      filename[len - 1] = 'm';
      infile = fopen (filename, mode);
      if (infile == NULL)
        filename[len - 1] = 'c'; /* Don't confuse people.  */
    }

  /* A nonexistent input file is not an error.  */
  if (infile == NULL)
    {
      perror (filename);
      if (is_object)
        filename[len - 1] = 'o';
      return 0;
    }

  /* Reset extension to be able to detect duplicate files.  */
  if (is_object)
    filename[len - 1] = 'o';
  return scan_c_stream (infile);
}
819
820 static int
821 scan_c_stream (FILE *infile)
822 {
823 int commas, minargs, maxargs;
824 int c = '\n';
825
826 while (!feof (infile))
827 {
828 int doc_keyword = 0;
829 int defunflag = 0;
830 int defvarperbufferflag = 0;
831 int defvarflag = 0;
832 enum global_type type = INVALID;
833 char *name IF_LINT (= 0);
834
835 if (c != '\n' && c != '\r')
836 {
837 c = getc (infile);
838 continue;
839 }
840 c = getc (infile);
841 if (c == ' ')
842 {
843 while (c == ' ')
844 c = getc (infile);
845 if (c != 'D')
846 continue;
847 c = getc (infile);
848 if (c != 'E')
849 continue;
850 c = getc (infile);
851 if (c != 'F')
852 continue;
853 c = getc (infile);
854 if (c == 'S')
855 {
856 c = getc (infile);
857 if (c != 'Y')
858 continue;
859 c = getc (infile);
860 if (c != 'M')
861 continue;
862 c = getc (infile);
863 if (c != ' ' && c != '\t' && c != '(')
864 continue;
865 type = SYMBOL;
866 }
867 else if (c == 'V')
868 {
869 c = getc (infile);
870 if (c != 'A')
871 continue;
872 c = getc (infile);
873 if (c != 'R')
874 continue;
875 c = getc (infile);
876 if (c != '_')
877 continue;
878
879 defvarflag = 1;
880
881 c = getc (infile);
882 defvarperbufferflag = (c == 'P');
883 if (generate_globals)
884 {
885 if (c == 'I')
886 type = EMACS_INTEGER;
887 else if (c == 'L')
888 type = LISP_OBJECT;
889 else if (c == 'B')
890 type = BOOLEAN;
891 }
892
893 c = getc (infile);
894 /* We need to distinguish between DEFVAR_BOOL and
895 DEFVAR_BUFFER_DEFAULTS. */
896 if (generate_globals && type == BOOLEAN && c != 'O')
897 type = INVALID;
898 }
899 else
900 continue;
901 }
902 else if (c == 'D')
903 {
904 c = getc (infile);
905 if (c != 'E')
906 continue;
907 c = getc (infile);
908 if (c != 'F')
909 continue;
910 c = getc (infile);
911 defunflag = c == 'U';
912 }
913 else continue;
914
915 if (generate_globals
916 && (!defvarflag || defvarperbufferflag || type == INVALID)
917 && !defunflag && type != SYMBOL)
918 continue;
919
920 while (c != '(')
921 {
922 if (c < 0)
923 goto eof;
924 c = getc (infile);
925 }
926
927 if (type != SYMBOL)
928 {
929 /* Lisp variable or function name. */
930 c = getc (infile);
931 if (c != '"')
932 continue;
933 c = read_c_string_or_comment (infile, -1, 0, 0);
934 }
935
936 if (generate_globals)
937 {
938 int i = 0;
939 char const *svalue = 0;
940
941 /* Skip "," and whitespace. */
942 do
943 {
944 c = getc (infile);
945 }
946 while (c == ',' || c == ' ' || c == '\t' || c == '\n' || c == '\r');
947
948 /* Read in the identifier. */
949 do
950 {
951 if (c < 0)
952 goto eof;
953 input_buffer[i++] = c;
954 c = getc (infile);
955 }
956 while (! (c == ',' || c == ' ' || c == '\t'
957 || c == '\n' || c == '\r'));
958 input_buffer[i] = '\0';
959
960 name = xmalloc (i + 1);
961 memcpy (name, input_buffer, i + 1);
962
963 if (type == SYMBOL)
964 {
965 do
966 c = getc (infile);
967 while (c == ' ' || c == '\t' || c == '\n' || c == '\r');
968 if (c != '"')
969 continue;
970 c = read_c_string_or_comment (infile, -1, 0, 0);
971 svalue = xstrdup (input_buffer);
972 }
973
974 if (!defunflag)
975 {
976 add_global (type, name, 0, svalue);
977 continue;
978 }
979 }
980
981 if (type == SYMBOL)
982 continue;
983
984 /* DEFVAR_LISP ("name", addr, "doc")
985 DEFVAR_LISP ("name", addr /\* doc *\/)
986 DEFVAR_LISP ("name", addr, doc: /\* doc *\/) */
987
988 if (defunflag)
989 commas = generate_globals ? 4 : 5;
990 else if (defvarperbufferflag)
991 commas = 3;
992 else if (defvarflag)
993 commas = 1;
994 else /* For DEFSIMPLE and DEFPRED. */
995 commas = 2;
996
997 while (commas)
998 {
999 if (c == ',')
1000 {
1001 commas--;
1002
1003 if (defunflag && (commas == 1 || commas == 2))
1004 {
1005 int scanned = 0;
1006 do
1007 c = getc (infile);
1008 while (c == ' ' || c == '\n' || c == '\r' || c == '\t');
1009 if (c < 0)
1010 goto eof;
1011 ungetc (c, infile);
1012 if (commas == 2) /* Pick up minargs. */
1013 scanned = fscanf (infile, "%d", &minargs);
1014 else /* Pick up maxargs. */
1015 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
1016 {
1017 if (generate_globals)
1018 maxargs = (c == 'M') ? -1 : -2;
1019 else
1020 maxargs = -1;
1021 }
1022 else
1023 scanned = fscanf (infile, "%d", &maxargs);
1024 if (scanned < 0)
1025 goto eof;
1026 }
1027 }
1028
1029 if (c == EOF)
1030 goto eof;
1031 c = getc (infile);
1032 }
1033
1034 if (generate_globals)
1035 {
1036 add_global (FUNCTION, name, maxargs, 0);
1037 continue;
1038 }
1039
1040 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
1041 c = getc (infile);
1042
1043 if (c == '"')
1044 c = read_c_string_or_comment (infile, 0, 0, 0);
1045
1046 while (c != EOF && c != ',' && c != '/')
1047 c = getc (infile);
1048 if (c == ',')
1049 {
1050 c = getc (infile);
1051 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
1052 c = getc (infile);
1053 while ((c >= 'a' && c <= 'z') || (c >= 'Z' && c <= 'Z'))
1054 c = getc (infile);
1055 if (c == ':')
1056 {
1057 doc_keyword = 1;
1058 c = getc (infile);
1059 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
1060 c = getc (infile);
1061 }
1062 }
1063
1064 if (c == '"'
1065 || (c == '/'
1066 && (c = getc (infile),
1067 ungetc (c, infile),
1068 c == '*')))
1069 {
1070 int comment = c != '"';
1071 int saw_usage;
1072
1073 printf ("\037%c%s\n", defvarflag ? 'V' : 'F', input_buffer);
1074
1075 if (comment)
1076 getc (infile); /* Skip past `*'. */
1077 c = read_c_string_or_comment (infile, 1, comment, &saw_usage);
1078
1079 /* If this is a defun, find the arguments and print them. If
1080 this function takes MANY or UNEVALLED args, then the C source
1081 won't give the names of the arguments, so we shouldn't bother
1082 trying to find them.
1083
1084 Various doc-string styles:
1085 0: DEFUN (..., "DOC") (args) [!comment]
1086 1: DEFUN (..., /\* DOC *\/ (args)) [comment && !doc_keyword]
1087 2: DEFUN (..., doc: /\* DOC *\/) (args) [comment && doc_keyword]
1088 */
1089 if (defunflag && maxargs != -1 && !saw_usage)
1090 {
1091 char argbuf[1024], *p = argbuf;
1092
1093 if (!comment || doc_keyword)
1094 while (c != ')')
1095 {
1096 if (c < 0)
1097 goto eof;
1098 c = getc (infile);
1099 }
1100
1101 /* Skip into arguments. */
1102 while (c != '(')
1103 {
1104 if (c < 0)
1105 goto eof;
1106 c = getc (infile);
1107 }
1108 /* Copy arguments into ARGBUF. */
1109 *p++ = c;
1110 do
1111 *p++ = c = getc (infile);
1112 while (c != ')');
1113 *p = '\0';
1114 /* Output them. */
1115 fputs ("\n\n", stdout);
1116 write_c_args (input_buffer, argbuf, minargs, maxargs);
1117 }
1118 else if (defunflag && maxargs == -1 && !saw_usage)
1119 /* The DOC should provide the usage form. */
1120 fprintf (stderr, "Missing `usage' for function `%s'.\n",
1121 input_buffer);
1122 }
1123 }
1124 eof:
1125 fclose (infile);
1126 return 0;
1127 }
1128 \f
1129 /* Read a file of Lisp code, compiled or interpreted.
1130 Looks for
1131 (defun NAME ARGS DOCSTRING ...)
1132 (defmacro NAME ARGS DOCSTRING ...)
1133 (defsubst NAME ARGS DOCSTRING ...)
1134 (autoload (quote NAME) FILE DOCSTRING ...)
1135 (defvar NAME VALUE DOCSTRING)
1136 (defconst NAME VALUE DOCSTRING)
1137 (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
1138 (fset (quote NAME) #[... DOCSTRING ...])
1139 (defalias (quote NAME) #[... DOCSTRING ...])
1140 (custom-declare-variable (quote NAME) VALUE DOCSTRING ...)
1141 starting in column zero.
1142 (quote NAME) may appear as 'NAME as well.
1143
1144 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
1145 When we find that, we save it for the following defining-form,
1146 and we use that instead of reading a doc string within that defining-form.
1147
1148 For defvar, defconst, and fset we skip to the docstring with a kludgy
1149 formatting convention: all docstrings must appear on the same line as the
1150 initial open-paren (the one in column zero) and must contain a backslash
1151 and a newline immediately after the initial double-quote. No newlines
1152 must appear between the beginning of the form and the first double-quote.
1153 For defun, defmacro, and autoload, we know how to skip over the
1154 arglist, but the doc string must still have a backslash and newline
1155 immediately after the double quote.
1156 The only source files that must follow this convention are preloaded
1157 uncompiled ones like loaddefs.el; aside from that, it is always the .elc
1158 file that we should look at, and they are no problem because byte-compiler
1159 output follows this convention.
1160 The NAME and DOCSTRING are output.
1161 NAME is preceded by `F' for a function or `V' for a variable.
1162 An entry is output only if DOCSTRING has \ newline just after the opening ".
1163 */
1164
/* Skip spaces, tabs and line breaks in INFILE, leaving the stream
   positioned at the first character that is none of these.  At end
   of input nothing is pushed back.

   The character is kept in an int so that EOF stays distinct from
   every byte value, including 0xFF.  */
static void
skip_white (FILE *infile)
{
  int c;

  do
    c = getc (infile);
  while (c == ' ' || c == '\t' || c == '\n' || c == '\r');

  if (c != EOF)
    ungetc (c, infile);
}
1173
/* Read a Lisp symbol from INFILE into BUFFER as a NUL-terminated
   string, skipping whitespace before and after it.  The symbol ends
   at whitespace, at `(' or `)', or at end of input; the ending
   character is left unread.  A backslash makes the following
   character part of the symbol.  If no symbol was read, a note is
   written to stderr.

   BUFFER's size is not known here, so nothing is bounds-checked.
   NOTE(review): the caller appears to pass a BUFSIZ-sized array;
   confirm that symbols cannot exceed it.  */
static void
read_lisp_symbol (FILE *infile, char *buffer)
{
  /* An int, so that EOF is distinct from every byte value.  */
  int c;
  char *fillp = buffer;

  skip_white (infile);
  while (1)
    {
      c = getc (infile);
      if (c == '\\')
        {
          /* Store the escaped character in the next free slot.  */
          c = getc (infile);
          if (c == EOF)
            {
              *fillp = 0;
              break;
            }
          *fillp++ = c;
        }
      else if (c == EOF || c == ' ' || c == '\t' || c == '\n' || c == '\r'
               || c == '(' || c == ')')
        {
          if (c != EOF)
            ungetc (c, infile);
          *fillp = 0;
          break;
        }
      else
        *fillp++ = c;
    }

  if (! buffer[0])
    fprintf (stderr, "## expected a symbol, got '%c'\n", c);

  skip_white (infile);
}
1201
/* Read INFILE up to and including the next line break, or to end of
   input.  Return 1 if the two characters just before the line break
   were a double quote followed by a backslash, which marks the start
   of a doc string.  Otherwise return 0, after pushing the line break
   back onto INFILE if one was read.  */
static int
search_lisp_doc_at_eol (FILE *infile)
{
  int cur = 0;
  int prev = 0;
  int prev2 = 0;

  /* Advance to the end of the line, remembering the two characters
     that precede the current one.  */
  for (;;)
    {
      if (cur == '\n' || cur == '\r' || cur == EOF)
        break;
      prev2 = prev;
      prev = cur;
      cur = getc (infile);
    }

  if (prev2 == '"' && prev == '\\')
    return 1;

#ifdef DEBUG
  fprintf (stderr, "## non-docstring found\n");
#endif
  if (cur != EOF)
    ungetc (cur, infile);
  return 0;
}
1228
1229 #define DEF_ELISP_FILE(fn) { #fn, sizeof(#fn) - 1 }
1230
1231 static int
1232 scan_lisp_file (const char *filename, const char *mode)
1233 {
1234 FILE *infile;
1235 register int c;
1236 char *saved_string = 0;
1237 /* These are the only files that are loaded uncompiled, and must
1238 follow the conventions of the doc strings expected by this
1239 function. These conventions are automatically followed by the
1240 byte compiler when it produces the .elc files. */
1241 static struct {
1242 const char *fn;
1243 size_t fl;
1244 } const uncompiled[] = {
1245 DEF_ELISP_FILE (loaddefs.el),
1246 DEF_ELISP_FILE (loadup.el),
1247 DEF_ELISP_FILE (charprop.el),
1248 DEF_ELISP_FILE (cp51932.el),
1249 DEF_ELISP_FILE (eucjp-ms.el)
1250 };
1251 int i, match;
1252 size_t flen = strlen (filename);
1253
1254 if (generate_globals)
1255 fatal ("scanning lisp file when -g specified", 0);
1256 if (flen > 3 && !strcmp (filename + flen - 3, ".el"))
1257 {
1258 for (i = 0, match = 0; i < sizeof (uncompiled) / sizeof (uncompiled[0]);
1259 i++)
1260 {
1261 if (uncompiled[i].fl <= flen
1262 && !strcmp (filename + flen - uncompiled[i].fl, uncompiled[i].fn)
1263 && (flen == uncompiled[i].fl
1264 || IS_SLASH (filename[flen - uncompiled[i].fl - 1])))
1265 {
1266 match = 1;
1267 break;
1268 }
1269 }
1270 if (!match)
1271 fatal ("uncompiled lisp file %s is not supported", filename);
1272 }
1273
1274 infile = fopen (filename, mode);
1275 if (infile == NULL)
1276 {
1277 perror (filename);
1278 return 0; /* No error. */
1279 }
1280
1281 c = '\n';
1282 while (!feof (infile))
1283 {
1284 char buffer[BUFSIZ];
1285 char type;
1286
1287 /* If not at end of line, skip till we get to one. */
1288 if (c != '\n' && c != '\r')
1289 {
1290 c = getc (infile);
1291 continue;
1292 }
1293 /* Skip the line break. */
1294 while (c == '\n' || c == '\r')
1295 c = getc (infile);
1296 /* Detect a dynamic doc string and save it for the next expression. */
1297 if (c == '#')
1298 {
1299 c = getc (infile);
1300 if (c == '@')
1301 {
1302 size_t length = 0;
1303 size_t i;
1304
1305 /* Read the length. */
1306 while ((c = getc (infile),
1307 c >= '0' && c <= '9'))
1308 {
1309 length *= 10;
1310 length += c - '0';
1311 }
1312
1313 if (length <= 1)
1314 fatal ("invalid dynamic doc string length", "");
1315
1316 if (c != ' ')
1317 fatal ("space not found after dynamic doc string length", "");
1318
1319 /* The next character is a space that is counted in the length
1320 but not part of the doc string.
1321 We already read it, so just ignore it. */
1322 length--;
1323
1324 /* Read in the contents. */
1325 free (saved_string);
1326 saved_string = (char *) xmalloc (length);
1327 for (i = 0; i < length; i++)
1328 saved_string[i] = getc (infile);
1329 /* The last character is a ^_.
1330 That is needed in the .elc file
1331 but it is redundant in DOC. So get rid of it here. */
1332 saved_string[length - 1] = 0;
1333 /* Skip the line break. */
1334 while (c == '\n' || c == '\r')
1335 c = getc (infile);
1336 /* Skip the following line. */
1337 while (c != '\n' && c != '\r')
1338 c = getc (infile);
1339 }
1340 continue;
1341 }
1342
1343 if (c != '(')
1344 continue;
1345
1346 read_lisp_symbol (infile, buffer);
1347
1348 if (! strcmp (buffer, "defun")
1349 || ! strcmp (buffer, "defmacro")
1350 || ! strcmp (buffer, "defsubst"))
1351 {
1352 type = 'F';
1353 read_lisp_symbol (infile, buffer);
1354
1355 /* Skip the arguments: either "nil" or a list in parens. */
1356
1357 c = getc (infile);
1358 if (c == 'n') /* nil */
1359 {
1360 if ((c = getc (infile)) != 'i'
1361 || (c = getc (infile)) != 'l')
1362 {
1363 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1364 buffer, filename);
1365 continue;
1366 }
1367 }
1368 else if (c != '(')
1369 {
1370 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1371 buffer, filename);
1372 continue;
1373 }
1374 else
1375 while (c != ')')
1376 c = getc (infile);
1377 skip_white (infile);
1378
1379 /* If the next three characters aren't `dquote bslash newline'
1380 then we're not reading a docstring.
1381 */
1382 if ((c = getc (infile)) != '"'
1383 || (c = getc (infile)) != '\\'
1384 || ((c = getc (infile)) != '\n' && c != '\r'))
1385 {
1386 #ifdef DEBUG
1387 fprintf (stderr, "## non-docstring in %s (%s)\n",
1388 buffer, filename);
1389 #endif
1390 continue;
1391 }
1392 }
1393
1394 /* defcustom can only occur in uncompiled Lisp files. */
1395 else if (! strcmp (buffer, "defvar")
1396 || ! strcmp (buffer, "defconst")
1397 || ! strcmp (buffer, "defcustom"))
1398 {
1399 type = 'V';
1400 read_lisp_symbol (infile, buffer);
1401
1402 if (saved_string == 0)
1403 if (!search_lisp_doc_at_eol (infile))
1404 continue;
1405 }
1406
1407 else if (! strcmp (buffer, "custom-declare-variable")
1408 || ! strcmp (buffer, "defvaralias")
1409 )
1410 {
1411 type = 'V';
1412
1413 c = getc (infile);
1414 if (c == '\'')
1415 read_lisp_symbol (infile, buffer);
1416 else
1417 {
1418 if (c != '(')
1419 {
1420 fprintf (stderr,
1421 "## unparsable name in custom-declare-variable in %s\n",
1422 filename);
1423 continue;
1424 }
1425 read_lisp_symbol (infile, buffer);
1426 if (strcmp (buffer, "quote"))
1427 {
1428 fprintf (stderr,
1429 "## unparsable name in custom-declare-variable in %s\n",
1430 filename);
1431 continue;
1432 }
1433 read_lisp_symbol (infile, buffer);
1434 c = getc (infile);
1435 if (c != ')')
1436 {
1437 fprintf (stderr,
1438 "## unparsable quoted name in custom-declare-variable in %s\n",
1439 filename);
1440 continue;
1441 }
1442 }
1443
1444 if (saved_string == 0)
1445 if (!search_lisp_doc_at_eol (infile))
1446 continue;
1447 }
1448
1449 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
1450 {
1451 type = 'F';
1452
1453 c = getc (infile);
1454 if (c == '\'')
1455 read_lisp_symbol (infile, buffer);
1456 else
1457 {
1458 if (c != '(')
1459 {
1460 fprintf (stderr, "## unparsable name in fset in %s\n",
1461 filename);
1462 continue;
1463 }
1464 read_lisp_symbol (infile, buffer);
1465 if (strcmp (buffer, "quote"))
1466 {
1467 fprintf (stderr, "## unparsable name in fset in %s\n",
1468 filename);
1469 continue;
1470 }
1471 read_lisp_symbol (infile, buffer);
1472 c = getc (infile);
1473 if (c != ')')
1474 {
1475 fprintf (stderr,
1476 "## unparsable quoted name in fset in %s\n",
1477 filename);
1478 continue;
1479 }
1480 }
1481
1482 if (saved_string == 0)
1483 if (!search_lisp_doc_at_eol (infile))
1484 continue;
1485 }
1486
1487 else if (! strcmp (buffer, "autoload"))
1488 {
1489 type = 'F';
1490 c = getc (infile);
1491 if (c == '\'')
1492 read_lisp_symbol (infile, buffer);
1493 else
1494 {
1495 if (c != '(')
1496 {
1497 fprintf (stderr, "## unparsable name in autoload in %s\n",
1498 filename);
1499 continue;
1500 }
1501 read_lisp_symbol (infile, buffer);
1502 if (strcmp (buffer, "quote"))
1503 {
1504 fprintf (stderr, "## unparsable name in autoload in %s\n",
1505 filename);
1506 continue;
1507 }
1508 read_lisp_symbol (infile, buffer);
1509 c = getc (infile);
1510 if (c != ')')
1511 {
1512 fprintf (stderr,
1513 "## unparsable quoted name in autoload in %s\n",
1514 filename);
1515 continue;
1516 }
1517 }
1518 skip_white (infile);
1519 if ((c = getc (infile)) != '\"')
1520 {
1521 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
1522 buffer, filename);
1523 continue;
1524 }
1525 read_c_string_or_comment (infile, 0, 0, 0);
1526
1527 if (saved_string == 0)
1528 if (!search_lisp_doc_at_eol (infile))
1529 continue;
1530 }
1531
1532 #ifdef DEBUG
1533 else if (! strcmp (buffer, "if")
1534 || ! strcmp (buffer, "byte-code"))
1535 continue;
1536 #endif
1537
1538 else
1539 {
1540 #ifdef DEBUG
1541 fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
1542 buffer, filename);
1543 #endif
1544 continue;
1545 }
1546
1547 /* At this point, we should either use the previous dynamic doc string in
1548 saved_string or gobble a doc string from the input file.
1549 In the latter case, the opening quote (and leading backslash-newline)
1550 have already been read. */
1551
1552 printf ("\037%c%s\n", type, buffer);
1553 if (saved_string)
1554 {
1555 fputs (saved_string, stdout);
1556 /* Don't use one dynamic doc string twice. */
1557 free (saved_string);
1558 saved_string = 0;
1559 }
1560 else
1561 read_c_string_or_comment (infile, 1, 0, 0);
1562 }
1563 fclose (infile);
1564 return 0;
1565 }
1566
1567
1568 /* make-docfile.c ends here */