]> code.delx.au - gnu-emacs-elpa/blob - packages/wisi/wisi-compile.el
Merge commit 'c0a1e24ef39e2b0f388135c2ed8f8b419346337c'
[gnu-emacs-elpa] / packages / wisi / wisi-compile.el
1 ;; wisi-compile.el --- Grammar compiler for the wisi parser, integrating Wisi OpenToken output. -*- lexical-binding:t -*-
2 ;;
3 ;; Copyright (C) 2012, 2013, 2015, 2016 Free Software Foundation, Inc.
4 ;;
5 ;; Author: Stephen Leake <stephen_leake@member.fsf.org>
6 ;;
7 ;; This file is part of GNU Emacs.
8 ;;
9 ;; GNU Emacs is free software: you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation, either version 3 of the License, or
12 ;; (at your option) any later version.
13 ;;
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
18 ;;
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
21 ;;
22
23 ;;; Commentary:
24 ;;
25
26 ;;;; History: first experimental version Jan 2013
27 ;;
28 ;;;; Context
29 ;;
30 ;; Semantic (info "(semantic)Top") provides an LALR(1) parser
31 ;; wisent-parse. The grammar used is defined by the functions
32 ;; semantic-grammar-create-package, which reads a bison-like source
33 ;; file and produces corresponding elisp source, and
34 ;; wisent-compile-grammar, which generates a parser table.
35 ;;
36 ;; However, the algorithm used in wisent-compile-grammar cannot cope
37 ;; with the grammar for the Ada language, because it is not
38 ;; LALR(1). So we provide a generalized LALR parser, which spawns
39 ;; parallel LALR parsers at each conflict. Instead of also rewriting
40 ;; the entire semantic grammar compiler, we use the OpenToken LALR
41 ;; parser generator, which is easier to modify (it is written in Ada,
42 ;; not Lisp).
43 ;;
44 ;; The Ada function Wisi.Generate reads the bison-like input and
45 ;; produces corresponding elisp source code, similar to that
46 ;; produced by semantic-grammar-create-package.
47 ;;
48 ;; wisi-compile-grammar (provided here) generates the automaton
49 ;; structure required by wisi-parse
50 ;;
51 ;;;;
52
53 (defun wisi-compose-action (value symbol-obarray nonterms)
54 (let* ((nonterm (car value))
55 (index (cdr value))
56 (symbol (intern-soft (format "%s:%d" nonterm index) symbol-obarray))
57 (rhs (car (nth index (cdr (assoc nonterm nonterms))))))
58 (list nonterm symbol (length rhs))
59 ))
60
61 (defun wisi-replace-actions (action symbol-obarray nonterms)
62 "Replace semantic action symbol names in ACTION with list as defined in `wisi-compile-grammar'.
63 ACTION is the alist for one state from the grammar, with the form:
64 ((default . error) ITEM ... )
65 ITEM is one of:
66 reduction (TOKEN . (NONTERM . INDEX)) where NONTERM . INDEX gives the action symbol name.
67 shift (TOKEN . STATE)
68 shift/reduce conflict (STATE (NONTERM . INDEX))
69 reduce/shift conflict ((NONTERM . INDEX) (NONTERM . INDEX))
70
71 SYMBOL-OBARRAY contains the action symbols.
72 NONTERMS is from the grammar.
73 Return the new action alist."
74 ;; result is list of (nonterm index action-symbol token-count)
75 (let (result item)
76 (while action
77 (setq item (pop action))
78 (cond
79 ((or
80 (memq (cdr item) '(error accept))
81 (numberp (cdr item))) ;; shift
82 (push item result))
83
84 ((listp (cdr item))
85 (let ((value (cdr item)))
86 (cond
87 ((symbolp (car value))
88 ;; reduction
89 (push (cons (car item)
90 (wisi-compose-action value symbol-obarray nonterms))
91 result))
92
93 ((integerp (car value))
94 ;; shift/reduce conflict
95 (push (cons (car item)
96 (list (car value)
97 (wisi-compose-action (cadr value) symbol-obarray nonterms)))
98 result))
99
100 (t ;; reduce/reduce conflict
101 (push (cons (car item)
102 (list (wisi-compose-action (car value) symbol-obarray nonterms)
103 (wisi-compose-action (cadr value) symbol-obarray nonterms)))
104 result))
105 )))
106
107 (t
108 (error "unexpected '%s'; expected 'error, 'accept, numberp, stringp, listp" (cdr item)))
109 ));; while/cond
110
111 (reverse result)))
112
113 (defun wisi-semantic-action (form nonterm iactn symbol-obarray)
114 "Define an Elisp semantic action function for a production, interned in SYMBOL-OBARRAY.
115 FORM is the body of the semantic action.
116 NONTERM is the nonterminal left hand side.
117 IACTN is the index of the production in the NTERM rule.
118
119 The semantic action function accepts two arguments;
120 - $nterm : the nonterminal
121 - wisi-tokens : the list of tokens to be reduced.
122
123 It returns nil; it is called for the semantic side-effects only."
124 ;; based on comp.el wisent-semantic-action
125 (let* ((name (format "%s:%d" nonterm iactn))
126 (action-symbol (intern name symbol-obarray)))
127
128 (fset action-symbol
129 `(lambda ($nterm wisi-tokens)
130 ,form
131 nil))))
132
133 (defun wisi-compile-grammar (grammar)
134 "Compile the LALR(1) GRAMMAR; return the automaton for wisi-parse.
135 GRAMMAR is a list TERMINALS NONTERMS ACTIONS GOTOS, where:
136
137 TERMINALS is a list of terminal token symbols.
138
139 NONTERMS is a list of productions; each production is a
140 list (nonterm (tokens semantic-action) ...) where `semantic-action' is
141 any lisp form. The set of (tokens semantic-action) are the right hand
142 sides; nonterm is the left hand side.
143
144 ACTIONS is an array indexed by parser state, of alists indexed by
145 terminal tokens. The value of each item in the alists is one of:
146
147 'error
148
149 'accept
150
151 integer - shift; gives new state
152
153 '(nonterm . index) - reduce by nonterm production index.
154
155 '(integer (nonterm . index)) - a shift/reduce conflict
156 '((nonterm . index) (nonterm . index)) - a reduce/reduce conflict
157
158 The first item in the alist must have the key 'default (not a
159 terminal token); it is used when no other item matches the
160 current token.
161
162 GOTOS is an array indexed by parser state, of alists giving the
163 new state after a reduce for each nonterminal legal in that
164 state.
165
166 The automaton is an array [parser-actions gotos symbol-obarray]:
167
168 - parser-actions is a copy of the input ACTIONS, with semantic
169 actions replaced by a list (nonterm action-symbol token-count),
170 where:
171
172 -- nonterm is a symbol from NONTERMS, and is the non-terminal to
173 reduce to
174
175 -- token-count is the number of tokens in the reduction,
176
177 -- action-symbol is nil if there is no semantic action, or a
178 symbol interned in symbol-obarray
179
180 - gotos is a copy of GOTOS.
181
182 - symbol-obarray is an obarray containing functions that
183 implement the semantic action for each nonterminal; the function
184 names have the format nonterm:index."
185 ;; We store named symbols for semantic actions, not just lambda
186 ;; functions, so we have a name for debug trace.
187 ;;
188 ;; FIXME: TERMINALS is not used. Eliminating it requires decoupling
189 ;; from OpenToken; we'll do that in the move to FastToken.
190 ;;
191 ;; FIXME: eliminate use of semantic-lex-* in *-wy.el. Similarly
192 ;; requires decoupling from OpenToken
193 ;;
194 ;; FIXME: can eliminate obarray? We don't need the obarray to
195 ;; avoid garbage collection of the symbols; they are all referenced in the compiled grammar.
196 ;; But each semantic action function has to be defined (and byte-compiled?) somewhere?
197 ;; currently actions are _not_ byte-compiled; wisi-compile-grammar is run at load time
198 ;; need 'eval-when-compile' to byte-compile them?
199 ;; can't byte-compile obarray?
200
201 (let ((defs (nth 1 grammar))
202 (symbol-obarray (make-vector 13 0));; for parse actions
203 def nonterm rhs-list rule
204 semantic-action index)
205
206 (while defs
207 (setq def (car defs)
208 defs (cdr defs)
209 nonterm (car def)
210 rhs-list (cdr def)
211 index 0)
212 (while rhs-list
213 (setq rule (car rhs-list)
214 rhs-list (cdr rhs-list)
215 semantic-action (cadr rule))
216
217 (when semantic-action
218 (wisi-semantic-action semantic-action nonterm index symbol-obarray))
219
220 (setq index (1+ index))
221 ))
222
223 ;; replace semantic actions in ACTIONS with symbols from symbol-obarray
224 (let ((nactions (length (nth 2 grammar)))
225 (actions (nth 2 grammar))
226 (i 0))
227 (while (< i nactions)
228 (aset actions i
229 (wisi-replace-actions (aref actions i) symbol-obarray (nth 1 grammar)))
230 (setq i (1+ i)))
231 (vector
232 actions
233 (nth 3 grammar)
234 symbol-obarray)
235 )))
236
237 (provide 'wisi-compile)
238 ;;; wisi-compile.el ends here