]> code.delx.au - gnu-emacs-elpa/blob - packages/wisi/wisi-compile.el
multishell - Merge edge-case but significant fixes
[gnu-emacs-elpa] / packages / wisi / wisi-compile.el
1 ;; wisi-compile.el --- Grammar compiler for the wisi parser, integrating Wisi OpenToken output. -*- lexical-binding:t -*-
2 ;;
3 ;; Copyright (C) 2012, 2013, 2015, 2016 Free Software Foundation, Inc.
4 ;;
5 ;; Author: Stephen Leake <stephen_leake@member.fsf.org>
6 ;;
7 ;; This file is part of GNU Emacs.
8 ;;
9 ;; GNU Emacs is free software: you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation, either version 3 of the License, or
12 ;; (at your option) any later version.
13 ;;
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
18 ;;
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
21 ;;
22 ;;; History: first experimental version Jan 2013
23 ;;
24 ;;; Context
25 ;;
26 ;; Semantic (info "(semantic)Top") provides an LALR(1) parser
27 ;; wisent-parse. The grammar used is defined by the functions
28 ;; semantic-grammar-create-package, which reads a bison-like source
29 ;; file and produces corresponding elisp source, and
30 ;; wisent-compile-grammar, which generates a parser table.
31 ;;
32 ;; However, the algorithm used in wisent-compile-grammar cannot cope
33 ;; with the grammar for the Ada language, because it is not
34 ;; LALR(1). So we provide a generalized LALR parser, which spawns
35 ;; parallel LALR parsers at each conflict. Instead of also rewriting
36 ;; the entire semantic grammar compiler, we use the OpenToken LALR
37 ;; parser generator, which is easier to modify (it is written in Ada,
38 ;; not Lisp).
39 ;;
40 ;; The Ada function Wisi.Generate reads the bison-like input and
41 ;; produces corresponding elisp source code, similar to that
42 ;; produced by semantic-grammar-create-package.
43 ;;
44 ;; wisi-compile-grammar (provided here) generates the automaton
45 ;; structure required by wisi-parse
46 ;;
47 ;;;;
48
49 (defun wisi-compose-action (value symbol-obarray nonterms)
50 (let* ((nonterm (car value))
51 (index (cdr value))
52 (symbol (intern-soft (format "%s:%d" nonterm index) symbol-obarray))
53 (rhs (car (nth index (cdr (assoc nonterm nonterms))))))
54 (list nonterm symbol (length rhs))
55 ))
56
57 (defun wisi-replace-actions (action symbol-obarray nonterms)
58 "Replace semantic action symbol names in ACTION with list as defined in `wisi-compile-grammar'.
59 ACTION is the alist for one state from the grammar, with the form:
60 ((default . error) ITEM ... )
61 ITEM is one of:
62 reduction (TOKEN . (NONTERM . INDEX)) where NONTERM . INDEX gives the action symbol name.
63 shift (TOKEN . STATE)
64 shift/reduce conflict (STATE (NONTERM . INDEX))
65 reduce/shift conflict ((NONTERM . INDEX) (NONTERM . INDEX))
66
67 SYMBOL-OBARRAY contains the action symbols.
68 NONTERMS is from the grammar.
69 Return the new action alist."
70 ;; result is list of (nonterm index action-symbol token-count)
71 (let (result item)
72 (while action
73 (setq item (pop action))
74 (cond
75 ((or
76 (memq (cdr item) '(error accept))
77 (numberp (cdr item))) ;; shift
78 (push item result))
79
80 ((listp (cdr item))
81 (let ((value (cdr item)))
82 (cond
83 ((symbolp (car value))
84 ;; reduction
85 (push (cons (car item)
86 (wisi-compose-action value symbol-obarray nonterms))
87 result))
88
89 ((integerp (car value))
90 ;; shift/reduce conflict
91 (push (cons (car item)
92 (list (car value)
93 (wisi-compose-action (cadr value) symbol-obarray nonterms)))
94 result))
95
96 (t ;; reduce/reduce conflict
97 (push (cons (car item)
98 (list (wisi-compose-action (car value) symbol-obarray nonterms)
99 (wisi-compose-action (cadr value) symbol-obarray nonterms)))
100 result))
101 )))
102
103 (t
104 (error "unexpected '%s'; expected 'error, 'accept, numberp, stringp, listp" (cdr item)))
105 ));; while/cond
106
107 (reverse result)))
108
109 (defun wisi-semantic-action (form nonterm iactn symbol-obarray)
110 "Define an Elisp semantic action function for a production, interned in SYMBOL-OBARRAY.
111 FORM is the body of the semantic action.
112 NONTERM is the nonterminal left hand side.
113 IACTN is the index of the production in the NTERM rule.
114
115 The semantic action function accepts two arguments;
116 - $nterm : the nonterminal
117 - wisi-tokens : the list of tokens to be reduced.
118
119 It returns nil; it is called for the semantic side-effects only."
120 ;; based on comp.el wisent-semantic-action
121 (let* ((name (format "%s:%d" nonterm iactn))
122 (action-symbol (intern name symbol-obarray)))
123
124 (fset action-symbol
125 `(lambda ($nterm wisi-tokens)
126 ,form
127 nil))))
128
129 (defun wisi-compile-grammar (grammar)
130 "Compile the LALR(1) GRAMMAR; return the automaton for wisi-parse.
131 GRAMMAR is a list TERMINALS NONTERMS ACTIONS GOTOS, where:
132
133 TERMINALS is a list of terminal token symbols.
134
135 NONTERMS is a list of productions; each production is a
136 list (nonterm (tokens semantic-action) ...) where `semantic-action' is
137 any lisp form. The set of (tokens semantic-action) are the right hand
138 sides; nonterm is the left hand side.
139
140 ACTIONS is an array indexed by parser state, of alists indexed by
141 terminal tokens. The value of each item in the alists is one of:
142
143 'error
144
145 'accept
146
147 integer - shift; gives new state
148
149 '(nonterm . index) - reduce by nonterm production index.
150
151 '(integer (nonterm . index)) - a shift/reduce conflict
152 '((nonterm . index) (nonterm . index)) - a reduce/reduce conflict
153
154 The first item in the alist must have the key 'default (not a
155 terminal token); it is used when no other item matches the
156 current token.
157
158 GOTOS is an array indexed by parser state, of alists giving the
159 new state after a reduce for each nonterminal legal in that
160 state.
161
162 The automaton is an array [parser-actions gotos symbol-obarray]:
163
164 - parser-actions is a copy of the input ACTIONS, with semantic
165 actions replaced by a list (nonterm action-symbol token-count),
166 where:
167
168 -- nonterm is a symbol from NONTERMS, and is the non-terminal to
169 reduce to
170
171 -- token-count is the number of tokens in the reduction,
172
173 -- action-symbol is nil if there is no semantic action, or a
174 symbol interned in symbol-obarray
175
176 - gotos is a copy of GOTOS.
177
178 - symbol-obarray is an obarray containing functions that
179 implement the semantic action for each nonterminal; the function
180 names have the format nonterm:index."
181 ;; We store named symbols for semantic actions, not just lambda
182 ;; functions, so we have a name for debug trace.
183 ;;
184 ;; FIXME: TERMINALS is not used. Eliminating it requires decoupling
185 ;; from OpenToken; we'll do that in the move to FastToken.
186 ;;
187 ;; FIXME: eliminate use of semantic-lex-* in *-wy.el. Similarly
188 ;; requires decoupling from OpenToken
189 ;;
190 ;; FIXME: can eliminate obarray? We don't need the obarray to
191 ;; avoid garbage collection of the symbols; they are all referenced in the compiled grammar.
192 ;; But each semantic action function has to be defined (and byte-compiled?) somewhere?
193 ;; currently actions are _not_ byte-compiled; wisi-compile-grammar is run at load time
194 ;; need 'eval-when-compile' to byte-compile them?
195 ;; can't byte-compile obarray?
196
197 (let ((defs (nth 1 grammar))
198 (symbol-obarray (make-vector 13 0));; for parse actions
199 def nonterm rhs-list rule
200 semantic-action index)
201
202 (while defs
203 (setq def (car defs)
204 defs (cdr defs)
205 nonterm (car def)
206 rhs-list (cdr def)
207 index 0)
208 (while rhs-list
209 (setq rule (car rhs-list)
210 rhs-list (cdr rhs-list)
211 semantic-action (cadr rule))
212
213 (when semantic-action
214 (wisi-semantic-action semantic-action nonterm index symbol-obarray))
215
216 (setq index (1+ index))
217 ))
218
219 ;; replace semantic actions in ACTIONS with symbols from symbol-obarray
220 (let ((nactions (length (nth 2 grammar)))
221 (actions (nth 2 grammar))
222 (i 0))
223 (while (< i nactions)
224 (aset actions i
225 (wisi-replace-actions (aref actions i) symbol-obarray (nth 1 grammar)))
226 (setq i (1+ i)))
227 (vector
228 actions
229 (nth 3 grammar)
230 symbol-obarray)
231 )))
232
233 (provide 'wisi-compile)
234
235 ;;;; end of file