]> code.delx.au - gnu-emacs-elpa/blob - packages/wisi/wisi-compile.el
Merge commit '078f88ecb797b6cf2cd597417402274dd82402ce' from diff-hl
[gnu-emacs-elpa] / packages / wisi / wisi-compile.el
1 ;;; Grammar compiler for the wisent LALR parser, integrating Wisi OpenToken output. -*- lexical-binding:t -*-
2 ;;
3 ;; Copyright (C) 2012, 2013, 2015 Free Software Foundation, Inc.
4 ;;
5 ;; Author: Stephen Leake <stephen_leake@member.fsf.org>
6 ;;
7 ;; This file is part of GNU Emacs.
8 ;;
9 ;; GNU Emacs is free software: you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation, either version 3 of the License, or
12 ;; (at your option) any later version.
13 ;;
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
18 ;;
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
21 ;;
22 ;;; History: first experimental version Jan 2013
23 ;;
24 ;;; Context
25 ;;
26 ;; Semantic (info "(semantic)Top") provides an LALR(1) parser
27 ;; wisent-parse. The grammar used is defined by the functions
28 ;; semantic-grammar-create-package, which reads a bison-like source
29 ;; file and produces corresponding elisp source, and
30 ;; wisent-compile-grammar, which generates a parser table.
31 ;;
32 ;; However, the algorithm used in wisent-compile-grammar cannot cope
33 ;; with the grammar for the Ada language, because it is not
34 ;; LALR(1). So we provide a generalized LALR parser, which spawns
35 ;; parallel LALR parsers at each conflict. Instead of also rewriting
36 ;; the entire semantic grammar compiler, we use the OpenToken LALR
37 ;; parser generator, which is easier to modify (it is written in Ada,
38 ;; not Lisp).
39 ;;
40 ;; The Ada function Wisi.Generate reads the bison-like input and
41 ;; produces corresponding elisp source code, similar to that
42 ;; produced by semantic-grammar-create-package.
43 ;;
44 ;; wisi-compile-grammar (provided here) generate the automaton
45 ;; structure required by wisi-parse, using functions from
46 ;; wisent/comp.el
47 ;;
48 ;;;;
49
50 (require 'semantic/wisent/comp)
51
52 (defun wisi-compose-action (value symbol-array nonterms)
53 (let ((symbol (intern-soft (format "%s:%d" (car value) (cdr value)) symbol-array))
54 (prod (car (nth (cdr value) (cdr (assoc (car value) nonterms))))))
55 (if symbol
56 (list (car value) symbol (length prod))
57 (error "%s not in symbol-array" symbol))))
58
59 (defun wisi-replace-actions (action symbol-array nonterms)
60 "Replace semantic action symbol names in ACTION with list as defined in `wisi-compile-grammar'.
61 ACTION is the alist for one state from the grammar; NONTERMS is from the grammar.
62 Return the new alist."
63 ;; result is (nonterm index action-symbol token-count)
64 (let (result item)
65 (while action
66 (setq item (pop action))
67 (cond
68 ((or
69 (memq (cdr item) '(error accept))
70 (numberp (cdr item)))
71 (push item result))
72
73 ((listp (cdr item))
74 (let ((value (cdr item)))
75 (cond
76 ((symbolp (car value))
77 ;; reduction
78 (push (cons (car item)
79 (wisi-compose-action value symbol-array nonterms))
80 result))
81
82 ((integerp (car value))
83 ;; shift/reduce conflict
84 (push (cons (car item)
85 (list (car value)
86 (wisi-compose-action (cadr value) symbol-array nonterms)))
87 result))
88
89 ((integerp (cadr value))
90 ;; reduce/shift conflict
91 (push (cons (car item)
92 (list (wisi-compose-action (car value) symbol-array nonterms)
93 (cadr value)))
94 result))
95
96 (t ;; reduce/reduce conflict
97 (push (cons (car item)
98 (list (wisi-compose-action (car value) symbol-array nonterms)
99 (wisi-compose-action (cadr value) symbol-array nonterms)))
100 result))
101 )))
102
103 (t
104 (error "unexpected '%s'; expected 'error, 'accept, numberp, stringp, listp" (cdr item)))
105 ));; while/cond
106
107 (reverse result)))
108
109 (defun wisi-semantic-action (r rcode tags rlhs)
110 "Define an Elisp function for semantic action at rule R.
111 On entry RCODE[R] contains a vector [BODY N (NTERM I)] where BODY
112 is the body of the semantic action, N is the number of tokens in
113 the production, NTERM is the nonterminal the semantic action
114 belongs to, and I is the index of the production and associated
115 semantic action in the NTERM rule. Returns the semantic action
116 symbol, which is interned in RCODE[0].
117
118 The semantic action function accepts one argument, the list of
119 tokens to be reduced. It returns nil; it is called for the user
120 side-effects only."
121 ;; based on comp.el wisent-semantic-action
122 (let* ((actn (aref rcode r))
123 (n (aref actn 1)) ; number of tokens in production
124 (name (apply 'format "%s:%d" (aref actn 2)))
125 (form (aref actn 0))
126 (action-symbol (intern name (aref rcode 0))))
127
128 (fset action-symbol
129 `(lambda (wisi-tokens)
130 (let* (($nterm ',(aref tags (aref rlhs r)))
131 ($1 nil));; wisent-parse-nonterminals defines a default body of $1 for empty actions
132 ,form
133 nil)))
134
135 (list (car (aref actn 2)) action-symbol n)))
136
137 (defun wisi-compile-grammar (grammar)
138 ;; FIXME: This docstring is full of ambiguities making it unclear whether
139 ;; we're talking for example about data that includes the symbol `nonterm' as
140 ;; opposed to data that includes some non terminal object we denote
141 ;; with the meta-variable "nonterm".
142 ;; The convention in Elisp's docstrings is to use all-caps for metavariables
143 ;; (and `...' quoting as opposed to the '... quoting used below in a few
144 ;; spots).
145 "Compile the LALR(1) GRAMMAR; return the automaton for wisi-parse.
146 GRAMMAR is a list TERMINALS NONTERMS ACTIONS GOTOS, where:
147
148 TERMINALS is a list of terminal token symbols.
149
150 NONTERMS is a list of productions; each production is a
151 list (nonterm (tokens action) ...) where `action' is any lisp form.
152
153 ACTIONS is an array indexed by parser state, of alists indexed by
154 terminal tokens. The value of each item in the alists is one of:
155
156 'error
157
158 'accept
159
160 integer - shift; gives new state
161
162 '(nonterm . index) - reduce by nonterm production index.
163
164 '(integer (nonterm . index)) - a shift/reduce conflict
165 '((nonterm . index) integer) - a reduce/shift conflict
166 '((nonterm . index) (nonterm . index)) - a reduce/reduce conflict
167
168 The first item in the alist must have the key 'default (not a
169 terminal token); it is used when no other item matches the
170 current token.
171
172 GOTOS is an array indexed by parser state, of alists giving the
173 new state after a reduce for each nonterminal legal in that
174 state.
175
176 The automaton is an array with 3 elements:
177
178 parser-actions is a copy of the input ACTIONS, with reduction
179 actions replaced by a list (NONTERM ACTION-SYMBOL TOKEN-COUNT),
180 where NONTERM is a symbol from NONTERMS, and is the
181 non-terminal to reduce to, TOKEN-COUNT is the number of tokens in
182 the reduction, ACTION-SYMBOL is nil if there is no user action,
183 or a symbol from semantic-actions (below).
184
185 gotos is a copy of GOTOS.
186
187 semantic-actions is an obarray containing functions that
188 implement the user action for each nonterminal; the function
189 names have the format nonterm:index."
190 (defvar nrules) (defvar ptable) (defvar rcode) (defvar rlhs) (defvar tags)
191 (defvar token-list) (defvar var-list)
192 (let (nrules ptable rcode rlhs tags token-list var-list)
193 (wisent-parse-grammar;; set global vars used by wisent-semantic-action
194 (cons
195 (nth 0 grammar);; TOKENS
196 (cons nil ;; ASSOCS
197 (nth 1 grammar));; NONTERMS
198 ))
199
200 (aset rcode 0 (make-vector 13 0));; obarray for semantic actions
201
202 ;; create semantic action functions, interned in rcode[0]
203 (let* ((i 1))
204 (while (<= i nrules)
205 (wisi-semantic-action i rcode tags rlhs)
206 (setq i (1+ i)))
207 )
208
209 ;; replace semantic actions in ACTIONS with symbols from symbol-array
210 (let ((nactions (length (nth 2 grammar)))
211 (actions (nth 2 grammar))
212 (symbol-array (aref rcode 0))
213 (i 0))
214 (while (< i nactions)
215 (aset actions i
216 (wisi-replace-actions (aref actions i) symbol-array (nth 1 grammar)))
217 (setq i (1+ i)))
218 (vector
219 actions
220 (nth 3 grammar)
221 symbol-array)
222 )))
223
224 (provide 'wisi-compile)
225
226 ;;; wisi-compile.el ends here