]> code.delx.au - gnu-emacs-elpa/blob - packages/wisi/wisi.el
Merge remote-tracking branch 'ztree/master'
[gnu-emacs-elpa] / packages / wisi / wisi.el
1 ;;; wisi.el --- Utilities for implementing an indentation/navigation engine using a generalized LALR parser
2 ;;
3 ;; Copyright (C) 2012 - 2015 Free Software Foundation, Inc.
4 ;;
5 ;; Author: Stephen Leake <stephen_leake@member.fsf.org>
6 ;; Maintainer: Stephen Leake <stephen_leake@member.fsf.org>
7 ;; Keywords: parser
8 ;; indentation
9 ;; navigation
10 ;; Version: 1.1.1
11 ;; package-requires: ((cl-lib "0.4") (emacs "24.2"))
12 ;; URL: http://stephe-leake.org/emacs/ada-mode/emacs-ada-mode.html
13 ;;
14 ;; This file is part of GNU Emacs.
15 ;;
16 ;; GNU Emacs is free software: you can redistribute it and/or modify
17 ;; it under the terms of the GNU General Public License as published by
18 ;; the Free Software Foundation, either version 3 of the License, or
19 ;; (at your option) any later version.
20 ;;
21 ;; GNU Emacs is distributed in the hope that it will be useful,
22 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 ;; GNU General Public License for more details.
25 ;;
26 ;; You should have received a copy of the GNU General Public License
27 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
28 ;;
29
30 ;;; Commentary:
31
32 ;;;; History: see NEWS-wisi.text
33 ;;
34 ;;;; indentation algorithm overview
35 ;;
36 ;; This design is inspired in part by experience writing a SMIE
37 ;; indentation engine for Ada, and the wisent parser.
38 ;;
39 ;; The general approach to indenting a given token is to find the
40 ;; start of the statement it is part of, or some other relevant point
41 ;; in the statement, and indent relative to that. So we need a parser
42 ;; that lets us find statement indent points from arbitrary places in
43 ;; the code.
44 ;;
45 ;; For example, the grammar for Ada as represented by the EBNF in LRM
46 ;; Annex P is not LALR(1), so we use a generalized LALR(1) parser (see
47 ;; wisi-parse, wisi-compile).
48 ;;
49 ;; The parser actions cache indentation and other information as text
50 ;; properties of tokens in statements.
51 ;;
52 ;; An indentation engine moves text in the buffer, as does user
53 ;; editing, so we can't rely on character positions remaining
54 ;; constant. So the parser actions use markers to store
55 ;; positions. Text properties also move with the text.
56 ;;
57 ;; The stored information includes a marker at each statement indent
58 ;; point. Thus, the indentation algorithm is: find the previous token
59 ;; with cached information, and either indent from it, or fetch from
60 ;; it the marker for a previous statement indent point, and indent
61 ;; relative to that.
62 ;;
63 ;; Since we have a cache (the text properties), we need to consider
64 ;; when to invalidate it. Ideally, we invalidate only when a change to
65 ;; the buffer would change the result of a parse that crosses that
66 ;; change, or starts after that change. Changes in whitespace
67 ;; (indentation and newlines) do not affect an Ada parse. Other
68 ;; languages are sensitive to newlines (Bash for example) or
69 ;; indentation (Python). Adding comments does not change a parse,
70 ;; unless code is commented out. For now we invalidate the cache after
71 ;; the edit point if the change involves anything other than
72 ;; whitespace.
73 ;;
74 ;;; Handling parse errors:
75 ;;
76 ;; When a parse fails, the cache information before the failure point
77 ;; is only partly correct, and there is no cache informaiton after the
78 ;; failure point.
79 ;;
80 ;; However, in the case where a parse previously succeeded, and the
81 ;; current parse fails due to editing, we keep the preceding cache
82 ;; information by setting wisi-cache-max to the edit point in
83 ;; wisi-before change; the parser does not apply actions before that
84 ;; point.
85 ;;
86 ;; This allows navigation and indentation in the text preceding the
87 ;; edit point, and saves some time.
88 ;;
89 ;;;; comparison to the SMIE parser
90 ;;
91 ;; The central problem to be solved in building the SMIE parser is
92 ;; grammar precedence conflicts; the general solution is refining
93 ;; keywords so that each new keyword can be assigned a unique
94 ;; precedence. This means ad hoc code must be written to determine the
95 ;; correct refinement for each language keyword from the surrounding
96 ;; tokens. In effect, for a complex language like Ada, the knowledge
97 ;; of the language grammar is mostly embedded in the refinement code;
98 ;; only a small amount is in the refined grammar. Implementing a SMIE
99 ;; parser for a new language involves the same amount of work as the
100 ;; first language.
101 ;;
102 ;; Using a generalized LALR parser avoids that particular problem;
103 ;; assuming the language is already defined by a grammar, it is only a
104 ;; matter of a format change to teach the wisi parser the
105 ;; language. The problem in a wisi indentation engine is caching the
106 ;; output of the parser in a useful way, since we can't start the
107 ;; parser from arbitrary places in the code (as we can with the SMIE
108 ;; parser). A second problem is determining when to invalidate the
109 ;; cache. But these problems are independent of the language being
110 ;; parsed, so once we have one wisi indentation engine working,
111 ;; adapting it to new languages should be quite simple.
112 ;;
113 ;; The SMIE parser does not find the start of each statement, only the
114 ;; first language keyword in each statement; additional code must be
115 ;; written to find the statement start and indent points. The wisi
116 ;; parser finds the statement start and indent points directly.
117 ;;
118 ;; In SMIE, it is best if each grammar rule is a complete statement,
119 ;; so forward-sexp will traverse the entire statement. If nested
120 ;; non-terminals are used, forward-sexp may stop inside one of the
121 ;; nested non-terminals. This problem does not occur with the wisi
122 ;; parser.
123 ;;
124 ;; A downside of the wisi parser is conflicts in the grammar; they can
125 ;; be much more difficult to resolve than in the SMIE parser. The
126 ;; generalized parser helps by handling conflicts, but it does so by
127 ;; running multiple parsers in parallel, persuing each choice in the
128 ;; conflict. If the conflict is due to a genuine ambiguity, both paths
129 ;; will succeed, which causes the parse to fail, since it is not clear
130 ;; which set of text properties to store. Even if one branch
131 ;; ultimately fails, running parallel parsers over large sections of
132 ;; code is slow. Finally, this approach can lead to exponential growth
133 ;; in the number of parsers. So grammar conflicts must still be
134 ;; analyzed and minimized.
135 ;;
136 ;; In addition, the complete grammar must be specified; in smie, it is
137 ;; often possible to specify a subset of the grammar.
138 ;;
139 ;;;; grammar compiler and parser
140 ;;
141 ;; Since we are using a generalized LALR(1) parser, we cannot use any
142 ;; of the wisent grammar functions. We use OpenToken wisi-generate
143 ;; to compile BNF to Elisp source (similar to
144 ;; semantic-grammar-create-package), and wisi-compile-grammar to
145 ;; compile that to the parser table.
146 ;;
147 ;; Semantic provides a complex lexer, more complicated than we need
148 ;; for indentation. So we use the elisp lexer, which consists of
149 ;; `forward-comment', `skip-syntax-forward', and `scan-sexp'. We wrap
150 ;; that in functions that return tokens in the form wisi-parse
151 ;; expects.
152 ;;
153 ;;;; lexer
154 ;;
155 ;; The lexer is `wisi-forward-token'. It relies on syntax properties,
156 ;; so syntax-propertize must be called on the text to be lexed before
157 ;; wisi-forward-token is called. In general, it is hard to determine
158 ;; an appropriate end-point for syntax-propertize, other than
159 ;; point-max. So we call (syntax-propertize point-max) in wisi-setup,
160 ;; and also call syntax-propertize in wisi-after-change.
161 ;;
162 ;;;; code style
163 ;;
164 ;; 'wisi' was originally short for "wisent indentation engine", but
165 ;; now is just a name.
166 ;;
167 ;; not using lexical-binding because we support Emacs 23
168 ;;
169 ;;;;;
170
171 ;;; Code:
172
173 (require 'cl-lib)
174 (require 'wisi-parse)
175
176 ;; WORKAROUND: for some reason, this condition doesn't work in batch mode!
177 ;; (when (and (= emacs-major-version 24)
178 ;; (= emacs-minor-version 2))
179 (require 'wisi-compat-24.2)
180 ;;)
181
182 (defcustom wisi-size-threshold 100000
183 "Max size (in characters) for using wisi parser results for syntax highlighting and file navigation."
184 :type 'integer
185 :group 'wisi
186 :safe 'integerp)
187 (make-variable-buffer-local 'wisi-size-threshold)
188
189 ;;;; lexer
190
191 (defvar-local wisi-class-list nil)
192 (defvar-local wisi-keyword-table nil)
193 (defvar-local wisi-punctuation-table nil)
194 (defvar-local wisi-punctuation-table-max-length 0)
195 (defvar-local wisi-string-double-term nil);; string delimited by double quotes
196 (defvar-local wisi-string-quote-escape-doubled nil
197 "Non-nil if a string delimiter is escaped by doubling it (as in Ada).")
198 (defvar-local wisi-string-quote-escape nil
199 "Cons '(delim . character) where 'character' escapes quotes in strings delimited by 'delim'.")
200 (defvar-local wisi-string-single-term nil) ;; string delimited by single quotes
201 (defvar-local wisi-symbol-term nil)
202 (defvar-local wisi-number-term nil)
203 (defvar-local wisi-number-p nil)
204
205 (defun wisi-number-p (token-text)
206 "Return t if TOKEN-TEXT plus text after point matches the
207 syntax for a real literal; otherwise nil. point is after
208 TOKEN-TEXT; move point to just past token."
209 ;; typical literals:
210 ;; 1234
211 ;; 1234.5678
212 ;; _not_ including non-decimal base, or underscores (see ada-wisi-number-p)
213 ;;
214 (let ((end (point)))
215 ;; starts with a simple integer
216 (when (string-match "^[0-9]+$" token-text)
217 (when (looking-at "\\.[0-9]+")
218 ;; real number
219 (goto-char (setq end (match-end 0)))
220 (when (looking-at "[Ee][+-][0-9]+")
221 ;; exponent
222 (goto-char (setq end (match-end 0)))))
223
224 t
225 )))
226
227 (defun wisi-forward-token ()
228 "Move point forward across one token, skipping leading whitespace and comments.
229 Return the corresponding token, in format: (token start . end) where:
230
231 `token' is a token symbol (not string) from `wisi-punctuation-table',
232 `wisi-keyword-table', `wisi-string-double-term', `wisi-string-double-term' or `wisi-symbol-term'.
233
234 `start, end' are the character positions in the buffer of the start
235 and end of the token text.
236
237 If at end of buffer, returns `wisent-eoi-term'."
238 (forward-comment (point-max))
239 ;; skips leading whitespace, comment, trailing whitespace.
240
241 (let ((start (point))
242 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
243 (syntax (syntax-class (syntax-after (point))))
244 token-id token-text)
245 (cond
246 ((eobp)
247 (setq token-id wisent-eoi-term))
248
249 ((eq syntax 1)
250 ;; punctuation. Find the longest matching string in wisi-punctuation-table
251 (forward-char 1)
252 (let ((next-point (point))
253 temp-text temp-id done)
254 (while (not done)
255 (setq temp-text (buffer-substring-no-properties start (point)))
256 (setq temp-id (car (rassoc temp-text wisi-punctuation-table)))
257 (when temp-id
258 (setq token-id temp-id
259 next-point (point)))
260 (if (or
261 (eobp)
262 (= (- (point) start) wisi-punctuation-table-max-length))
263 (setq done t)
264 (forward-char 1))
265 )
266 (goto-char next-point)))
267
268 ((memq syntax '(4 5)) ;; open, close parenthesis
269 (forward-char 1)
270 (setq token-text (buffer-substring-no-properties start (point)))
271 (setq token-id (symbol-value (intern-soft token-text wisi-keyword-table))))
272
273 ((eq syntax 7)
274 ;; string quote, either single or double. we assume point is
275 ;; before the start quote, not the end quote
276 (let ((delim (char-after (point)))
277 (forward-sexp-function nil))
278 (condition-case err
279 (progn
280 (forward-sexp)
281
282 ;; point is now after the end quote; check for an escaped quote
283 (while (or
284 (and wisi-string-quote-escape-doubled
285 (eq (char-after (point)) delim))
286 (and (eq delim (car wisi-string-quote-escape))
287 (eq (char-before (1- (point))) (cdr wisi-string-quote-escape))))
288 (forward-sexp))
289 (setq token-id (if (= delim ?\") wisi-string-double-term wisi-string-single-term)))
290 (scan-error
291 ;; Something screwed up; we should not get here if
292 ;; syntax-propertize works properly.
293 (error "wisi-forward-token: forward-sexp failed %s" err)
294 ))))
295
296 (t ;; assuming word or symbol syntax
297 (skip-syntax-forward "w_'")
298 (setq token-text (buffer-substring-no-properties start (point)))
299 (setq token-id
300 (or (symbol-value (intern-soft (downcase token-text) wisi-keyword-table))
301 (and (functionp wisi-number-p)
302 (funcall wisi-number-p token-text)
303 (setq token-text (buffer-substring-no-properties start (point)))
304 wisi-number-term)
305 wisi-symbol-term))
306 )
307 );; cond
308
309 (unless token-id
310 (signal 'wisi-parse-error
311 (wisi-error-msg "unrecognized token '%s'" (buffer-substring-no-properties start (point)))))
312
313 (cons token-id (cons start (point)))
314 ))
315
316 (defun wisi-backward-token ()
317 "Move point backward across one token, skipping whitespace and comments.
318 Does _not_ handle numbers with wisi-number-p; just sees lower-level syntax.
319 Return (nil start . end) - same structure as
320 wisi-forward-token, but does not look up symbol."
321 (forward-comment (- (point)))
322 ;; skips leading whitespace, comment, trailing whitespace.
323
324 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
325 (let ((end (point))
326 (syntax (syntax-class (syntax-after (1- (point))))))
327 (cond
328 ((bobp) nil)
329
330 ((eq syntax 1)
331 ;; punctuation. Find the longest matching string in wisi-punctuation-table
332 (backward-char 1)
333 (let ((next-point (point))
334 temp-text done)
335 (while (not done)
336 (setq temp-text (buffer-substring-no-properties (point) end))
337 (when (car (rassoc temp-text wisi-punctuation-table))
338 (setq next-point (point)))
339 (if (or
340 (bobp)
341 (= (- end (point)) wisi-punctuation-table-max-length))
342 (setq done t)
343 (backward-char 1))
344 )
345 (goto-char next-point))
346 )
347
348 ((memq syntax '(4 5)) ;; open, close parenthesis
349 (backward-char 1))
350
351 ((eq syntax 7)
352 ;; a string quote. we assume we are after the end quote, not the start quote
353 (let ((forward-sexp-function nil))
354 (forward-sexp -1)))
355
356 (t ;; assuming word or symbol syntax
357 (if (zerop (skip-syntax-backward "."))
358 (skip-syntax-backward "w_'")))
359 )
360 (cons nil (cons (point) end))
361 ))
362
363 ;;;; token info cache
364 ;;
365 ;; the cache stores the results of parsing as text properties on
366 ;; keywords, for use by the indention and motion engines.
367
368 (cl-defstruct
369 (wisi-cache
370 (:constructor wisi-cache-create)
371 (:copier nil))
372 nonterm;; nonterminal from parse (set by wisi-statement-action)
373
374 token
375 ;; terminal symbol from wisi-keyword-table or
376 ;; wisi-punctuation-table, or lower-level nonterminal from parse
377 ;; (set by wisi-statement-action)
378
379 last ;; pos of last char in token, relative to first (0 indexed)
380
381 class
382 ;; arbitrary lisp symbol, used for indentation and navigation.
383 ;; some classes are defined by wisi:
384 ;;
385 ;; 'block-middle - a block keyword (ie: if then else end), not at the start of a statement
386 ;;
387 ;; 'block-start - a block keyword at the start of a statement
388 ;;
389 ;; 'statement-start - the start of a statement
390 ;;
391 ;; 'open-paren
392 ;;
393 ;; others are language-specific
394
395 containing
396 ;; Marker at the containing keyword for this token.
397 ;; A containing keyword is an indent point; the start of a
398 ;; statement, or 'begin', 'then' or 'else' for a block of
399 ;; statements, etc.
400 ;; nil only for first token in buffer
401
402 prev ;; marker at previous motion token in statement; nil if none
403 next ;; marker at next motion token in statement; nil if none
404 end ;; marker at token at end of current statement
405 )
406
407 (defvar-local wisi-parse-table nil)
408
409 (defvar-local wisi-parse-failed nil
410 "Non-nil when a recent parse has failed - cleared when parse succeeds.")
411
412 (defvar-local wisi-parse-try nil
413 "Non-nil when parse is needed - cleared when parse succeeds.")
414
415 (defvar-local wisi-change-need-invalidate nil
416 "When non-nil, buffer position to invalidate from.
417 Used in before/after change functions.")
418
419 (defvar-local wisi-end-caches nil
420 "List of buffer positions of caches in current statement that need wisi-cache-end set.")
421
422 (defun wisi-delete-cache (after)
423 (with-silent-modifications
424 (remove-text-properties after (point-max) '(wisi-cache nil))
425 ;; We don't remove 'font-lock-face; that's annoying to the user,
426 ;; since they won't be restored until a parse for some other
427 ;; reason, and they are likely to be right anyway.
428 ))
429
430 (defun wisi-invalidate-cache(&optional after)
431 "Invalidate parsing caches for the current buffer from AFTER to end of buffer."
432 (interactive)
433 (if (not after)
434 (setq after (point-min))
435 (setq after
436 (save-excursion
437 (goto-char after)
438 (line-beginning-position))))
439 (when (> wisi-debug 0) (message "wisi-invalidate %s:%d" (current-buffer) after))
440 (setq wisi-cache-max after)
441 (setq wisi-parse-try t)
442 (syntax-ppss-flush-cache after)
443 (wisi-delete-cache after)
444 )
445
446 (defun wisi-before-change (begin end)
447 "For `before-change-functions'."
448 ;; begin . end is range of text being deleted
449
450 ;; If jit-lock-after-change is before wisi-after-change in
451 ;; after-change-functions, it might use any invalid caches in the
452 ;; inserted text.
453 ;;
454 ;; So we check for that here, and ensure it is after
455 ;; wisi-after-change, which deletes the invalid caches
456 (when (boundp 'jit-lock-mode)
457 (when (memq 'wisi-after-change (memq 'jit-lock-after-change after-change-functions))
458 (setq after-change-functions (delete 'wisi-after-change after-change-functions))
459 (add-hook 'after-change-functions 'wisi-after-change nil t))
460 )
461
462 (setq wisi-change-need-invalidate nil)
463
464 (when (> end begin)
465 (save-excursion
466 ;; (info "(elisp)Parser State")
467 (let* ((begin-state (syntax-ppss begin))
468 (end-state (syntax-ppss end))
469 ;; syntax-ppss has moved point to "end".
470 (word-end (progn (skip-syntax-forward "w_")(point))))
471
472 ;; Remove grammar face from word(s) containing change region;
473 ;; might be changing to/from a keyword. See
474 ;; test/ada_mode-interactive_common.adb Obj_1
475 (goto-char begin)
476 (skip-syntax-backward "w_")
477 (with-silent-modifications
478 (remove-text-properties (point) word-end '(font-lock-face nil fontified nil)))
479
480 (if (<= wisi-cache-max begin)
481 ;; Change is in unvalidated region; either the parse was
482 ;; failing, or there is more than one top-level grammar
483 ;; symbol in buffer.
484 (when wisi-parse-failed
485 ;; The parse was failing, probably due to bad syntax; this
486 ;; change may have fixed it, so try reparse.
487 (setq wisi-parse-try t))
488
489 ;; else change is in validated region
490 ;;
491 ;; don't invalidate parse for whitespace, string, or comment changes
492 (cond
493 ((and
494 (nth 3 begin-state); in string
495 (nth 3 end-state)))
496 ;; no easy way to tell if there is intervening non-string
497
498 ((and
499 (nth 4 begin-state); in comment
500 (nth 4 end-state))
501 ;; too hard to detect case where there is intervening
502 ;; code; no easy way to go to end of comment if not
503 ;; newline
504 )
505
506 ;; Deleting whitespace generally does not require parse, but
507 ;; deleting all whitespace between two words does; check that
508 ;; there is whitespace on at least one side of the deleted
509 ;; text.
510 ;;
511 ;; We are not in a comment (checked above), so treat
512 ;; comment end as whitespace in case it is newline, except
513 ;; deleting a comment end at begin means commenting the
514 ;; current line; requires parse.
515 ((and
516 (eq (car (syntax-after begin)) 0) ; whitespace
517 (memq (car (syntax-after (1- end))) '(0 12)) ; whitespace, comment end
518 (or
519 (memq (car (syntax-after (1- begin))) '(0 12))
520 (memq (car (syntax-after end)) '(0 12)))
521 (progn
522 (goto-char begin)
523 (skip-syntax-forward " >" end)
524 (eq (point) end))))
525
526 (t
527 (setq wisi-change-need-invalidate
528 (progn
529 (goto-char begin)
530 ;; note that because of the checks above, this never
531 ;; triggers a parse, so it's fast
532 (wisi-goto-statement-start)
533 (point))))
534 )))
535 ))
536 )
537
538 (defun wisi-after-change (begin end length)
539 "For `after-change-functions'."
540 ;; begin . end is range of text being inserted (empty if equal);
541 ;; length is the size of the deleted text.
542
543 ;; (syntax-ppss-flush-cache begin) is in before-change-functions
544
545 (syntax-propertize end) ;; see comments above on "lexer" re syntax-propertize
546
547 ;; Remove caches on inserted text, which could have caches from
548 ;; before the failed parse (or another buffer), and are in any case
549 ;; invalid. No point in removing 'fontified; that's handled by
550 ;; jit-lock.
551
552 (with-silent-modifications
553 (remove-text-properties begin end '(wisi-cache nil font-lock-face nil)))
554
555 ;; Also remove grammar face from word(s) containing change region;
556 ;; might be changing to/from a keyword. See
557 ;; test/ada_mode-interactive_common.adb Obj_1
558 (save-excursion
559 ;; (info "(elisp)Parser State")
560 (let ((need-invalidate wisi-change-need-invalidate)
561 begin-state end-state word-end)
562 (when (> end begin)
563 (setq begin-state (syntax-ppss begin))
564 (setq end-state (syntax-ppss end))
565 ;; syntax-ppss has moved point to "end".
566 (skip-syntax-forward "w_")
567 (setq word-end (point))
568 (goto-char begin)
569 (skip-syntax-backward "w_")
570 (with-silent-modifications
571 (remove-text-properties (point) word-end '(font-lock-face nil fontified nil))))
572
573 (if (<= wisi-cache-max begin)
574 ;; Change is in unvalidated region
575 (when wisi-parse-failed
576 ;; The parse was failing, probably due to bad syntax; this
577 ;; change may have fixed it, so try reparse.
578 (setq wisi-parse-try t))
579
580 ;; Change is in validated region
581 (cond
582 (wisi-change-need-invalidate
583 ;; wisi-before change determined the removed text alters the
584 ;; parse
585 )
586
587 ((= end begin)
588 (setq need-invalidate nil))
589
590 ((and
591 (nth 3 begin-state); in string
592 (nth 3 end-state))
593 ;; no easy way to tell if there is intervening non-string
594 (setq need-invalidate nil))
595
596 ((or
597 (nth 4 begin-state)
598 (nth 4 end-state)); in comment
599 ;; no easy way to detect intervening code
600 (setq need-invalidate nil)
601 ;; no caches to remove
602 )
603
604 ;; Adding whitespace generally does not require parse, but in
605 ;; the middle of word it does; check that there was
606 ;; whitespace on at least one side of the inserted text.
607 ;;
608 ;; We are not in a comment (checked above), so treat
609 ;; comment end as whitespace in case it is newline
610 ((and
611 (or
612 (memq (car (syntax-after (1- begin))) '(0 12)); whitespace, comment end
613 (memq (car (syntax-after end)) '(0 12)))
614 (progn
615 (goto-char begin)
616 (skip-syntax-forward " >" end)
617 (eq (point) end)))
618 (setq need-invalidate nil))
619
620 (t
621 (setq need-invalidate
622 (progn
623 (goto-char begin)
624 ;; note that because of the checks above, this never
625 ;; triggers a parse, so it's fast
626 (wisi-goto-statement-start)
627 (point))))
628 )
629
630 (if need-invalidate
631 (wisi-invalidate-cache need-invalidate)
632
633 ;; else move cache-max by the net change length.
634 (setq wisi-cache-max
635 (+ wisi-cache-max (- end begin length))) )
636 ))
637 ))
638
639 (defun wisi-get-cache (pos)
640 "Return `wisi-cache' struct from the `wisi-cache' text property at POS.
641 If accessing cache at a marker for a token as set by `wisi-cache-tokens', POS must be (1- mark)."
642 (get-text-property pos 'wisi-cache))
643
644 (defvar-local wisi-parse-error-msg nil)
645
646 (defun wisi-goto-error ()
647 "Move point to position in last error message (if any)."
648 (when (string-match ":\\([0-9]+\\):\\([0-9]+\\):" wisi-parse-error-msg)
649 (let ((line (string-to-number (match-string 1 wisi-parse-error-msg)))
650 (col (string-to-number (match-string 2 wisi-parse-error-msg))))
651 (push-mark)
652 (goto-char (point-min))
653 (forward-line (1- line))
654 (forward-char col))))
655
656 (defun wisi-show-parse-error ()
657 "Show last wisi-parse error."
658 (interactive)
659 (cond
660 (wisi-parse-failed
661 (wisi-goto-error)
662 (message wisi-parse-error-msg))
663
664 (wisi-parse-try
665 (message "need parse"))
666
667 (t
668 (message "parse succeeded"))
669 ))
670
671 (defvar wisi-post-parse-succeed-hook nil
672 "Hook run after parse succeeds.")
673
674 (defun wisi-validate-cache (pos)
675 "Ensure cached data is valid at least up to POS in current buffer."
676 (let ((msg (when (> wisi-debug 0) (format "wisi: parsing %s:%d ..." (buffer-name) (line-number-at-pos pos)))))
677 ;; If wisi-cache-max = pos, then there is no cache at pos; need parse
678 (when (and wisi-parse-try
679 (<= wisi-cache-max pos))
680 (when (> wisi-debug 0)
681 (message msg))
682
683 (setq wisi-parse-try nil)
684 (setq wisi-parse-error-msg nil)
685 (setq wisi-end-caches nil)
686
687 (if (> wisi-debug 1)
688 ;; let debugger stop in wisi-parse
689 (progn
690 (save-excursion
691 (wisi-parse wisi-parse-table 'wisi-forward-token)
692 (setq wisi-cache-max (point))
693 (setq wisi-parse-failed nil))
694 (run-hooks 'wisi-post-parse-succeed-hook))
695
696 ;; else capture errors from bad syntax, so higher level
697 ;; functions can try to continue and/or we don't bother the
698 ;; user.
699 (condition-case err
700 (progn
701 (save-excursion
702 (wisi-parse wisi-parse-table 'wisi-forward-token)
703 (setq wisi-cache-max (point))
704 (setq wisi-parse-failed nil))
705 (run-hooks 'wisi-post-parse-succeed-hook))
706 (wisi-parse-error
707 ;; delete caches past wisi-cache-max added by failed parse
708 (wisi-delete-cache wisi-cache-max)
709 (setq wisi-parse-failed t)
710 (setq wisi-parse-error-msg (cdr err)))
711 ))
712 (if wisi-parse-error-msg
713 ;; error
714 (when (> wisi-debug 0)
715 (message "%s error" msg)
716 (wisi-goto-error)
717 (error wisi-parse-error-msg))
718 ;; no msg; success
719 (when (> wisi-debug 0)
720 (message "%s done" msg)))
721 )))
722
723 (defun wisi-fontify-region (begin end)
724 "For `jit-lock-functions'."
725 (when (< (point-max) wisi-size-threshold)
726 (wisi-validate-cache end)))
727
728 (defun wisi-get-containing-cache (cache)
729 "Return cache from (wisi-cache-containing CACHE)."
730 (let ((containing (wisi-cache-containing cache)))
731 (and containing
732 (wisi-get-cache (1- containing)))))
733
734 (defun wisi-cache-region (cache)
735 "Return region designated by cache.
736 Point must be at cache."
737 (cons (point) (+ (point) (wisi-cache-last cache))))
738
739 (defun wisi-cache-text (cache)
740 "Return property-less buffer substring designated by cache.
741 Point must be at cache."
742 (buffer-substring-no-properties (point) (+ (point) (wisi-cache-last cache))))
743
744 ;;;; parse actions
745
746 (defun wisi-set-end (start-mark end-mark)
747 "Set END-MARK on all caches in `wisi-end-caches' in range START-MARK END-MARK,
748 delete from `wisi-end-caches'."
749 (let ((i 0)
750 pos cache)
751 (while (< i (length wisi-end-caches))
752 (setq pos (nth i wisi-end-caches))
753 (setq cache (wisi-get-cache pos))
754
755 (if (and (>= pos start-mark)
756 (< pos end-mark))
757 (progn
758 (setf (wisi-cache-end cache) end-mark)
759 (setq wisi-end-caches (delq pos wisi-end-caches)))
760
761 ;; else not in range
762 (setq i (1+ i)))
763 )))
764
765 (defvar wisi-tokens nil)
766 ;; keep byte-compiler happy; `wisi-tokens' is bound in action created
767 ;; by wisi-semantic-action
768
769 (defun wisi-statement-action (pairs)
770 "Cache information in text properties of tokens.
771 Intended as a grammar non-terminal action.
772
773 PAIRS is a vector of the form [TOKEN-NUMBER CLASS TOKEN-NUMBER
774 CLASS ...] where TOKEN-NUMBER is the (1 indexed) token number in
775 the production, CLASS is the wisi class of that token. Use in a
776 grammar action as:
777 (wisi-statement-action [1 'statement-start 7 'statement-end])"
778 (save-excursion
779 (let ((first-item t)
780 first-keyword-mark
781 (override-start nil)
782 (i 0))
783 (while (< i (length pairs))
784 (let* ((number (1- (aref pairs i)))
785 (region (cdr (aref wisi-tokens number)));; wisi-tokens is let-bound in wisi-parse-reduce
786 (token (car (aref wisi-tokens number)))
787 (class (aref pairs (setq i (1+ i))))
788 (mark
789 ;; Marker one char into token, so indent-line-to
790 ;; inserts space before the mark, not after
791 (when region (copy-marker (1+ (car region)))))
792 cache)
793
794 (setq i (1+ i))
795
796 (unless (memq class wisi-class-list)
797 (error "%s not in wisi-class-list" class))
798
799 (if region
800 (progn
801 (if (setq cache (wisi-get-cache (car region)))
802 ;; We are processing a previously set non-terminal; ie generic_formal_part in
803 ;;
804 ;; generic_package_declaration : generic_formal_part package_specification SEMICOLON
805 ;; (wisi-statement-action 1 'block-start 2 'block-middle 3 'statement-end)
806 ;;
807 ;; or simple_statement in
808 ;;
809 ;; statement : label_opt simple_statement
810 ;;
811 ;; override nonterm, class, containing
812 ;; set end only if not set yet (due to failed parse)
813 (progn
814 (cl-case (wisi-cache-class cache)
815 (block-start
816 (setf (wisi-cache-class cache)
817 (cond
818 ((eq override-start nil)
819 (cond
820 ((memq class '(block-start statement-start)) 'block-start)
821 (t 'block-middle)))
822
823 ((memq override-start '(block-start statement-start)) 'block-start)
824
825 (t (error "unexpected override-start"))
826 )))
827 (t
828 (setf (wisi-cache-class cache) (or override-start class)))
829 )
830 (setf (wisi-cache-nonterm cache) $nterm)
831 (setf (wisi-cache-containing cache) first-keyword-mark)
832 (unless (wisi-cache-end cache)
833 (if wisi-end-caches
834 (push (car region) wisi-end-caches)
835 (setq wisi-end-caches (list (car region)))
836 ))
837 )
838
839 ;; else create new cache
840 (with-silent-modifications
841 (put-text-property
842 (car region)
843 (1+ (car region))
844 'wisi-cache
845 (wisi-cache-create
846 :nonterm $nterm;; $nterm defined in wisi-semantic-action
847 :token token
848 :last (- (cdr region) (car region))
849 :class (or override-start class)
850 :containing first-keyword-mark)
851 ))
852 (if wisi-end-caches
853 (push (car region) wisi-end-caches)
854 (setq wisi-end-caches (list (car region)))
855 ))
856
857 (when first-item
858 (setq first-item nil)
859 (when (or override-start
860 ;; FIXME: why block-middle here?
861 (memq class '(block-middle block-start statement-start)))
862 (setq override-start nil)
863 (setq first-keyword-mark mark)))
864
865 (when (eq class 'statement-end)
866 (wisi-set-end (1- first-keyword-mark) (copy-marker (1+ (car region)))))
867 )
868
869 ;; region is nil when a production is empty; if the first
870 ;; token is a start, override the class on the next token.
871 (when (and first-item
872 (memq class '(block-middle block-start statement-start)))
873 (setq override-start class)))
874 ))
875 )))
876
877 (defun wisi-containing-action (containing-token contained-token)
878 "Set containing marks in all tokens in CONTAINED-TOKEN with null containing mark to marker pointing to CONTAINING-TOKEN.
879 If CONTAINING-TOKEN is empty, the next token number is used."
880 ;; wisi-tokens is is bound in action created by wisi-semantic-action
881 (let* ((containing-region (cdr (aref wisi-tokens (1- containing-token))))
882 (contained-region (cdr (aref wisi-tokens (1- contained-token)))))
883
884 (unless containing-region ;;
885 (signal 'wisi-parse-error
886 (wisi-error-msg
887 "wisi-containing-action: containing-region '%s' is empty. grammar error; bad action"
888 (wisi-token-text (aref wisi-tokens (1- containing-token))))))
889
890 (unless (or (not contained-region) ;; contained-token is empty
891 (wisi-get-cache (car containing-region)))
892 (signal 'wisi-parse-error
893 (wisi-error-msg
894 "wisi-containing-action: containing-token '%s' has no cache. grammar error; missing action"
895 (wisi-token-text (aref wisi-tokens (1- containing-token))))))
896
897 (while (not containing-region)
898 ;; containing-token is empty; use next
899 (setq containing-region (cdr (aref wisi-tokens containing-token))))
900
901 (when contained-region
902 ;; nil when empty production, may not contain any caches
903 (save-excursion
904 (goto-char (cdr contained-region))
905 (let ((cache (wisi-backward-cache))
906 (mark (copy-marker (1+ (car containing-region)))))
907 (while cache
908
909 ;; skip blocks that are already marked
910 (while (and (>= (point) (car contained-region))
911 (markerp (wisi-cache-containing cache)))
912 (goto-char (1- (wisi-cache-containing cache)))
913 (setq cache (wisi-get-cache (point))))
914
915 (if (or (and (= (car containing-region) (car contained-region))
916 (<= (point) (car contained-region)))
917 (< (point) (car contained-region)))
918 ;; done
919 (setq cache nil)
920
921 ;; else set mark, loop
922 (setf (wisi-cache-containing cache) mark)
923 (setq cache (wisi-backward-cache)))
924 ))))))
925
926 (defun wisi-match-class-token (cache class-tokens)
927 "Return t if CACHE matches CLASS-TOKENS.
928 CLASS-TOKENS is a vector [number class token_id class token_id ...].
929 number is ignored."
930 (let ((i 1)
931 (done nil)
932 (result nil)
933 class token)
934 (while (and (not done)
935 (< i (length class-tokens)))
936 (setq class (aref class-tokens i))
937 (setq token (aref class-tokens (setq i (1+ i))))
938 (setq i (1+ i))
939 (when (and (eq class (wisi-cache-class cache))
940 (eq token (wisi-cache-token cache)))
941 (setq result t
942 done t))
943 )
944 result))
945
946 (defun wisi-motion-action (token-numbers)
947 "Set prev/next marks in all tokens given by TOKEN-NUMBERS.
948 TOKEN-NUMBERS is a vector with each element one of:
949
950 number: the token number; mark that token
951
952 vector [number class token_id]:
953 vector [number class token_id class token_id ...]:
954 mark all tokens in number nonterminal matching (class token_id) with nil prev/next."
955 (save-excursion
956 (let (prev-keyword-mark
957 prev-cache
958 cache
959 mark
960 (i 0))
961 (while (< i (length token-numbers))
962 (let ((token-number (aref token-numbers i))
963 region)
964 (setq i (1+ i))
965 (cond
966 ((numberp token-number)
967 (setq region (cdr (aref wisi-tokens (1- token-number))))
968 (when region
969 (setq cache (wisi-get-cache (car region)))
970 (setq mark (copy-marker (1+ (car region))))
971
972 (when (and prev-keyword-mark
973 cache
974 (null (wisi-cache-prev cache)))
975 (setf (wisi-cache-prev cache) prev-keyword-mark)
976 (setf (wisi-cache-next prev-cache) mark))
977
978 (setq prev-keyword-mark mark)
979 (setq prev-cache cache)
980 ))
981
982 ((vectorp token-number)
983 ;; token-number may contain 0, 1, or more 'class token_id' pairs
984 ;; the corresponding region may be empty
985 ;; there must have been a prev keyword
986 (setq region (cdr (aref wisi-tokens (1- (aref token-number 0)))))
987 (when region ;; not an empty token
988 ;; We must search for all targets at the same time, to
989 ;; get the motion order right.
990 (goto-char (car region))
991 (setq cache (or (wisi-get-cache (point))
992 (wisi-forward-cache)))
993 (while (< (point) (cdr region))
994 (when (wisi-match-class-token cache token-number)
995 (when (null (wisi-cache-prev cache))
996 (setf (wisi-cache-prev cache) prev-keyword-mark))
997 (when (null (wisi-cache-next cache))
998 (setq mark (copy-marker (1+ (point))))
999 (setf (wisi-cache-next prev-cache) mark)
1000 (setq prev-keyword-mark mark)
1001 (setq prev-cache cache)))
1002
1003 (setq cache (wisi-forward-cache))
1004 )))
1005
1006 (t
1007 (error "unexpected token-number %s" token-number))
1008 )
1009
1010 ))
1011 )))
1012
1013 (defun wisi-extend-action (number)
1014 "Extend text of cache at token NUMBER to cover all of token NUMBER.
1015 Also override token with new token."
1016 (let* ((token-region (aref wisi-tokens (1- number)));; wisi-tokens is let-bound in wisi-parse-reduce
1017 (token (car token-region))
1018 (region (cdr token-region))
1019 cache)
1020
1021 (when region
1022 (setq cache (wisi-get-cache (car region)))
1023 (setf (wisi-cache-last cache) (- (cdr region) (car region)))
1024 (setf (wisi-cache-token cache) token)
1025 )
1026 ))
1027
1028 (defun wisi-face-action-1 (face region &optional no-override)
1029 "Apply FACE to REGION. If NO-OVERRIDE is non-nil, don't override existing face."
1030 (when region
1031 ;; We allow overriding a face property, because we don't want to
1032 ;; delete them in wisi-invalidate (see comments there). On the
1033 ;; other hand, it can be an error, so keep this debug
1034 ;; code. However, note that font-lock-face properties must be
1035 ;; removed first, or the buffer must be fresh (never parsed).
1036 ;;
1037 ;; Grammar sets no-override when a higher-level production might
1038 ;; override a face in a lower-level production; that's not an
1039 ;; error.
1040 (let (cur-face
1041 (do-set t))
1042 (when (or no-override
1043 (> wisi-debug 1))
1044 (setq cur-face (get-text-property (car region) 'font-lock-face))
1045 (if cur-face
1046 (if no-override
1047 (setq do-set nil)
1048 (message "%s:%d overriding face %s with %s on '%s'"
1049 (buffer-file-name)
1050 (line-number-at-pos (car region))
1051 face
1052 cur-face
1053 (buffer-substring-no-properties (car region) (cdr region))))
1054
1055 ))
1056 (when do-set
1057 (with-silent-modifications
1058 (add-text-properties
1059 (car region) (cdr region)
1060 (list
1061 'font-lock-face face
1062 'fontified t))))
1063 )))
1064
1065 (defun wisi-face-action (pairs &optional no-override)
1066 "Cache face information in text properties of tokens.
1067 Intended as a grammar non-terminal action.
1068
1069 PAIRS is a vector of the form [token-number face token-number face ...]
1070 token-number may be an integer, or a vector [integer token_id token_id ...]
1071
1072 For an integer token-number, apply face to the first cached token
1073 in the range covered by wisi-tokens[token-number]. If there are
1074 no cached tokens, apply face to entire wisi-tokens[token-number]
1075 region.
1076
1077 For a vector token-number, apply face to the first cached token
1078 in the range matching one of token_id covered by
1079 wisi-tokens[token-number].
1080
1081 If NO-OVERRIDE is non-nil, don't override existing face."
1082 (let (number region face (tokens nil) cache (i 0) (j 1))
1083 (while (< i (length pairs))
1084 (setq number (aref pairs i))
1085 (setq face (aref pairs (setq i (1+ i))))
1086 (cond
1087 ((integerp number)
1088 (setq region (cdr (aref wisi-tokens (1- number))));; wisi-tokens is let-bound in wisi-parse-reduce
1089 (when region
1090 (save-excursion
1091 (goto-char (car region))
1092 (setq cache (or (wisi-get-cache (point))
1093 (wisi-forward-cache)))
1094 (if (< (point) (cdr region))
1095 (when cache
1096 (wisi-face-action-1 face (wisi-cache-region cache) no-override))
1097
1098 ;; no caches in region; just apply face to region
1099 (wisi-face-action-1 face region no-override))
1100 )))
1101
1102 ((vectorp number)
1103 (setq region (cdr (aref wisi-tokens (1- (aref number 0)))))
1104 (when region
1105 (while (< j (length number))
1106 (setq tokens (cons (aref number j) tokens))
1107 (setq j (1+ j)))
1108 (save-excursion
1109 (goto-char (car region))
1110 (setq cache (wisi-forward-find-token tokens (cdr region) t))
1111 ;; might be looking for IDENTIFIER in name, but only have "*".
1112 (when cache
1113 (wisi-face-action-1 face (wisi-cache-region cache) no-override))
1114 )))
1115 )
1116 (setq i (1+ i))
1117
1118 )))
1119
1120 (defun wisi-face-list-action (pairs &optional no-override)
1121 "Cache face information in text properties of tokens.
1122 Intended as a grammar non-terminal action.
1123
1124 PAIRS is a vector of the form [token-number face token-number face ...]
1125 token-number is an integer. Apply face to all cached tokens
1126 in the range covered by wisi-tokens[token-number].
1127
1128 If NO-OVERRIDE is non-nil, don't override existing face."
1129 (let (number region face cache (i 0))
1130 (while (< i (length pairs))
1131 (setq number (aref pairs i))
1132 (setq face (aref pairs (setq i (1+ i))))
1133 (setq region (cdr (aref wisi-tokens (1- number))));; wisi-tokens is let-bound in wisi-parse-reduce
1134 (when region
1135 (save-excursion
1136 (goto-char (car region))
1137 (setq cache (or (wisi-get-cache (point))
1138 (wisi-forward-cache)))
1139 (while (<= (point) (cdr region))
1140 (when cache
1141 (wisi-face-action-1 face (wisi-cache-region cache) no-override))
1142 (setq cache (wisi-forward-cache))
1143 )))
1144
1145 (setq i (1+ i))
1146
1147 )))
1148
1149 ;;;; motion
1150 (defun wisi-backward-cache ()
1151 "Move point backward to the beginning of the first token preceding point that has a cache.
1152 Returns cache, or nil if at beginning of buffer."
1153 (let (cache pos)
1154 (setq pos (previous-single-property-change (point) 'wisi-cache))
1155 ;; There are three cases:
1156 ;;
1157 ;; 1) caches separated by non-cache chars: 'if ... then'
1158 ;; pos is before 'f', cache is on 'i'
1159 ;;
1160 ;; 2) caches not separated: ');'
1161 ;; pos is before ';', cache is on ';'
1162 ;;
1163 ;; 3) at bob; pos is nil
1164 ;;
1165 (if pos
1166 (progn
1167 (setq cache (get-text-property pos 'wisi-cache))
1168 (if cache
1169 ;; case 2
1170 (goto-char pos)
1171 ;; case 1
1172 (setq cache (get-text-property (1- pos) 'wisi-cache))
1173 (goto-char (1- pos))))
1174 ;; at bob
1175 (goto-char (point-min))
1176 (setq cache nil))
1177 cache
1178 ))
1179
1180 (defun wisi-forward-cache ()
1181 "Move point forward to the beginning of the first token after point that has a cache.
1182 Returns cache, or nil if at end of buffer."
1183 (let (cache pos)
1184 (when (get-text-property (point) 'wisi-cache)
1185 ;; on a cache; get past it
1186 (goto-char (1+ (point))))
1187
1188 (setq cache (get-text-property (point) 'wisi-cache))
1189 (if cache
1190 nil
1191
1192 (setq pos (next-single-property-change (point) 'wisi-cache))
1193 (if pos
1194 (progn
1195 (goto-char pos)
1196 (setq cache (get-text-property pos 'wisi-cache)))
1197 ;; at eob
1198 (goto-char (point-max))
1199 (setq cache nil))
1200 )
1201 cache
1202 ))
1203
1204 (defun wisi-forward-find-class (class limit)
1205 "Search forward for a token that has a cache with CLASS.
1206 Return cache, or nil if at end of buffer.
1207 If LIMIT (a buffer position) is reached, throw an error."
1208 (let ((cache (wisi-forward-cache)))
1209 (while (not (eq class (wisi-cache-class cache)))
1210 (setq cache (wisi-forward-cache))
1211 (when (>= (point) limit)
1212 (error "cache with class %s not found" class)))
1213 cache))
1214
1215 (defun wisi-forward-find-token (token limit &optional noerror)
1216 "Search forward for a token that has a cache with TOKEN.
1217 If point is at a matching token, return that token.
1218 TOKEN may be a list; stop on any cache that has a member of the list.
1219 Return cache, or nil if at end of buffer.
1220 If LIMIT (a buffer position) is reached, then if NOERROR is nil, throw an
1221 error, if non-nil, return nil."
1222 (let ((token-list (cond
1223 ((listp token) token)
1224 (t (list token))))
1225 (cache (wisi-get-cache (point)))
1226 (done nil))
1227 (while (not (or done
1228 (and cache
1229 (memq (wisi-cache-token cache) token-list))))
1230 (setq cache (wisi-forward-cache))
1231 (when (>= (point) limit)
1232 (if noerror
1233 (progn
1234 (setq done t)
1235 (setq cache nil))
1236 (error "cache with token %s not found" token))))
1237 cache))
1238
1239 (defun wisi-forward-find-nonterm (nonterm limit)
1240 "Search forward for a token that has a cache with NONTERM.
1241 NONTERM may be a list; stop on any cache that has a member of the list.
1242 Return cache, or nil if at end of buffer.
1243 If LIMIT (a buffer position) is reached, throw an error."
1244 (let ((nonterm-list (cond
1245 ((listp nonterm) nonterm)
1246 (t (list nonterm))))
1247 (cache (wisi-forward-cache)))
1248 (while (not (memq (wisi-cache-nonterm cache) nonterm-list))
1249 (setq cache (wisi-forward-cache))
1250 (when (>= (point) limit)
1251 (error "cache with nonterm %s not found" nonterm)))
1252 cache))
1253
1254 (defun wisi-goto-cache-next (cache)
1255 (goto-char (1- (wisi-cache-next cache)))
1256 (wisi-get-cache (point))
1257 )
1258
1259 (defun wisi-forward-statement-keyword ()
1260 "If not at a cached token, move forward to next
1261 cache. Otherwise move to cache-next, or next cache if nil.
1262 Return cache found."
1263 (wisi-validate-cache (point-max)) ;; ensure there is a next cache to move to
1264 (let ((cache (wisi-get-cache (point))))
1265 (if cache
1266 (let ((next (wisi-cache-next cache)))
1267 (if next
1268 (goto-char (1- next))
1269 (wisi-forward-token)
1270 (wisi-forward-cache)))
1271 (wisi-forward-cache))
1272 )
1273 (wisi-get-cache (point))
1274 )
1275
1276 (defun wisi-backward-statement-keyword ()
1277 "If not at a cached token, move backward to prev
1278 cache. Otherwise move to cache-prev, or prev cache if nil."
1279 (wisi-validate-cache (point))
1280 (let ((cache (wisi-get-cache (point))))
1281 (if cache
1282 (let ((prev (wisi-cache-prev cache)))
1283 (if prev
1284 (goto-char (1- prev))
1285 (wisi-backward-cache)))
1286 (wisi-backward-cache))
1287 ))
1288
1289 (defun wisi-goto-containing (cache &optional error)
1290 "Move point to containing token for CACHE, return cache at that point.
1291 If ERROR, throw error when CACHE has no container; else return nil."
1292 (cond
1293 ((markerp (wisi-cache-containing cache))
1294 (goto-char (1- (wisi-cache-containing cache)))
1295 (wisi-get-cache (point)))
1296 (t
1297 (when error
1298 (error "already at outermost containing token")))
1299 ))
1300
1301 (defun wisi-goto-containing-paren (cache)
1302 "Move point to just after the open-paren containing CACHE.
1303 Return cache for paren, or nil if no containing paren."
1304 (while (and cache
1305 (not (eq (wisi-cache-class cache) 'open-paren)))
1306 (setq cache (wisi-goto-containing cache)))
1307 (when cache
1308 (forward-char 1))
1309 cache)
1310
1311 (defun wisi-goto-start (cache)
1312 "Move point to containing ancestor of CACHE that has class block-start or statement-start.
1313 Return start cache."
1314 (when
1315 ;; cache nil at bob, or on cache in partially parsed statement
1316 (while (and cache
1317 (not (memq (wisi-cache-class cache) '(block-start statement-start))))
1318 (setq cache (wisi-goto-containing cache)))
1319 )
1320 cache)
1321
1322 (defun wisi-goto-end-1 (cache)
1323 (goto-char (1- (wisi-cache-end cache))))
1324
1325 (defun wisi-goto-statement-start ()
1326 "Move point to token at start of statement point is in or after.
1327 Return start cache."
1328 (interactive)
1329 (wisi-validate-cache (point))
1330 (let ((cache (wisi-get-cache (point))))
1331 (unless cache
1332 (setq cache (wisi-backward-cache)))
1333 (wisi-goto-start cache)))
1334
1335 (defun wisi-goto-statement-end ()
1336 "Move point to token at end of statement point is in or before."
1337 (interactive)
1338 (wisi-validate-cache (point))
1339 (let ((cache (or (wisi-get-cache (point))
1340 (wisi-forward-cache))))
1341 (when (wisi-cache-end cache)
1342 ;; nil when cache is statement-end
1343 (wisi-goto-end-1 cache))
1344 ))
1345
1346 (defun wisi-next-statement-cache (cache)
1347 "Move point to CACHE-next, return cache; error if nil."
1348 (when (not (markerp (wisi-cache-next cache)))
1349 (error "no next statement cache"))
1350 (goto-char (1- (wisi-cache-next cache)))
1351 (wisi-get-cache (point)))
1352
1353 (defun wisi-prev-statement-cache (cache)
1354 "Move point to CACHE-next, return cache; error if nil."
1355 (when (not (markerp (wisi-cache-prev cache)))
1356 (error "no prev statement cache"))
1357 (goto-char (1- (wisi-cache-prev cache)))
1358 (wisi-get-cache (point)))
1359
1360 ;;;; indentation
1361
1362 (defun wisi-comment-indent ()
1363 "For `comment-indent-function'. Indent single line comment to
1364 the comment on the previous line."
1365 ;; This should only be called by comment-indent-new-line or
1366 ;; fill-comment-paragraph, so there will be a preceding comment line
1367 ;; that we can trust.
1368 (save-excursion
1369 (forward-comment -1)
1370 (if (looking-at comment-start)
1371 (current-column)
1372 (error "wisi-comment-indent called after non-comment"))))
1373
1374 (defun wisi-indent-current (offset)
1375 "Return indentation OFFSET relative to indentation of current line."
1376 (+ (current-indentation) offset)
1377 )
1378
1379 (defun wisi-indent-paren (offset)
1380 "Return indentation OFFSET relative to preceding open paren."
1381 (save-excursion
1382 (goto-char (nth 1 (syntax-ppss)))
1383 (+ (current-column) offset)))
1384
1385 (defun wisi-indent-start (offset cache)
1386 "Return indentation of OFFSET relative to containing ancestor
1387 of CACHE with class statement-start or block-start."
1388 (wisi-goto-start cache)
1389 (+ (current-indentation) offset))
1390
1391 (defun wisi-indent-statement ()
1392 "Indent region given by `wisi-goto-start' on cache at or before point, then wisi-cache-end."
1393 (wisi-validate-cache (point))
1394
1395 (save-excursion
1396 (let ((cache (or (wisi-get-cache (point))
1397 (wisi-backward-cache))))
1398 (when cache
1399 ;; can be nil if in header comment
1400 (let ((start (progn (wisi-goto-start cache) (point)))
1401 (end (progn
1402 (when (wisi-cache-end cache)
1403 ;; nil when cache is statement-end
1404 (goto-char (1- (wisi-cache-end cache))))
1405 (point))))
1406 (indent-region start end)
1407 ))
1408 )))
1409
1410 (defvar-local wisi-indent-calculate-functions nil
1411 "Functions to calculate indentation. Each called with point
1412 before a token at the beginning of a line (at current
1413 indentation); return indentation column for that token, or
1414 nil. May move point. Calling stops when first function returns
1415 non-nil.")
1416
1417 (defvar-local wisi-post-parse-fail-hook
1418 "Function to reindent portion of buffer.
1419 Called from `wisi-indent-line' when a parse succeeds after
1420 failing; assumes user was editing code that is now syntactically
1421 correct. Must leave point at indentation of current line.")
1422
1423 (defvar-local wisi-indent-failed nil
1424 "Non-nil when wisi-indent-line fails due to parse failing; cleared when indent succeeds.")
1425
1426 (defun wisi-indent-line ()
1427 "Indent current line using the wisi indentation engine."
1428 (interactive)
1429
1430 (let ((savep (point))
1431 indent)
1432 (save-excursion
1433 (back-to-indentation)
1434 (when (>= (point) savep) (setq savep nil))
1435
1436 (when (>= (point) wisi-cache-max)
1437 (wisi-validate-cache (line-end-position))) ;; include at lease the first token on this line
1438
1439 (if (> (point) wisi-cache-max)
1440 ;; parse failed
1441 (progn
1442 ;; no indent info at point. Assume user is
1443 ;; editing; indent to previous line, fix it
1444 ;; after parse succeeds
1445 (setq wisi-indent-failed t)
1446 (forward-line -1);; safe at bob
1447 (back-to-indentation)
1448 (setq indent (current-column)))
1449
1450 ;; parse succeeded
1451 (when wisi-indent-failed
1452 ;; previous parse failed
1453 (setq wisi-indent-failed nil)
1454 (run-hooks 'wisi-post-parse-fail-hook))
1455
1456 (when (> (point) wisi-cache-max)
1457 (error "wisi-post-parse-fail-hook invalidated parse."))
1458
1459 (setq indent
1460 (with-demoted-errors
1461 (or (run-hook-with-args-until-success 'wisi-indent-calculate-functions) 0))
1462 )
1463 ))
1464
1465 (if savep
1466 ;; point was inside line text; leave it there
1467 (save-excursion (indent-line-to indent))
1468 ;; point was before line text; move to start of text
1469 (indent-line-to indent))
1470 ))
1471
1472 ;;;; debug
1473 (defun wisi-parse-buffer ()
1474 (interactive)
1475 (syntax-propertize (point-max))
1476 (wisi-invalidate-cache)
1477 (wisi-validate-cache (point-max)))
1478
1479 (defun wisi-show-cache ()
1480 "Show cache at point."
1481 (interactive)
1482 (message "%s" (wisi-get-cache (point))))
1483
1484 (defun wisi-show-token ()
1485 "Move forward across one keyword, show token_id."
1486 (interactive)
1487 (let ((token (wisi-forward-token)))
1488 (message "%s" (car token))))
1489
1490 (defun wisi-show-containing-or-previous-cache ()
1491 (interactive)
1492 (let ((cache (wisi-get-cache (point))))
1493 (if cache
1494 (message "containing %s" (wisi-goto-containing cache t))
1495 (message "previous %s" (wisi-backward-cache)))
1496 ))
1497
1498 (defun wisi-show-cache-max ()
1499 (interactive)
1500 (push-mark)
1501 (goto-char wisi-cache-max))
1502
1503 ;;;;; setup
1504
1505 (defun wisi-setup (indent-calculate post-parse-fail class-list keyword-table token-table parse-table)
1506 "Set up a buffer for parsing files with wisi."
1507 (setq wisi-class-list class-list)
1508 (setq wisi-string-double-term (car (symbol-value (intern-soft "string-double" token-table))))
1509 (setq wisi-string-single-term (car (symbol-value (intern-soft "string-single" token-table))))
1510 (setq wisi-symbol-term (car (symbol-value (intern-soft "symbol" token-table))))
1511
1512 (let ((numbers (cadr (symbol-value (intern-soft "number" token-table)))))
1513 (setq wisi-number-term (car numbers))
1514 (setq wisi-number-p (cdr numbers)))
1515
1516 (setq wisi-punctuation-table (symbol-value (intern-soft "punctuation" token-table)))
1517 (setq wisi-punctuation-table-max-length 0)
1518 (let (fail)
1519 (dolist (item wisi-punctuation-table)
1520 (when item ;; default matcher can be nil
1521
1522 ;; check that all chars used in punctuation tokens have punctuation syntax
1523 (mapc (lambda (char)
1524 (when (not (= ?. (char-syntax char)))
1525 (setq fail t)
1526 (message "in %s, %c does not have punctuation syntax"
1527 (car item) char)))
1528 (cdr item))
1529
1530 (when (< wisi-punctuation-table-max-length (length (cdr item)))
1531 (setq wisi-punctuation-table-max-length (length (cdr item)))))
1532 )
1533 (when fail
1534 (error "aborting due to punctuation errors")))
1535
1536 (setq wisi-keyword-table keyword-table)
1537 (setq wisi-parse-table parse-table)
1538
1539 (setq wisi-indent-calculate-functions indent-calculate)
1540 (set (make-local-variable 'indent-line-function) 'wisi-indent-line)
1541
1542 (setq wisi-post-parse-fail-hook post-parse-fail)
1543 (setq wisi-indent-failed nil)
1544
1545 (add-hook 'before-change-functions 'wisi-before-change nil t)
1546 (add-hook 'after-change-functions 'wisi-after-change nil t)
1547
1548 (when (functionp 'jit-lock-register)
1549 (jit-lock-register 'wisi-fontify-region))
1550
1551 ;; see comments on "lexer" above re syntax-propertize
1552 (syntax-propertize (point-max))
1553
1554 (wisi-invalidate-cache)
1555 )
1556
1557 (provide 'wisi)
1558 ;;; wisi.el ends here