1 ;;; sm-c-mode.el --- Experimental C major mode based on SMIE -*- lexical-binding: t; -*-
3 ;; Copyright (C) 2015 Free Software Foundation, Inc.
5 ;; Author: Stefan Monnier <monnier@iro.umontreal.ca>
9 ;; This program is free software; you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation, either version 3 of the License, or
12 ;; (at your option) any later version.
14 ;; This program is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
26 ;; This is an experiment to see concretely where&how SMIE falls down when
27 ;; trying to handle a language like C.
28 ;; So, strictly speaking, this does provide "SMIE-based indentation for C" and
29 ;; might even do it OK for simple cases, but it really doesn't benefit much
31 ;; - it does a lot of its own parsing by hand.
32 ;; - its smie-ruled-function also does a lot of indentation by hand.
33 ;; Hopefully at some point, someone will find a way to extend SMIE such that
34 ;; it can handle C without having to constantly work around SMIE, e.g.
35 ;; it'd be nice to hook the sm-c--while-to-do, sm-c--else-to-if, and sm-c--boi
36 ;; functions into SMIE at some level.
39 ;; - M-; mistakes # for a comment in CPP directives!
40 ;; Ha! As if this was the only/main problem!
47 (defgroup sm-c-mode nil
48 "Major mode to edit C code, based on SMIE."
51 (defcustom sm-c-indent-basic 2
52 "Basic step of indentation.
53 Typically 2 for GNU style and `tab-width' for Linux style."
56 (defcustom sm-c-indent-braces t
57 "If non-nil, braces in if/while/... are indented."
60 ;;; Handling CPP directives.
62 (defsubst sm-c--cpp-inside-p (ppss)
66 (defconst sm-c--cpp-regexp "^[ \t]*\\(\\(#\\)[ \t]*\\([a-z]+\\)\\)"))
68 (defconst sm-c--cpp-syntax-table
69 (let ((st (make-syntax-table)))
70 (modify-syntax-entry ?/ ". 124" st)
71 (modify-syntax-entry ?* ". 23b" st)
72 (modify-syntax-entry ?\n ">" st)
75 (defun sm-c--cpp-goto-end (ppss &optional limit)
76 (cl-assert (sm-c--cpp-inside-p ppss))
79 (and (setq found (re-search-forward "\\(?:\\\\\\\\\\)*\n" limit 'move))
80 ;; We could also check (nth 5 ppss) to figure out if we're
81 ;; after a backslash, but this is a very common case, so it's good
82 ;; to avoid calling parse-partial-sexp for that.
83 (or (eq ?\\ (char-before (match-beginning 0)))
84 (with-syntax-table sm-c--cpp-syntax-table
85 (nth 4 (parse-partial-sexp (1+ (nth 8 ppss)) (point)))))))
88 (defun sm-c--cpp-fontify-syntactically (ppss)
89 ;; FIXME: ¡¡BIG UGLY HACK!!
90 ;; Copied from font-lock.el's font-lock-fontify-syntactically-region.
91 (cl-assert (> (point) (nth 8 ppss)))
94 (sm-c--cpp-goto-end ppss)
95 (narrow-to-region (1+ (nth 8 ppss)) (point))
96 ;; FIXME: We should add some "with-local-syntax-ppss" macro to
98 (let ((syntax-propertize-function nil)
99 (syntax-ppss-cache nil)
100 (syntax-ppss-last nil))
101 (font-lock-fontify-syntactically-region (point-min) (point-max))))))
103 (defun sm-c--cpp-syntax-propertize (end)
104 (let ((ppss (syntax-ppss))
106 (when (sm-c--cpp-inside-p ppss)
108 (and (setq found (re-search-forward "\\(\\\\\\\\\\)*\n" end 'move))
109 (or (eq ?\\ (char-before (match-beginning 0)))
110 (with-syntax-table sm-c--cpp-syntax-table
111 (nth 4 (parse-partial-sexp (1+ (nth 8 ppss)) (point)))))))
113 (put-text-property (1- (point)) (point)
114 'syntax-table (string-to-syntax "> c"))))))
116 ;;;; Indenting CPP directives.
118 (defcustom sm-c-indent-cpp-basic 1
119 "Indent step for CPP directives."
122 (defun sm-c--cpp-prev (tok)
125 (when (re-search-backward sm-c--cpp-regexp nil t)
126 (pcase (cons tok (match-string 3))
127 (`(,_ . "endif") (sm-c--cpp-prev "endif"))
128 ((or `(,(or "endif" "else" "elif") . ,(or "if" "ifdef" "ifndef"))
129 `(,(or "else" "elif") . "elif"))
131 (`(,(or "endif" "else" "elif") . ,_) nil)
132 (`(,_ . ,(or "if" "ifdef" "ifndef" "elif" "else"))
133 (setq offset sm-c-indent-cpp-basic))
137 (goto-char (match-beginning 3))
138 (+ offset (current-column)))))
141 (defun sm-c--cpp-indent-line (&optional _arg)
142 ;; FIXME: Also align the terminating \, if any.
143 (when (> sm-c-indent-cpp-basic 0)
144 (let* ((pos (point-marker))
149 (when (looking-at sm-c--cpp-regexp)
150 (setq beg (match-beginning 3))
151 (or (sm-c--cpp-prev (match-string 3)) 0)))))
153 (let ((before (<= pos beg)))
155 (unless (= (current-column) indent)
156 (skip-chars-backward " \t")
157 (delete-region (point)
158 (progn (skip-chars-forward " \t") (point)))
160 (unless before (goto-char pos)))))))
162 ;;;; Indenting inside CPP #define.
164 (defconst sm-c--cpp-smie-indent-functions
165 ;; FIXME: Don't just align line after #define with the "d"!
166 (remq #'smie-indent-comment-inside
167 (default-value 'smie-indent-functions)))
169 (defun sm-c--cpp-smie-indent ()
170 (let ((ppss (syntax-ppss)))
172 ((sm-c--cpp-inside-p ppss)
174 (narrow-to-region (nth 8 ppss) (point-max))
175 (let ((smie-indent-functions sm-c--cpp-smie-indent-functions))
176 (smie-indent-calculate))))
177 ((equal (syntax-after (point)) (string-to-syntax "< c")) 0)
178 ((looking-at sm-c--cpp-regexp)
179 (message "s-p-l=%S s-p-d=%S" syntax-ppss-last syntax-propertize--done)
180 (when (get-buffer "*trace-output*")
181 (with-current-buffer "*trace-output*"
182 (message "%S" (buffer-string))))
187 (defvar sm-c-mode-syntax-table
188 (let ((st (make-syntax-table)))
189 (modify-syntax-entry ?/ ". 124" st)
190 (modify-syntax-entry ?* ". 23b" st)
191 (modify-syntax-entry ?\n ">" st)
192 (modify-syntax-entry ?\" "\"" st)
193 (modify-syntax-entry ?\' "\"" st)
194 (modify-syntax-entry ?= "." st)
195 (modify-syntax-entry ?< "." st)
196 (modify-syntax-entry ?> "." st)
199 (defun sm-c-syntax-propertize (start end)
201 (sm-c--cpp-syntax-propertize end)
203 (syntax-propertize-rules
204 (sm-c--cpp-regexp (2 (prog1 "< c" (sm-c--cpp-syntax-propertize end)))))
207 (defun sm-c-syntactic-face-function (ppss)
208 (if (sm-c--cpp-inside-p ppss)
209 (prog1 nil (sm-c--cpp-fontify-syntactically ppss))
210 (funcall (default-value 'font-lock-syntactic-face-function) ppss)))
214 (defconst sm-c-paren-block-keywords '("if" "while" "for" "switch"))
216 (defconst sm-c-smie-precedence-table
218 ;; Compiled from https://en.wikipedia.org/wiki/Operators_in_C_and_C++.
220 ;; (nonassoc "throw")
221 (nonassoc "=" "+=" "-=" "*=" "/=" "%=" "<<=" ">>=" "&=" "^=" "|=") ;2
222 ;; (nonassoc "?" ":") ;; Better handle it in the BNF.
227 ;; (assoc "&") ;; Binary and. Confused with address-of.
228 (nonassoc "==" "!=") ;7
229 (nonassoc "<" "<=" ">" ">=") ;8
230 (nonassoc "<<" ">>") ;9
232 (assoc "/" "* mult" "%") ;11
233 ;; (nonassoc ".*" "->*") ;12 ;; Only C++
234 ;; (nonassoc "++" "--" "+" "-" "!" "~" "(type)" "*" "&"
235 ;; "sizeof" "new" "delete");13 ;; All prefix.
236 (left "." "->") ;; "++" "--" suffixes, "()", "[]", "typeid", "*_cast". ;14
237 ;; (noassoc "::") ;; Only C++
240 (defconst sm-c-smie-grammar
241 ;; `((:smie-closer-alist ("{" . "}")) ("{" (39) 0) ("}" 0 (40)) ("else" 27 26) ("," 38 38) ("do" (41) 22) ("while" (42) 23) ("for" (43) 24) (";" 11 11) ("if" (44) 25))
245 '((decls ("typedef" decl) ("extern" decl)
249 (insts ("{" insts "}")
254 ("case" subexp ": case")
256 (exp-if ("if" exp) ("do" exp) ("while" exp) ("switch" exp) ("for" exp)
258 (exp ("(" exp ")") (exp "," exp) (subexp "?" exp ":" exp))
259 (subexp (subexp "||" subexp))
260 ;; Some of the precedence table deals with pre/postfixes, which
261 ;; smie-precs->prec2 can't handle, so handle it here instead.
262 (exp11 (exp12) (exp11 "/" exp11))
263 (exp12 (exp13)) ;C++ only.
264 (exp13 (exp14) ("++ prefix" exp13) ("-- prefix" exp13)
265 ("!" exp13) ("~" exp13) ("&" exp13) ("* deref" exp13))
266 (exp14 (id) (exp14 "++ postfix") (exp14 "-- postfix")
267 (exp14 "->" id) (exp14 "." id)))
268 '((assoc ";") (assoc ",") (nonassoc "?" ":"))
269 sm-c-smie-precedence-table)
270 (smie-precs->prec2 sm-c-smie-precedence-table)
271 (smie-precs->prec2 '((nonassoc ";") (nonassoc ":"))))))
273 ;; (defun sm-c--:-discriminate ()
275 ;; (and (null (smie-backward-sexp))
276 ;; (let ((prev (smie-indent-backward-token)))
278 ;; ((equal prev "case" ) ": case")
279 ;; ((member prev '(";" "{" "}")) ":-label")
282 (defconst sm-c-smie-operator-regexp
284 (pcase-dolist (`(,token . ,_) sm-c-smie-grammar)
285 (when (and (stringp token) (string-match "\\`[^ [:alnum:]]+" token))
286 (push (match-string 0 token) ops)))
289 (defun sm-c-smie-forward-token ()
290 (forward-comment (point-max))
291 (let ((tok (if (looking-at sm-c-smie-operator-regexp)
292 (progn (goto-char (match-end 0)) (match-string 0))
293 (smie-default-forward-token))))
295 ((and (equal tok "") (looking-at "\\\\\n"))
296 (goto-char (match-end 0))
297 (sm-c-smie-forward-token))
298 ((member tok '(":" "*"))
299 (save-excursion (sm-c-smie-backward-token)))
300 ((looking-at "[ \t]*:")
301 (if (not (equal (save-excursion (sm-c-smie-forward-token)) ":label"))
303 (looking-at "[ \t]*:")
304 (goto-char (match-end 0)) ":label"))
308 (defun sm-c-smie-backward-token ()
309 (forward-comment (- (point)))
310 (let ((tok (if (looking-back sm-c-smie-operator-regexp (- (point) 3) t)
311 (progn (goto-char (match-beginning 0)) (match-string 0))
312 (smie-default-backward-token))))
314 ((and (equal tok "") (looking-at "\n"))
316 (if (not (= 0 (mod (skip-chars-backward "\\\\") 2)))
317 (sm-c-smie-backward-token)
320 ((equal tok "*") (sm-c-smie--*-token))
323 (prev (sm-c-smie-backward-token)))
324 (if (zerop (length prev))
325 (progn (goto-char pos1) tok)
326 (let ((pos2 (point)))
327 (pcase (car (smie-indent-backward-token))
328 ("case" (goto-char pos1) ": case")
329 ((or ";" "{" "}") (goto-char pos2) ":label")
330 (_ (goto-char pos1) tok))))))
333 (defun sm-c--prev-token ()
334 (car (smie-indent-backward-token)))
336 (defun sm-c--else-to-if ()
338 (unless (equal (sm-c--prev-token) ";")
341 (pcase (smie-backward-sexp)
342 (`(,_ ,pos "if") (goto-char pos) nil) ;Found it!
343 (`(,_ ,_ ";") nil) ;Can't find it!
344 (`(,_ ,pos "else") (goto-char pos) (sm-c--else-to-if) t)
346 (goto-char pos) (unless (sm-c--while-to-do) (goto-char pos)) t)
347 (`(t . ,_) nil) ;Can't find it!
348 (`(,_ ,pos . ,_) (goto-char pos) t)
351 (defun sm-c--while-to-do ()
352 "Jump to the matching `do' and return non-nil, if any. Return nil otherwise."
353 (pcase (sm-c--prev-token)
356 (forward-char 1) (backward-sexp 1)
357 (equal (sm-c--prev-token) "do"))
359 (let ((found-do nil))
361 (pcase (smie-backward-sexp)
362 (`(,_ ,pos "do") (goto-char pos) (setq found-do t) nil)
363 (`(,_ ,_ ";") nil) ;Can't find it!
364 (`(,_ ,pos "else") (goto-char pos) (sm-c--else-to-if) t)
366 (goto-char pos) (unless (sm-c--while-to-do) (goto-char pos)) t)
367 (`(t . ,_) nil) ;Can't find it!
368 (`(,_ ,pos . ,_) (goto-char pos) t)
369 (`nil (or (not (looking-at "{"))
370 (smie-rule-prev-p "=")))))
373 (defun sm-c--skip-labels (max)
375 (let ((start (point)))
376 (pcase (sm-c-smie-forward-token)
378 (smie-forward-sexp "case")
379 (forward-comment (point-max))
380 (if (>= (point) max) (progn (goto-char start) nil)
383 (forward-comment (point-max))
384 (if (>= (point) max) (progn (goto-char start) nil)
386 (_ (goto-char start) nil)))))
391 (pcase (smie-backward-sexp)
392 (`(,_ ,_ ";") nil) ;Found it!
393 (`(,_ ,pos "else") (goto-char pos) (sm-c--else-to-if) t)
395 (goto-char pos) (unless (sm-c--while-to-do) (goto-char pos)) t)
396 (`(,(pred numberp) ,pos . ,_) (goto-char pos) t)
397 ((or `nil `(nil . ,_))
398 (if (and (or (not (looking-at "{"))
399 (smie-rule-prev-p "="))
402 (goto-char pos) nil))
403 (`(,_ ,_ ,(or "(" "{" "[")) nil) ;Found it!
404 (`(,_ ,pos . ,_) (goto-char pos) t)))))
406 ;; (defun sm-c--if-tail-to-head ()
407 ;; (pcase (sm-c--prev-token)
409 ;; (forward-char 1) (backward-sexp 1)
410 ;; (pcase (sm-c--prev-token)
412 ;; ((or "while" "for") (sm-c--if-tail-to-head))))
413 ;; ("do" (sm-c--if-tail-to-head))))
415 (defun sm-c--boe (tok)
416 (let ((start (point))
417 (res (smie-backward-sexp tok))
420 (and (member (nth 2 res) '("if" "while" "do" "for" "else"))
421 (let ((skip (cdr (assoc (nth 2 res)
428 (let ((forward-sexp-function nil))
429 (forward-sexp (1- skip)))
430 (forward-comment (point-max))
431 (if (< (point) start)
436 (defun sm-c-smie--*-token ()
439 (pcase (car (smie-indent-backward-token))
440 ((or ")" "]") "* mult") ;Multiplication.
441 ((or "(" "[" "{") "* deref")
444 (pcase (smie-backward-sexp "* mult")
445 (`(,_ ,_ ,(or ";" "{")) "* deref")
449 (defun sm-c-smie-hanging-eolp ()
450 (let ((start (point))
451 (prev (smie-indent-backward-token)))
452 (if (and (not (numberp (nth 1 prev)))
453 (save-excursion (equal (sm-c-smie-backward-token) ";")))
454 ;; Treat instructions that start after ";" as always "hanging".
457 (skip-chars-forward " \t")
459 (forward-comment (point-max))
460 (and (looking-at "\\\\\n")
461 (goto-char (match-end 0)))))
463 (defvar sm-c-smie--inhibit-case/label-rule nil)
465 (defun sm-c--smie-virtual ()
466 (if (and (smie-indent--bolp)
468 (member (sm-c-smie-forward-token)
469 '("case" ":label")))))
471 (let ((sm-c-smie--inhibit-case/label-rule t))
472 (smie-indent-calculate))))
474 (defun sm-c-smie-rules (kind token)
475 (pcase (cons kind token)
476 (`(:elem . basic) sm-c-indent-basic)
477 (`(:list-intro . ";")
480 (if (and (null (smie-forward-sexp))
481 ;; FIXME: Handle \\\n as well!
482 (progn (forward-comment (point-max))
489 `(column . ,(smie-indent-virtual))))
490 (`(:before . "while")
492 (when (sm-c--while-to-do)
493 `(column . ,(smie-indent-virtual)))))
494 (`(:before . ,(or "=" "+=" "-=" "*=" "/=" "%=" "<<=" ">>=" "&=" "^=" "|="))
497 `(column . ,(+ (funcall smie-rules-function :elem 'basic)
498 (smie-indent-virtual)))))
500 (when (and (not (smie-rule-bolp)) (smie-rule-prev-p "else"))
502 (smie-indent-backward-token)
503 `(column . ,(sm-c--smie-virtual)))))
504 ;; (`(:after . ,(or "=" "+=" "-=" "*=" "/=" "%=" "<<=" ">>=" "&=" "^=" "|="))
505 ;; (funcall smie-rules-function :elem 'basic))
508 ((smie-rule-prev-p "=") nil) ;Not a block of instructions!
510 (sm-c--boi) (sm-c--skip-labels (point-max))
511 (let ((tok (save-excursion (sm-c-smie-forward-token))))
513 ((member tok '("enum" "struct" "typedef"))
514 `(column . ,(+ (funcall smie-rules-function :elem 'basic)
515 (smie-indent-virtual))))
516 ((or (member tok sm-c-paren-block-keywords)
519 (t `(column . ,(smie-indent-virtual)))))))
520 ((smie-rule-hanging-p)
522 ((smie-rule-prev-p "do" "else")
523 (smie-indent-backward-token))
524 ((smie-rule-prev-p ")")
526 (smie-indent-backward-token))
528 `(column . ,(sm-c--smie-virtual)))
531 (pcase (sm-c--prev-token)
535 `(column . ,(+ (funcall smie-rules-function :elem 'basic)
536 (smie-indent-virtual))))))
538 (_ (goto-char pos) (sm-c--boi)
540 `(column . ,(sm-c--smie-virtual)))))))))
543 (let ((res (smie-backward-sexp)))
545 (`nil `(column . ,(+ (funcall smie-rules-function :elem 'basic)
546 (sm-c--smie-virtual))))
548 (unless (save-excursion
549 (member (sm-c-smie-backward-token)
550 sm-c-paren-block-keywords))
551 `(column . ,(sm-c--smie-virtual))))))))
554 (funcall smie-rules-function :elem 'basic)))
557 (forward-char 1) (backward-sexp 1)
558 (let ((prev (sm-c-smie-backward-token)))
559 (when (member prev sm-c-paren-block-keywords)
560 `(column . ,(+ (funcall smie-rules-function :elem 'basic)
561 (smie-indent-virtual)))))))
564 (forward-char 1) (backward-sexp 1)
566 `(column . ,(sm-c--smie-virtual))))
570 `(column . ,(sm-c--smie-virtual))))
571 (`(:after . ":label")
573 `(column . ,(sm-c--smie-virtual)))
574 (`(:after . ": case")
577 (smie-backward-sexp ": case")
578 `(column . ,(sm-c--smie-virtual))))
579 (`(:after . "* deref") `(column . ,(sm-c--smie-virtual)))
580 ((and `(:before . ":label") (guard (not sm-c-smie--inhibit-case/label-rule)))
581 (let ((ppss (syntax-ppss)))
584 (goto-char (nth 1 ppss))
585 `(column . ,(smie-indent-virtual))))))
586 ((and `(:before . "case") (guard (not sm-c-smie--inhibit-case/label-rule)))
588 (dolist (pos (reverse (nth 9 (syntax-ppss))))
591 (and (looking-at "{")
592 (null (car-safe (smie-backward-sexp)))
593 (equal "switch" (sm-c-smie-backward-token))
595 (throw 'found `(column . ,(smie-indent-virtual))))))))))
597 ;;; Font-lock support
599 (defconst sm-c-font-lock-keywords
600 `((,sm-c--cpp-regexp (1 font-lock-preprocessor-face))
601 ("\\_<\\(?:true\\|false\\)\\_>" (0 font-lock-constant-face))
602 ("\\_<\\(case\\)\\_>[ \t]*\\([^: \t]+\\)"
603 (1 font-lock-keyword-face)
604 (2 font-lock-constant-face))
605 ("\\(?:[{};]\\(\\)\\|^\\)[ \t]*\\([[:alpha:]_][[:alnum:]_]*\\)[ \t]*:"
606 (2 (if (or (match-beginning 1)
607 (save-excursion (equal ":label" (sm-c-smie-backward-token))))
608 font-lock-constant-face)))
609 (,(let ((kws (delq nil (mapcar (lambda (x)
612 (string-match "\\`[a-z]" x)
614 sm-c-smie-grammar))))
615 (concat "\\_<" (regexp-opt
617 ;; Elements not in SMIE's grammar. Either because
618 ;; they're uninteresting from a parsing point of view,
619 ;; or because SMIE's parsing engine can't handle them
621 '("break" "continue" "struct" "enum" "union" "static")
622 ;; "case" already handled above.
623 (delete "case" kws)))
625 (0 font-lock-keyword-face))
626 (,(let* ((spc0 "\\(?:\n?[ \t]\\|/\\*.*?\\*/\\)*")
627 (spc1 (concat "\n?[ \t]" spc0))
628 (id "\\(?:\\sw\\|\\s_\\)+"))
629 (cl-flet ((repeat (repetition &rest res)
630 (concat "\\(?:" (apply #'concat res) "\\)"
632 ((pred symbolp) (symbol-name repetition))
636 (repeat '* "\\*" spc0)
637 (repeat '* id (repeat 1 spc1 "\\|" spc0 "\\*" spc0))
638 "\\(" id "\\)[ \t\n]*(")))
640 (prog1 font-lock-function-name-face
641 (if (< (match-beginning 0) (line-beginning-position))
642 (put-text-property (match-beginning 0) (match-end 0)
643 'font-lock-multiline t)))))))
647 (define-derived-mode sm-c-mode prog-mode "smC"
648 "C editing mode based on SMIE."
649 ;; (setq-local font-lock-support-mode nil) ;; To help debugging.
650 (setq-local comment-start "/* ")
651 (setq-local comment-end " */")
652 (setq-local parse-sexp-lookup-properties t)
653 (setq-local open-paren-in-column-0-is-defun-start nil)
654 (setq-local syntax-propertize-function #'sm-c-syntax-propertize)
655 (setq-local font-lock-defaults '(sm-c-font-lock-keywords))
656 (setq-local font-lock-syntactic-face-function #'sm-c-syntactic-face-function)
657 (smie-setup sm-c-smie-grammar #'sm-c-smie-rules
658 :backward-token #'sm-c-smie-backward-token
659 :forward-token #'sm-c-smie-forward-token)
660 ;; FIXME: The stock SMIE forward-sexp-function is not good enough here, since
661 ;; our grammar is much too poor. We should setup another function instead
662 ;; (and ideally teach SMIE to use it).
663 (kill-local-variable 'forward-sexp-function)
664 (add-hook 'smie-indent-functions #'sm-c--cpp-smie-indent nil t)
665 (add-function :after (local 'indent-line-function)
666 #'sm-c--cpp-indent-line)
667 (setq-local smie--hanging-eolp-function #'sm-c-smie-hanging-eolp))
670 ;;; sm-c-mode.el ends here