]> code.delx.au - gnu-emacs-elpa/blob - packages/lex/lex-parse-re.el
Merge commit '0cda39255827f283e7578cd469ae42daad9556a2' from js2-mode
[gnu-emacs-elpa] / packages / lex / lex-parse-re.el
1 ;;; lex-parse-re.el --- Parse Emacs regexps using Lex
2
3 ;; Copyright (C) 2008,2013 Free Software Foundation, Inc.
4
5 ;; Author: Stefan Monnier <monnier@iro.umontreal.ca>
6 ;; Keywords:
7
8 ;; This program is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation, either version 3 of the License, or
11 ;; (at your option) any later version.
12
13 ;; This program is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
17
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
20
21 ;;; Commentary:
22
23 ;; This exports lex-parse-re, but it also defines lex--parse-charset which is
24 ;; used internally by lex-compile to handle charsets specified as a string.
25
26 ;;; Code:
27
28 (require 'lex)
29
30 ;;; Regexp parsers.
31
32 (defun lex--tokenizer (lex string)
33 (let ((tokens ())
34 (i 0)
35 tmp)
36 (while (and (< i (length string))
37 (setq tmp (lex-match-string lex string i)))
38 (push (cons (car tmp) (substring string i (setq i (cadr tmp)))) tokens))
39 (nreverse tokens)))
40
41 (defun lex--parse-charset (string)
42 (let ((i 0)
43 (ranges ()))
44 (when (eq (aref string i) ?^)
45 (push 'not ranges)
46 (setq i (1+ i)))
47 (let ((op nil)
48 (case-fold-search nil))
49 (while (not (eq op 'stop))
50 (lex-case string i
51 ((seq "[:" (0+ (char (?a . ?z) (?A . ?Z))) ":]")
52 (push (intern (substring string (+ 2 (match-beginning 0))
53 (- (match-end 0) 2)))
54 ranges))
55 ((seq anything "-" anything)
56 (push (cons (aref string (match-beginning 0))
57 (aref string (1- (match-end 0))))
58 ranges))
59 (anything (push (aref string (1- (match-end 0))) ranges))
60 (eob (setq op 'stop))))
61
62 `(char ,@(nreverse ranges)))))
63
64 (defconst lex--parse-re-lexspec
65 '(((or "*" "+" "?" "*?" "+?" "??") . suffix)
66 ((seq "[" (opt "^") (opt "]")
67 (0+ (or (seq (char not ?\]) "-" (char not ?\]))
68 (seq "[:" (1+ (char (?a . ?z) (?A . ?Z))) ":]")
69 (char not ?\]))) "]") . charset)
70 ((seq "\\c" anything) . category)
71 ((seq "\\C" anything) . not-category)
72 ((seq "\\s" anything) . syntax)
73 ((seq "\\S" anything) . not-syntax)
74 ((seq "\\" (char (?1 . ?9))) . backref)
75 ("\\'" . eob)
76 ("\\`" . bob)
77 ("." . dot)
78 ("^" . bol)
79 ("$" . eol)
80 ("." . dot)
81 ("\\<" . bow)
82 ("\\>" . eow)
83 ("\\_<" . symbol-start)
84 ("\\_>" . symbol-end)
85 ("\\w" . wordchar)
86 ("\\W" . not-wordchar)
87 ("\\b" . word-boundary)
88 ("\\B" . not-word-boundary)
89 ("\\=" . point)
90 ((or (seq ?\\ anything) anything) . char)))
91
92
93 (defconst lex--parse-ere-lexer
94 (let ((case-fold-search nil))
95 (lex-compile
96 (append '(("(?:" . shy-group)
97 ("|" . or)
98 ((seq "{" (0+ (char (?0 . ?9)))
99 (opt (seq "," (0+ (char (?0 . ?9))))) "}") . repeat)
100 ((or ")" eob) . stop)
101 ("(" . group))
102 lex--parse-re-lexspec))))
103
104 (defconst lex--parse-bre-lexer
105 (let ((case-fold-search nil))
106 (lex-compile
107 (append '(("\\(?:" . shy-group)
108 ("\\|" . or)
109 ((seq "\\{" (0+ (char (?0 . ?9)))
110 (opt (seq "," (0+ (char (?0 . ?9))))) "\\}") . repeat)
111 ((or "\\)" eob) . stop)
112 ("\\(" . group))
113 lex--parse-re-lexspec))))
114
115 (defun lex--parse-re (string i lexer)
116 (let ((stack ())
117 (op nil)
118 (res nil)
119 tmp)
120 (while (and (not (eq op 'stop))
121 (setq tmp (lex-match-string lexer string i)))
122 (pcase (car tmp)
123 (`shy-group
124 (setq tmp (lex--parse-re string (cadr tmp) lexer))
125 (unless (eq (aref string (1- (cadr tmp))) ?\))
126 (error "Unclosed shy-group"))
127 (push (car tmp) res))
128 (`group
129 (setq tmp (lex--parse-re string (cadr tmp) lexer))
130 (unless (eq (aref string (1- (cadr tmp))) ?\))
131 (error "Unclosed group"))
132 (push (list 'group (car tmp)) res))
133 (`suffix
134 (if (null res) (error "Non-prefixed suffix operator")
135 (setq res (cons (list (cdr (assoc (substring string i (cadr tmp))
136 '(("*" . 0+)
137 ("+" . 1+)
138 ("?" . opt)
139 ("*?" . *\?)
140 ("+?" . +\?)
141 ("??" . \?\?))))
142 (car res))
143 (cdr res)))))
144 (`or (push `(or (seq ,@(nreverse res))) stack)
145 (setq res nil))
146 (`charset
147 (push (lex--parse-charset (substring string (1+ i) (1- (cadr tmp))))
148 res))
149 (`repeat
150 ;; Here we would like to have sub-matches :-(
151 (let* ((min (string-to-number
152 (substring string (+ i (if (eq (aref string i) ?\\) 2 1))
153 (cadr tmp))))
154 (max (let ((comma (string-match "," string i)))
155 (if (not (and comma (< comma (cadr tmp))))
156 min
157 (if (= comma (- (cadr tmp) 2))
158 nil
159 (string-to-number (substring string (1+ comma))))))))
160 (if (null res) (error "Non-prefixed repeat operator")
161 (setq res (cons `(repeat ,min ,max ,(car res)) (cdr res))))))
162 (`stop (setq op 'stop))
163 ((or `syntax `category `not-syntax `not-category)
164 (push (list (car tmp) (aref string (1- (cadr tmp)))) res))
165 (`backref
166 (push (list (car tmp) (- (aref string (1- (cadr tmp))) ?0)) res))
167 (`char
168 (push (aref string (1- (cadr tmp))) res))
169 (_ (push (car tmp) res)))
170 (setq i (cadr tmp)))
171 (let ((re `(seq ,@(nreverse res))))
172 (while stack (setq re (nconc (pop stack) (list re))))
173 (list re i))))
174
175 ;;;###autoload
176 (defun lex-parse-re (string &optional lexer)
177 "Parse STRING as a regular expression.
178 LEXER specifies the regexp syntax to use. It can be `ere', or `bre'
179 and it defaults to `bre'."
180 (setq lexer (cond ((eq lexer 'ere) lex--parse-ere-lexer)
181 ((memq lexer '(bre re nil)) lex--parse-bre-lexer)
182 (t lexer)))
183 (let ((res (lex--parse-re string 0 lexer)))
184 (if (< (cadr res) (length string))
185 (error "Regexp parsing failed around %d: ...%s..."
186 (cadr res) (substring string (1- (cadr res)) (1+ (cadr res))))
187 (car res))))
188
189
190 ;; (defun lex--parse-re (string i)
191 ;; (let ((stack ())
192 ;; (op nil)
193 ;; (res nil))
194 ;; (while (and (not (eq op 'stop)))
195 ;; (lex-case string i
196 ;; ("(?:" ;shy-group.
197 ;; (let ((tmp (lex--parse-re string i)))
198 ;; (setq i (car tmp))
199 ;; (unless (eq (aref string (1- i)) ?\)) (error "Unclosed shy-group"))
200 ;; (push (cdr tmp) res)))
201 ;; ((or "*?" "+?" "??")
202 ;; (error "Greediness control unsupported `%s'" (match-string 0 string)))
203 ;; ((or "*" "+" "?")
204 ;; (if (null res) (error "Non-prefixed suffix operator")
205 ;; (setq res (cons (list (cdr (assq (aref string (1- i))
206 ;; '((?* . 0+)
207 ;; (?+ . 1+)
208 ;; (?? . opt))))
209 ;; (car res))
210 ;; (cdr res)))))
211 ;; ("|" (push `(or (seq ,@(nreverse res))) stack)
212 ;; (setq res nil))
213 ;; ((seq "[" (opt "^") (opt "]")
214 ;; (0+ (or (seq (char not ?\]) "-" (char not ?\]))
215 ;; (seq "[:" (1+ (char (?a . ?z) (?A . ?Z))) ":]")
216 ;; (char not ?\]))) "]")
217 ;; (push (lex--parse-charset
218 ;; (substring string (1+ (match-beginning 0))
219 ;; (1- (match-end 0))))
220 ;; res))
221 ;; ((seq "{" (0+ (char (?0 . ?9)))
222 ;; (opt (seq "," (0+ (char (?0 . ?9))))) "}")
223 ;; ;; Here we would like to have sub-matches :-(
224 ;; (let* ((min (string-to-number (substring string
225 ;; (1+ (match-beginning 0))
226 ;; (match-end 0))))
227 ;; (max (let ((comma (string-match "," string (match-beginning 0))))
228 ;; (if (not (and comma (< comma (match-end 0))))
229 ;; min
230 ;; (if (= comma (- (match-end 0) 2))
231 ;; nil
232 ;; (string-to-number (substring string (1+ comma))))))))
233 ;; (if (null res) (error "Non-prefixed repeat operator")
234 ;; (setq res (cons `(repeat ,min ,max ,(car res)) (cdr res))))))
235 ;; ((or ")" eob) (setq op 'stop))
236 ;; ("\\'" (push 'eob res))
237 ;; ("\\`" (push 'bob res))
238 ;; ("^" (push 'bol res))
239 ;; ("$" (push 'eol res))
240 ;; ("." (push 'dot res))
241
242 ;; ((or "(" "\\<" "\\>" "\\_<" "\\_>" "\\c" "\\s" "\\C" "\\S" "\\w" "\\W"
243 ;; "\\b" "\\B" "\\=" (seq "\\" (char (?1 . ?9))))
244 ;; (error "Unsupported construct `%s'" (match-string 0 string)))
245
246 ;; ((or (seq ?\\ anything) anything)
247 ;; (push (aref string (1- (match-end 0))) res))
248 ;; ("" (error "This should not be reachable"))))
249 ;; (let ((re `(seq ,@(nreverse res))))
250 ;; (while stack (setq re (nconc (pop stack) (list re))))
251 ;; (cons i re))))
252
253
254
255
256
257 (provide 'lex-parse-re)
258 ;;; lex-parse-re.el ends here