]> code.delx.au - gnu-emacs/commitdiff
* lisp/character-fold.el: Add back multi-char matching
authorArtur Malabarba <bruce.connor.am@gmail.com>
Tue, 1 Dec 2015 13:52:50 +0000 (13:52 +0000)
committerArtur Malabarba <bruce.connor.am@gmail.com>
Tue, 1 Dec 2015 13:55:45 +0000 (13:55 +0000)
(character-fold-to-regexp): Uncomment recently commented code
and make the algorithm "dummer" by not checking every possible
combination.  This will miss some possible matches, but it
greatly reduces regexp size.

* test/automated/character-fold-tests.el
(character-fold--test-fold-to-regexp): Comment out test of
functionality no longer supported.

lisp/character-fold.el
test/automated/character-fold-tests.el

index 07537d88ad5ff7f2f1f111b0c41ec81ffbb94386..fb28bae7281ffe3300e8dacf46337fa2ca7405aa 100644 (file)
@@ -180,43 +180,49 @@ from which to start."
                              (regexp-quote (string c))))
                  (alist nil))
              ;; Long string.  The regexp would probably be too long.
-             ;; (unless (> end 50)
-             ;;   (setq alist (aref multi-char-table c))
-             ;;   (when case-fold-search
-             ;;     (let ((other-c (aref lower-case-table c)))
-             ;;       (when (or (not other-c)
-             ;;                 (eq other-c c))
-             ;;         (setq other-c (aref upper-case-table c)))
-             ;;       (when other-c
-             ;;         (setq alist (append alist (aref multi-char-table other-c)))
-             ;;         (setq regexp (concat "\\(?:" regexp "\\|"
-             ;;                              (or (aref character-fold-table other-c)
-             ;;                                  (regexp-quote (string other-c)))
-             ;;                              "\\)"))))))
-             (push (let ((alist-out '("\\)")))
-                     (pcase-dolist (`(,suffix . ,out-regexp) alist)
-                       (let ((len-suf (length suffix)))
+             (unless (> end 50)
+               (setq alist (aref multi-char-table c))
+               (when case-fold-search
+                 (let ((other-c (aref lower-case-table c)))
+                   (when (or (not other-c)
+                             (eq other-c c))
+                     (setq other-c (aref upper-case-table c)))
+                   (when other-c
+                     (setq alist (append alist (aref multi-char-table other-c)))
+                     (setq regexp (concat "\\(?:" regexp "\\|"
+                                          (or (aref character-fold-table other-c)
+                                              (regexp-quote (string other-c)))
+                                          "\\)"))))))
+             (push (let ((matched-entries nil)
+                         (max-length 0))
+                     (dolist (entry alist)
+                       (let* ((suffix (car entry))
+                              (len-suf (length suffix)))
                          (when (eq (compare-strings suffix 0 nil
                                                     string (1+ i) (+ i 1 len-suf)
                                                     nil)
                                    t)
-                           ;; FIXME: If N suffixes match, we "branch"
-                           ;; out into N+1 executions for the rest of
-                           ;; the string.  This involves redundant
-                           ;; work and makes a huge regexp.
-                           (push (concat "\\|" out-regexp
-                                         (character-fold-to-regexp
-                                          string nil (+ i 1 len-suf)))
-                                 alist-out))))
+                           (push (cons len-suf (cdr entry)) matched-entries)
+                           (setq max-length (max max-length len-suf)))))
                      ;; If no suffixes matched, just go on.
-                     (if (not (cdr alist-out))
+                     (if (not matched-entries)
                          regexp
-                       ;; Otherwise, add a branch for the
-                       ;; no-suffix case, and stop the loop here.
-                       (prog1 (apply #'concat "\\(?:" regexp
-                                     (character-fold-to-regexp string nil (1+ i))
-                                     alist-out)
-                         (setq i end))))
+;;; If N suffixes match, we "branch" out into N+1 executions for the
+;;; length of the longest match.  This means "fix" will match "fix" but
+;;; not "fⅸ", but it's necessary to keep the regexp size from scaling
+;;; exponentially.  See https://lists.gnu.org/archive/html/emacs-devel/2015-11/msg02562.html
+                       (let ((subs (substring string (1+ i) (+ i 1 max-length))))
+                         ;; `i' is still going to inc by 1 below.
+                         (setq i (+ i max-length))
+                         (concat
+                          "\\(?:"
+                          (mapconcat (lambda (entry)
+                                       (let ((length (car entry))
+                                             (suffix-regexp (cdr entry)))
+                                         (concat suffix-regexp
+                                                 (character-fold-to-regexp subs nil length))))
+                                     `((0 . ,regexp) . ,matched-entries) "\\|")
+                          "\\)"))))
                    out))))
       (setq i (1+ i)))
     (when (> spaces 0)
index cf195842d0a8c8027be210e7d4389332fed8b670..40735e5df7fe09298037557c460d97ea449196a3 100644 (file)
     (aset multi ?1 '(("2" . "yy")))
     (character-fold--test-match-exactly "a1" "xx44" "99")
     (character-fold--test-match-exactly "a12" "77" "xx442" "992")
-    (character-fold--test-match-exactly "a12" "xxyy")))
+    ;; Support for this case is disabled.  See function definition or:
+    ;; https://lists.gnu.org/archive/html/emacs-devel/2015-11/msg02562.html
+    ;; (character-fold--test-match-exactly "a12" "xxyy")
+    ))
 
 
 (provide 'character-fold-tests)