From d259328fb87db8cc67d52771efcfa653e52c5b71 Mon Sep 17 00:00:00 2001 From: Lars Ingebrigtsen Date: Tue, 29 Dec 2015 17:46:00 +0100 Subject: [PATCH] Further Unicode restrictive fixups * puny.el (puny-highly-restrictive-p): Include the extra identifier characters from table 3. --- lisp/net/puny.el | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/lisp/net/puny.el b/lisp/net/puny.el index 08da51b587..ac47e13c97 100644 --- a/lisp/net/puny.el +++ b/lisp/net/puny.el @@ -191,13 +191,36 @@ For instance \"xn--bcher-kva\" => \"bücher\"." (buffer-string))) ;; http://www.unicode.org/reports/tr39/#Restriction_Level_Detection +;; http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers (defun puny-highly-restrictive-p (string) (let ((scripts - (seq-uniq - (seq-map (lambda (char) - (aref char-script-table char)) - string)))) + (delq + t + (seq-uniq + (seq-map (lambda (char) + (if (memq char + ;; These characters are always allowed + ;; in any string. + '(#x0027 ; APOSTROPHE + #x002D ; HYPHEN-MINUS + #x002E ; FULL STOP + #x003A ; COLON + #x00B7 ; MIDDLE DOT + #x058A ; ARMENIAN HYPHEN + #x05F3 ; HEBREW PUNCTUATION GERESH + #x05F4 ; HEBREW PUNCTUATION GERSHAYIM + #x0F0B ; TIBETAN MARK INTERSYLLABIC TSHEG + #x200C ; ZERO WIDTH NON-JOINER* + #x200D ; ZERO WIDTH JOINER* + #x2010 ; HYPHEN + #x2019 ; RIGHT SINGLE QUOTATION MARK + #x2027 ; HYPHENATION POINT + #x30A0 ; KATAKANA-HIRAGANA DOUBLE HYPHEN + #x30FB)) ; KATAKANA MIDDLE DOT + t + (aref char-script-table char))) + string))))) (or ;; Every character uses the same script. (= (length scripts) 1) -- 2.39.2