]> code.delx.au - gnu-emacs/commitdiff
Further Unicode restrictive fixups
author Lars Ingebrigtsen <larsi@gnus.org>
Tue, 29 Dec 2015 16:46:00 +0000 (17:46 +0100)
committer Lars Ingebrigtsen <larsi@gnus.org>
Tue, 29 Dec 2015 16:46:00 +0000 (17:46 +0100)
* puny.el (puny-highly-restrictive-p): Include the extra
identifier characters from table 3.

lisp/net/puny.el

index 08da51b587fe110ccd1f0e7569e37d71e8f99f85..ac47e13c97d4e182ef1da25bebc0f97b97e0e859 100644 (file)
@@ -191,13 +191,36 @@ For instance \"xn--bcher-kva\" => \"bücher\"."
     (buffer-string)))
 
 ;; http://www.unicode.org/reports/tr39/#Restriction_Level_Detection
+;; http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers
 
 (defun puny-highly-restrictive-p (string)
   (let ((scripts
-         (seq-uniq
-          (seq-map (lambda (char)
-                     (aref char-script-table char))
-                   string))))
+         (delq
+          t
+          (seq-uniq
+           (seq-map (lambda (char)
+                      (if (memq char
+                                ;; These characters are always allowed
+                                ;; in any string.
+                                '(#x0027 ; APOSTROPHE
+                                  #x002D ; HYPHEN-MINUS
+                                  #x002E ; FULL STOP
+                                  #x003A ; COLON
+                                  #x00B7 ; MIDDLE DOT
+                                  #x058A ; ARMENIAN HYPHEN
+                                  #x05F3 ; HEBREW PUNCTUATION GERESH
+                                  #x05F4 ; HEBREW PUNCTUATION GERSHAYIM
+                                  #x0F0B ; TIBETAN MARK INTERSYLLABIC TSHEG
+                                  #x200C ; ZERO WIDTH NON-JOINER*
+                                  #x200D ; ZERO WIDTH JOINER*
+                                  #x2010 ; HYPHEN
+                                  #x2019 ; RIGHT SINGLE QUOTATION MARK
+                                  #x2027 ; HYPHENATION POINT
+                                  #x30A0 ; KATAKANA-HIRAGANA DOUBLE HYPHEN
+                                  #x30FB)) ; KATAKANA MIDDLE DOT
+                          t
+                        (aref char-script-table char)))
+                    string)))))
     (or
      ;; Every character uses the same script.
      (= (length scripts) 1)