]> code.delx.au - gnu-emacs/blob - lisp/international/ucs-normalize.el
Optimize ucs-normalize.el compilation
[gnu-emacs] / lisp / international / ucs-normalize.el
1 ;;; ucs-normalize.el --- Unicode normalization NFC/NFD/NFKD/NFKC
2
3 ;; Copyright (C) 2009-2016 Free Software Foundation, Inc.
4
5 ;; Author: Taichi Kawabata <kawabata.taichi@gmail.com>
6 ;; Keywords: unicode, normalization
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
22
23 ;;; Commentary:
24 ;;
25 ;; This program has passed the NormalizationTest-5.2.0.txt.
26 ;;
27 ;; References:
28 ;; http://www.unicode.org/reports/tr15/
29 ;; http://www.unicode.org/review/pr-29.html
30 ;;
31 ;; HFS-Normalization:
32 ;; Reference:
33 ;; http://developer.apple.com/technotes/tn/tn1150.html
34 ;;
35 ;; HFS Normalization excludes following area for decomposition.
36 ;;
37 ;; U+02000 .. U+02FFF :: Punctuation, symbols, dingbats, arrows, etc.
38 ;; (Characters in this region will be composed.)
39 ;; U+0F900 .. U+0FAFF :: CJK compatibility Ideographs.
40 ;; U+2F800 .. U+2FFFF :: CJK compatibility Ideographs.
41 ;;
42 ;; HFS-Normalization is useful for normalizing text involving CJK Ideographs.
43 ;;
44 ;;;
45 ;;; Implementation Notes on NFC/HFS-NFC.
46 ;;;
47 ;;
48 ;; <Stages> Decomposition Composition
49 ;; NFD: 'nfd nil
50 ;; NFC: 'nfd t
51 ;; NFKD: 'nfkd nil
52 ;; NFKC: 'nfkd t
53 ;; HFS-NFD: 'hfs-nfd 'hfs-nfd-comp-p
54 ;; HFS-NFC: 'hfs-nfd t
55 ;;
56 ;; Algorithm for Normalization
57 ;;
58 ;; Before normalization, following data will be prepared.
59 ;;
60 ;; 1. quick-check-list
61 ;;
62 ;; `quick-check-list' consists of characters that will be decomposed
63 ;; during normalization. It includes composition-exclusions,
64 ;; singletons, non-starter-decompositions and decomposable
65 ;; characters.
66 ;;
67 ;; `quick-check-regexp' will search the above characters plus
68 ;; combining characters.
69 ;;
70 ;; 2. decomposition-translation
71 ;;
72 ;; `decomposition-translation' is a translation table that will be
73 ;; used to decompose the characters.
74 ;;
75 ;;
76 ;; Normalization Process
77 ;;
78 ;; A. Searching (`ucs-normalize-region')
79 ;;
80 ;; Region is searched for `quick-check-regexp' to find possibly
81 ;; normalizable point.
82 ;;
83 ;; B. Identification of Normalization Block
84 ;;
85 ;; (1) start of the block
86 ;; If the searched character is a starter and not combining
87 ;; with previous character, then the beginning of the block is
88 ;; the searched character. If searched character is combining
89 ;; character, then previous character will be the target
90 ;; character
91 ;; (2) end of the block
92 ;; Block ends at non-composable starter character.
93 ;;
94 ;; C. Decomposition (`ucs-normalize-block')
95 ;;
96 ;; The entire block will be decomposed by
97 ;; `decomposition-translation' table.
98 ;;
99 ;; D. Sorting and Composition of Smaller Blocks (`ucs-normalize-block-compose-chars')
100 ;;
101 ;; The block will be split to multiple samller blocks by starter
102 ;; characters. Each block is sorted, and composed if necessary.
103 ;;
104 ;; E. Composition of Entire Block (`ucs-normalize-compose-chars')
105 ;;
106 ;; Composed blocks are collected and again composed.
107
108 ;;; Code:
109
110 (defconst ucs-normalize-version "1.2")
111
112 (eval-when-compile (require 'cl-lib))
113
114 (declare-function nfd "ucs-normalize" (char))
115
116 (eval-when-compile
117
118 (defconst ucs-normalize-composition-exclusions
119 '(#x0958 #x0959 #x095A #x095B #x095C #x095D #x095E #x095F
120 #x09DC #x09DD #x09DF #x0A33 #x0A36 #x0A59 #x0A5A #x0A5B
121 #x0A5E #x0B5C #x0B5D #x0F43 #x0F4D #x0F52 #x0F57 #x0F5C
122 #x0F69 #x0F76 #x0F78 #x0F93 #x0F9D #x0FA2 #x0FA7 #x0FAC
123 #x0FB9 #xFB1D #xFB1F #xFB2A #xFB2B #xFB2C #xFB2D #xFB2E
124 #xFB2F #xFB30 #xFB31 #xFB32 #xFB33 #xFB34 #xFB35 #xFB36
125 #xFB38 #xFB39 #xFB3A #xFB3B #xFB3C #xFB3E #xFB40 #xFB41
126 #xFB43 #xFB44 #xFB46 #xFB47 #xFB48 #xFB49 #xFB4A #xFB4B
127 #xFB4C #xFB4D #xFB4E #x2ADC #x1D15E #x1D15F #x1D160 #x1D161
128 #x1D162 #x1D163 #x1D164 #x1D1BB #x1D1BC #x1D1BD #x1D1BE
129 #x1D1BF #x1D1C0)
130 "Composition Exclusion List.
131 This list is taken from
132 http://www.unicode.org/Public/UNIDATA/5.2/CompositionExclusions.txt")
133
134 ;; Unicode ranges that decompositions & combining characters are defined.
135 (defvar check-range nil)
136 (setq check-range '((#x00a0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x110ff) (#x1d000 . #x1dfff) (#x1f100 . #x1f2ff) (#x2f800 . #x2faff)))
137
138 ;; Basic normalization functions
139 (defun nfd (char)
140 (let ((decomposition
141 (get-char-code-property char 'decomposition)))
142 (if (and decomposition (numberp (car decomposition))
143 (or (> (length decomposition) 1)
144 (/= (car decomposition) char)))
145 decomposition)))
146
147 (defun nfkd (char)
148 (let ((decomposition
149 (get-char-code-property char 'decomposition)))
150 (if (symbolp (car decomposition)) (cdr decomposition)
151 (if (or (> (length decomposition) 1)
152 (/= (car decomposition) char)) decomposition))))
153
154 (defun hfs-nfd (char)
155 (when (or (and (>= char 0) (< char #x2000))
156 (and (>= char #x3000) (< char #xf900))
157 (and (>= char #xfb00) (< char #x2f800))
158 (>= char #x30000))
159 (nfd char))))
160
161 (eval-and-compile
162 (defun ucs-normalize-hfs-nfd-comp-p (char)
163 (and (>= char #x2000) (< char #x3000)))
164
165 (defsubst ucs-normalize-ccc (char)
166 (get-char-code-property char 'canonical-combining-class))
167 )
168
169 ;; Data common to all normalizations
170
171 (eval-when-compile
172
173 (defvar combining-chars nil)
174 (setq combining-chars nil)
175 (defvar decomposition-pair-to-composition nil)
176 (setq decomposition-pair-to-composition nil)
177 (defvar non-starter-decompositions nil)
178 (setq non-starter-decompositions nil)
179 ;; This file needs to access these 2 Unicode properties, but when we
180 ;; compile it during bootstrap, charprop.el was not built yet, and
181 ;; therefore is not yet loaded into bootstrap-emacs, so
182 ;; char-code-property-alist is nil, and get-char-code-property
183 ;; always returns nil, something the code here doesn't like.
184 (define-char-code-property 'decomposition "uni-decomposition.el")
185 (define-char-code-property 'canonical-combining-class "uni-combining.el")
186 (let ((char 0) ccc decomposition)
187 (mapc
188 (lambda (start-end)
189 (cl-do ((char (car start-end) (+ char 1))) ((> char (cdr start-end)))
190 (setq ccc (ucs-normalize-ccc char))
191 (setq decomposition (get-char-code-property
192 char 'decomposition))
193 (if (and (= (length decomposition) 1)
194 (= (car decomposition) char))
195 (setq decomposition nil))
196 (if (and ccc (/= 0 ccc)) (add-to-list 'combining-chars char))
197 (if (and (numberp (car decomposition))
198 (/= (ucs-normalize-ccc (car decomposition))
199 0))
200 (add-to-list 'non-starter-decompositions char))
201 (when (numberp (car decomposition))
202 (if (and (= 2 (length decomposition))
203 (null (memq char ucs-normalize-composition-exclusions))
204 (null (memq char non-starter-decompositions)))
205 (setq decomposition-pair-to-composition
206 (cons (cons decomposition char)
207 decomposition-pair-to-composition)))
208 ;; If not singleton decomposition, second and later characters in
209 ;; decomposition will be the subject of combining characters.
210 (if (cdr decomposition)
211 (dolist (char (cdr decomposition))
212 (add-to-list 'combining-chars char))))))
213 check-range))
214
215 (setq combining-chars
216 (append combining-chars
217 '(?ᅡ ?ᅢ ?ᅣ ?ᅤ ?ᅥ ?ᅦ ?ᅧ ?ᅨ ?ᅩ ?ᅪ
218 ?ᅫ ?ᅬ ?ᅭ ?ᅮ ?ᅯ ?ᅰ ?ᅱ ?ᅲ ?ᅳ ?ᅴ ?ᅵ
219 ?ᆨ ?ᆩ ?ᆪ ?ᆫ ?ᆬ ?ᆭ ?ᆮ ?ᆯ ?ᆰ ?ᆱ ?ᆲ ?ᆳ ?ᆴ
220 ?ᆵ ?ᆶ ?ᆷ ?ᆸ ?ᆹ ?ᆺ ?ᆻ ?ᆼ ?ᆽ ?ᆾ ?ᆿ ?ᇀ ?ᇁ ?ᇂ)))
221 )
222
223 (eval-and-compile
224 (defun ucs-normalize-make-hash-table-from-alist (alist)
225 (let ((table (make-hash-table :test 'equal :size 2000)))
226 (mapc (lambda (x) (puthash (car x) (cdr x) table)) alist)
227 table))
228
229 (defvar ucs-normalize-decomposition-pair-to-primary-composite nil
230 "Hashtable of decomposed pair to primary composite.
231 Note that Hangul are excluded.")
232 (setq ucs-normalize-decomposition-pair-to-primary-composite
233 (ucs-normalize-make-hash-table-from-alist
234 (eval-when-compile decomposition-pair-to-composition)))
235
236 (defun ucs-normalize-primary-composite (decomposition-pair composition-predicate)
237 "Convert DECOMPOSITION-PAIR to primary composite using COMPOSITION-PREDICATE."
238 (let ((char (or (gethash decomposition-pair
239 ucs-normalize-decomposition-pair-to-primary-composite)
240 (and (<= #x1100 (car decomposition-pair))
241 (< (car decomposition-pair) #x1113)
242 (<= #x1161 (cadr decomposition-pair))
243 (< (car decomposition-pair) #x1176)
244 (let ((lindex (- (car decomposition-pair) #x1100))
245 (vindex (- (cadr decomposition-pair) #x1161)))
246 (+ #xAC00 (* (+ (* lindex 21) vindex) 28))))
247 (and (<= #xac00 (car decomposition-pair))
248 (< (car decomposition-pair) #xd7a4)
249 (<= #x11a7 (cadr decomposition-pair))
250 (< (cadr decomposition-pair) #x11c3)
251 (= 0 (% (- (car decomposition-pair) #xac00) 28))
252 (let ((tindex (- (cadr decomposition-pair) #x11a7)))
253 (+ (car decomposition-pair) tindex))))))
254 (if (and char
255 (functionp composition-predicate)
256 (null (funcall composition-predicate char)))
257 nil char)))
258 )
259
260 (defvar ucs-normalize-combining-chars nil)
261 (setq ucs-normalize-combining-chars (eval-when-compile combining-chars))
262
263 (defvar ucs-normalize-combining-chars-regexp nil
264 "Regular expression to match sequence of combining characters.")
265 (setq ucs-normalize-combining-chars-regexp
266 (eval-when-compile (concat (regexp-opt-charset combining-chars) "+")))
267
268 (declare-function decomposition-translation-alist "ucs-normalize"
269 (decomposition-function))
270 (declare-function decomposition-char-recursively "ucs-normalize"
271 (char decomposition-function))
272 (declare-function alist-list-to-vector "ucs-normalize" (alist))
273
274 (eval-when-compile
275
276 (defun decomposition-translation-alist (decomposition-function)
277 (let (decomposition alist)
278 (mapc
279 (lambda (start-end)
280 (cl-do ((char (car start-end) (+ char 1))) ((> char (cdr start-end)))
281 (setq decomposition (funcall decomposition-function char))
282 (if decomposition
283 (setq alist (cons (cons char
284 (apply 'append
285 (mapcar (lambda (x)
286 (decomposition-char-recursively
287 x decomposition-function))
288 decomposition)))
289 alist)))))
290 check-range)
291 alist))
292
293 (defun decomposition-char-recursively (char decomposition-function)
294 (let ((decomposition (funcall decomposition-function char)))
295 (if decomposition
296 (apply 'append
297 (mapcar (lambda (x)
298 (decomposition-char-recursively x decomposition-function))
299 decomposition))
300 (list char))))
301
302 (defun alist-list-to-vector (alist)
303 (mapcar (lambda (x) (cons (car x) (apply 'vector (cdr x)))) alist))
304
305 (defvar nfd-alist nil)
306 (setq nfd-alist (alist-list-to-vector (decomposition-translation-alist 'nfd)))
307 (defvar nfkd-alist nil)
308 (setq nfkd-alist (alist-list-to-vector (decomposition-translation-alist 'nfkd)))
309 (defvar hfs-nfd-alist nil)
310 (setq hfs-nfd-alist (alist-list-to-vector (decomposition-translation-alist 'hfs-nfd)))
311 )
312
313 (eval-and-compile
314 (defvar ucs-normalize-hangul-translation-alist nil)
315 (setq ucs-normalize-hangul-translation-alist
316 (let ((i 0) entries)
317 (while (< i 11172)
318 (setq entries
319 (cons (cons (+ #xac00 i)
320 (if (= 0 (% i 28))
321 (vector (+ #x1100 (/ i 588))
322 (+ #x1161 (/ (% i 588) 28)))
323 (vector (+ #x1100 (/ i 588))
324 (+ #x1161 (/ (% i 588) 28))
325 (+ #x11a7 (% i 28)))))
326 entries)
327 i (1+ i))) entries))
328
329 (defun ucs-normalize-make-translation-table-from-alist (alist)
330 (make-translation-table-from-alist
331 (append alist ucs-normalize-hangul-translation-alist)))
332
333 (define-translation-table 'ucs-normalize-nfd-table
334 (ucs-normalize-make-translation-table-from-alist (eval-when-compile nfd-alist)))
335 (define-translation-table 'ucs-normalize-nfkd-table
336 (ucs-normalize-make-translation-table-from-alist (eval-when-compile nfkd-alist)))
337 (define-translation-table 'ucs-normalize-hfs-nfd-table
338 (ucs-normalize-make-translation-table-from-alist (eval-when-compile hfs-nfd-alist)))
339
340 (defun ucs-normalize-sort (chars)
341 "Sort by canonical combining class of CHARS."
342 (sort chars
343 (lambda (ch1 ch2)
344 (< (ucs-normalize-ccc ch1) (ucs-normalize-ccc ch2)))))
345
346 (defun ucs-normalize-compose-chars (chars composition-predicate)
347 "Compose CHARS by COMPOSITION-PREDICATE.
348 CHARS must be sorted and normalized in starter-combining pairs."
349 (if composition-predicate
350 (let* ((starter (car chars))
351 remain result prev-ccc
352 (target-chars (cdr chars))
353 target target-ccc
354 primary-composite)
355 (while target-chars
356 (setq target (car target-chars)
357 target-ccc (ucs-normalize-ccc target))
358 (if (and (or (null prev-ccc)
359 (< prev-ccc target-ccc))
360 (setq primary-composite
361 (ucs-normalize-primary-composite (list starter target)
362 composition-predicate)))
363 ;; case 1: composable
364 (setq starter primary-composite
365 prev-ccc nil)
366 (if (= 0 target-ccc)
367 ;; case 2: move starter
368 (setq result (nconc result (cons starter (nreverse remain)))
369 starter target
370 remain nil)
371 ;; case 3: move target
372 (setq prev-ccc target-ccc
373 remain (cons target remain))))
374 (setq target-chars (cdr target-chars)))
375 (nconc result (cons starter (nreverse remain))))
376 chars))
377
378 (defun ucs-normalize-block-compose-chars (chars composition-predicate)
379 "Try composing CHARS by COMPOSITION-PREDICATE.
380 If COMPOSITION-PREDICATE is not given, then do nothing."
381 (let ((chars (ucs-normalize-sort chars)))
382 (if composition-predicate
383 (ucs-normalize-compose-chars chars composition-predicate)
384 chars)))
385 )
386
387 (declare-function quick-check-list "ucs-normalize"
388 (decomposition-translation &optional composition-predicate))
389 (declare-function quick-check-list-to-regexp "ucs-normalize" (quick-check-list))
390
391 (eval-when-compile
392
393 (defun quick-check-list (decomposition-translation
394 &optional composition-predicate)
395 "Quick-Check List for DECOMPOSITION-TRANSLATION and COMPOSITION-PREDICATE.
396 It includes Singletons, CompositionExclusions, and Non-Starter
397 decomposition."
398 (let (entries decomposition composition)
399 (with-temp-buffer
400 (mapc
401 (lambda (start-end)
402 (cl-do ((i (car start-end) (+ i 1))) ((> i (cdr start-end)))
403 (setq decomposition
404 (string-to-list
405 (progn
406 (erase-buffer)
407 (insert i)
408 (translate-region 1 2 decomposition-translation)
409 (buffer-string))))
410 (setq composition
411 (ucs-normalize-block-compose-chars decomposition composition-predicate))
412 (when (not (equal composition (list i)))
413 (setq entries (cons i entries)))))
414 check-range))
415 ;;(remove-duplicates
416 (append entries
417 ucs-normalize-composition-exclusions
418 non-starter-decompositions)))
419 ;;)
420
421 (defvar nfd-quick-check-list nil)
422 (setq nfd-quick-check-list (quick-check-list 'ucs-normalize-nfd-table ))
423 (defvar nfc-quick-check-list nil)
424 (setq nfc-quick-check-list (quick-check-list 'ucs-normalize-nfd-table t ))
425 (defvar nfkd-quick-check-list nil)
426 (setq nfkd-quick-check-list (quick-check-list 'ucs-normalize-nfkd-table ))
427 (defvar nfkc-quick-check-list nil)
428 (setq nfkc-quick-check-list (quick-check-list 'ucs-normalize-nfkd-table t ))
429 (defvar hfs-nfd-quick-check-list nil)
430 (setq hfs-nfd-quick-check-list (quick-check-list 'ucs-normalize-hfs-nfd-table
431 'ucs-normalize-hfs-nfd-comp-p))
432 (defvar hfs-nfc-quick-check-list nil)
433 (setq hfs-nfc-quick-check-list (quick-check-list 'ucs-normalize-hfs-nfd-table t ))
434
435 (defun quick-check-list-to-regexp (quick-check-list)
436 (regexp-opt-charset (append quick-check-list combining-chars)))
437
438 (defun quick-check-decomposition-list-to-regexp (quick-check-list)
439 (concat (quick-check-list-to-regexp quick-check-list) "\\|[가-힣]"))
440
441 (defun quick-check-composition-list-to-regexp (quick-check-list)
442 (concat (quick-check-list-to-regexp quick-check-list) "\\|[ᅡ-ᅵᆨ-ᇂ]"))
443 )
444
445
446 ;; NFD/NFC
447 (defvar ucs-normalize-nfd-quick-check-regexp nil)
448 (setq ucs-normalize-nfd-quick-check-regexp
449 (eval-when-compile (quick-check-decomposition-list-to-regexp nfd-quick-check-list)))
450 (defvar ucs-normalize-nfc-quick-check-regexp nil)
451 (setq ucs-normalize-nfc-quick-check-regexp
452 (eval-when-compile (quick-check-composition-list-to-regexp nfc-quick-check-list)))
453
454 ;; NFKD/NFKC
455 (defvar ucs-normalize-nfkd-quick-check-regexp nil)
456 (setq ucs-normalize-nfkd-quick-check-regexp
457 (eval-when-compile (quick-check-decomposition-list-to-regexp nfkd-quick-check-list)))
458 (defvar ucs-normalize-nfkc-quick-check-regexp nil)
459 (setq ucs-normalize-nfkc-quick-check-regexp
460 (eval-when-compile (quick-check-composition-list-to-regexp nfkc-quick-check-list)))
461
462 ;; HFS-NFD/HFS-NFC
463 (defvar ucs-normalize-hfs-nfd-quick-check-regexp nil)
464 (setq ucs-normalize-hfs-nfd-quick-check-regexp
465 (eval-when-compile (concat (quick-check-decomposition-list-to-regexp hfs-nfd-quick-check-list))))
466 (defvar ucs-normalize-hfs-nfc-quick-check-regexp nil)
467 (setq ucs-normalize-hfs-nfc-quick-check-regexp
468 (eval-when-compile (quick-check-composition-list-to-regexp hfs-nfc-quick-check-list)))
469
470 ;;------------------------------------------------------------------------------------------
471
472 ;; Normalize local region.
473
474 (defun ucs-normalize-block
475 (from to &optional decomposition-translation-table composition-predicate)
476 "Normalize region FROM TO, by sorting the region with canonical-cc.
477 If DECOMPOSITION-TRANSLATION-TABLE is given, translate region
478 before sorting. If COMPOSITION-PREDICATE is given, then compose
479 the region by using it."
480 (save-restriction
481 (narrow-to-region from to)
482 (goto-char (point-min))
483 (if decomposition-translation-table
484 (translate-region from to decomposition-translation-table))
485 (goto-char (point-min))
486 (let ((start (point)) chars); ccc)
487 (while (not (eobp))
488 (forward-char)
489 (when (or (eobp)
490 (= 0 (ucs-normalize-ccc (char-after (point)))))
491 (setq chars
492 (nconc chars
493 (ucs-normalize-block-compose-chars
494 (string-to-list (buffer-substring start (point)))
495 composition-predicate))
496 start (point)))
497 ;;(unless ccc (error "Undefined character can not be normalized!"))
498 )
499 (delete-region (point-min) (point-max))
500 (apply 'insert
501 (ucs-normalize-compose-chars
502 chars composition-predicate)))))
503
504 (defun ucs-normalize-region
505 (from to quick-check-regexp translation-table composition-predicate)
506 "Normalize region from FROM to TO.
507 QUICK-CHECK-REGEXP is applied for searching the region.
508 TRANSLATION-TABLE will be used to decompose region.
509 COMPOSITION-PREDICATE will be used to compose region."
510 (save-excursion
511 (save-restriction
512 (narrow-to-region from to)
513 (goto-char (point-min))
514 (let (start-pos starter)
515 (while (re-search-forward quick-check-regexp nil t)
516 (setq starter (string-to-char (match-string 0)))
517 (setq start-pos (match-beginning 0))
518 (ucs-normalize-block
519 ;; from
520 (if (or (= start-pos (point-min))
521 (and (= 0 (ucs-normalize-ccc starter))
522 (not (memq starter ucs-normalize-combining-chars))))
523 start-pos (1- start-pos))
524 ;; to
525 (if (looking-at ucs-normalize-combining-chars-regexp)
526 (match-end 0) (1+ start-pos))
527 translation-table composition-predicate))))))
528
529 ;; --------------------------------------------------------------------------------
530
531 (defmacro ucs-normalize-string (ucs-normalize-region)
532 `(with-temp-buffer
533 (insert str)
534 (,ucs-normalize-region (point-min) (point-max))
535 (buffer-string)))
536
537 ;;;###autoload
538 (defun ucs-normalize-NFD-region (from to)
539 "Normalize the current region by the Unicode NFD."
540 (interactive "r")
541 (ucs-normalize-region from to
542 ucs-normalize-nfd-quick-check-regexp
543 'ucs-normalize-nfd-table nil))
544 ;;;###autoload
545 (defun ucs-normalize-NFD-string (str)
546 "Normalize the string STR by the Unicode NFD."
547 (ucs-normalize-string ucs-normalize-NFD-region))
548
549 ;;;###autoload
550 (defun ucs-normalize-NFC-region (from to)
551 "Normalize the current region by the Unicode NFC."
552 (interactive "r")
553 (ucs-normalize-region from to
554 ucs-normalize-nfc-quick-check-regexp
555 'ucs-normalize-nfd-table t))
556 ;;;###autoload
557 (defun ucs-normalize-NFC-string (str)
558 "Normalize the string STR by the Unicode NFC."
559 (ucs-normalize-string ucs-normalize-NFC-region))
560
561 ;;;###autoload
562 (defun ucs-normalize-NFKD-region (from to)
563 "Normalize the current region by the Unicode NFKD."
564 (interactive "r")
565 (ucs-normalize-region from to
566 ucs-normalize-nfkd-quick-check-regexp
567 'ucs-normalize-nfkd-table nil))
568 ;;;###autoload
569 (defun ucs-normalize-NFKD-string (str)
570 "Normalize the string STR by the Unicode NFKD."
571 (ucs-normalize-string ucs-normalize-NFKD-region))
572
573 ;;;###autoload
574 (defun ucs-normalize-NFKC-region (from to)
575 "Normalize the current region by the Unicode NFKC."
576 (interactive "r")
577 (ucs-normalize-region from to
578 ucs-normalize-nfkc-quick-check-regexp
579 'ucs-normalize-nfkd-table t))
580 ;;;###autoload
581 (defun ucs-normalize-NFKC-string (str)
582 "Normalize the string STR by the Unicode NFKC."
583 (ucs-normalize-string ucs-normalize-NFKC-region))
584
585 ;;;###autoload
586 (defun ucs-normalize-HFS-NFD-region (from to)
587 "Normalize the current region by the Unicode NFD and Mac OS's HFS Plus."
588 (interactive "r")
589 (ucs-normalize-region from to
590 ucs-normalize-hfs-nfd-quick-check-regexp
591 'ucs-normalize-hfs-nfd-table
592 'ucs-normalize-hfs-nfd-comp-p))
593 ;;;###autoload
594 (defun ucs-normalize-HFS-NFD-string (str)
595 "Normalize the string STR by the Unicode NFD and Mac OS's HFS Plus."
596 (ucs-normalize-string ucs-normalize-HFS-NFD-region))
597 ;;;###autoload
598 (defun ucs-normalize-HFS-NFC-region (from to)
599 "Normalize the current region by the Unicode NFC and Mac OS's HFS Plus."
600 (interactive "r")
601 (ucs-normalize-region from to
602 ucs-normalize-hfs-nfc-quick-check-regexp
603 'ucs-normalize-hfs-nfd-table t))
604 ;;;###autoload
605 (defun ucs-normalize-HFS-NFC-string (str)
606 "Normalize the string STR by the Unicode NFC and Mac OS's HFS Plus."
607 (ucs-normalize-string ucs-normalize-HFS-NFC-region))
608
609 ;; Post-read-conversion function for `utf-8-hfs'.
610 (defun ucs-normalize-hfs-nfd-post-read-conversion (len)
611 (save-excursion
612 (save-restriction
613 (narrow-to-region (point) (+ (point) len))
614 (ucs-normalize-HFS-NFC-region (point-min) (point-max))
615 (- (point-max) (point-min)))))
616
617 ;; Pre-write conversion for `utf-8-hfs'.
618 ;; _from and _to are legacy arguments (see `define-coding-system').
619 (defun ucs-normalize-hfs-nfd-pre-write-conversion (_from _to)
620 (ucs-normalize-HFS-NFD-region (point-min) (point-max)))
621
622 ;;; coding-system definition
623 (define-coding-system 'utf-8-hfs
624 "UTF-8 based coding system for MacOS HFS file names.
625 The singleton characters in HFS normalization exclusion will not
626 be decomposed."
627 :coding-type 'utf-8
628 :mnemonic ?U
629 :charset-list '(unicode)
630 :post-read-conversion 'ucs-normalize-hfs-nfd-post-read-conversion
631 :pre-write-conversion 'ucs-normalize-hfs-nfd-pre-write-conversion
632 )
633
634 ;; This is tested in dired.c:file_name_completion in order to reject
635 ;; false positives due to comparison of encoded file names.
636 (coding-system-put 'utf-8-hfs 'decomposed-characters 't)
637
638 (provide 'ucs-normalize)
639
640 ;; Local Variables:
641 ;; coding: utf-8
642 ;; End:
643
644 ;;; ucs-normalize.el ends here