]> code.delx.au - gnu-emacs/blob - admin/unidata/uvs.el
* lisp/net/tramp-gvfs.el (tramp-gvfs-mount-spec): Fix typo.
[gnu-emacs] / admin / unidata / uvs.el
1 ;;; uvs.el --- utility for UVS (format 14) cmap subtables in OpenType fonts.
2
3 ;; Copyright (C) 2014-2016 Free Software Foundation, Inc.
4
5 ;; Author: YAMAMOTO Mitsuharu <mituharu@math.s.chiba-u.ac.jp>
6
7 ;; This file is part of GNU Emacs.
8
9 ;; GNU Emacs is free software: you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation, either version 3 of the License, or
12 ;; (at your option) any later version.
13
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
18
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
21
22 ;;; Commentary:
23
24 ;; To extract a C array definition of a UVS table for the Adobe-Japan1
25 ;; character collection from an IVD Sequences file, execute
26 ;; $ emacs -batch -l uvs.el \
27 ;; --eval '(uvs-print-table-ivd "IVD_Sequences.txt" "Adobe-Japan1")' \
28 ;; > uvs.h
29
30 ;;; Code:
31
32 (defun uvs-fields-total-size (fields)
33 (apply '+ (mapcar (lambda (field) (get field 'uvs-field-size)) fields)))
34
35 ;;; Fields in Format 14 header.
36 (defconst uvs-format-14-header-fields
37 '(format length num-var-selector-records))
38 (put 'format 'uvs-field-size 2)
39 (put 'length 'uvs-field-size 4)
40 (put 'num-var-selector-records 'uvs-field-size 4)
41 (defconst uvs-format-14-header-size
42 (uvs-fields-total-size uvs-format-14-header-fields))
43
44 ;;; Fields in Variation Selector Record.
45 (defconst uvs-variation-selector-record-fields
46 '(var-selector default-uvs-offset non-default-uvs-offset))
47 (put 'var-selector 'uvs-field-size 3)
48 (put 'default-uvs-offset 'uvs-field-size 4)
49 (put 'non-default-uvs-offset 'uvs-field-size 4)
50 (defconst uvs-variation-selector-record-size
51 (uvs-fields-total-size uvs-variation-selector-record-fields))
52
53 ;;; Fields in Non-Default UVS Table.
54 (defconst uvs-non-default-uvs-table-header-fields '(num-uvs-mappings))
55 (put 'num-uvs-mappings 'uvs-field-size 4)
56 (defconst uvs-non-default-uvs-table-header-size
57 (uvs-fields-total-size uvs-non-default-uvs-table-header-fields))
58
59 ;;; Fields in UVS Mapping.
60 (defconst uvs-uvs-mapping-fields '(unicode-value glyph-id))
61 (put 'unicode-value 'uvs-field-size 3)
62 (put 'glyph-id 'uvs-field-size 2)
63 (defconst uvs-uvs-mapping-size
64 (uvs-fields-total-size uvs-uvs-mapping-fields))
65
66 (defun uvs-alist-from-ivd (collection-id sequence-id-to-glyph-function)
67 "Create UVS alist from IVD Sequences and COLLECTION-ID.
68 The IVD (Ideographic Variation Database) Sequences are obtained
69 from the contents of the current buffer, and should be in the
70 form of IVD_Sequences.txt specified in Unicode Technical Standard
71 #37. COLLECTION-ID is a string specifying the identifier of the
72 collection to extract (e.g., \"Adobe-Japan1\").
73 SEQUENCE-ID-TO-GLYPH-FUNC is a function to convert an identifier
74 string of the sequence to a glyph number. UVS alist is of the
75 following form:
76 ((SELECTOR1 . ((BASE11 . GLYPH11) (BASE12 . GLYPH12) ...))
77 (SELECTOR2 . ((BASE21 . GLYPH21) (BASE22 . GLYPH22) ...)) ...),
78 where selectors and bases are sorted in ascending order."
79 (let (uvs-alist)
80 (goto-char (point-min))
81 (while (re-search-forward
82 (concat "^[[:blank:]]*"
83 "\\([[:xdigit:]]+\\) \\([[:xdigit:]]+\\)"
84 "[[:blank:]]*;[[:blank:]]*"
85 "\\(?:" (regexp-quote collection-id) "\\)"
86 "[[:blank:]]*;[[:blank:]]*"
87 "\\([^\n[:blank:]]+\\)"
88 "[[:blank:]]*$") nil t)
89 (let* ((base (string-to-number (match-string 1) 16))
90 (selector (string-to-number (match-string 2) 16))
91 (sequence-id (match-string 3))
92 (glyph (funcall sequence-id-to-glyph-function sequence-id)))
93 (let ((selector-bgs (assq selector uvs-alist))
94 (base-glyph (cons base glyph)))
95 (if selector-bgs
96 (setcdr selector-bgs (cons base-glyph (cdr selector-bgs)))
97 (push (cons selector (list base-glyph)) uvs-alist)))))
98 (dolist (selector-bgs uvs-alist)
99 (setcdr selector-bgs
100 (sort (cdr selector-bgs)
101 (lambda (bg1 bg2) (< (car bg1) (car bg2))))))
102 (sort uvs-alist (lambda (sb1 sb2) (< (car sb1) (car sb2))))))
103
104 (defun uvs-int-to-bytes (value size)
105 "Convert integer VALUE to a list of SIZE bytes.
106 The most significant byte comes first."
107 (let (result)
108 (dotimes (i size)
109 (push (logand value #xff) result)
110 (setq value (lsh value -8)))
111 result))
112
113 (defun uvs-insert-fields-as-bytes (fields &rest values)
114 "Insert VALUES for FIELDS as a sequence of bytes to the current buffer.
115 VALUES and FIELDS are lists of integers and field symbols,
116 respectively. Byte length of each value is determined by the
117 `uvs-field-size' property of the corresponding field."
118 (while fields
119 (let ((field (car fields))
120 (value (car values)))
121 (insert (apply 'unibyte-string
122 (uvs-int-to-bytes value (get field 'uvs-field-size))))
123 (setq fields (cdr fields) values (cdr values)))))
124
125 (defun uvs-insert-alist-as-bytes (uvs-alist)
126 "Insert UVS-ALIST as a sequence of bytes to the current buffer."
127 (let* ((nrecords (length uvs-alist)) ; # of selectors
128 (total-nmappings
129 (apply '+ (mapcar
130 (lambda (selector-bgs) (length (cdr selector-bgs)))
131 uvs-alist)))
132 (non-default-offset
133 (+ uvs-format-14-header-size
134 (* uvs-variation-selector-record-size nrecords))))
135 (uvs-insert-fields-as-bytes uvs-format-14-header-fields
136 14
137 (+ uvs-format-14-header-size
138 (* uvs-variation-selector-record-size
139 nrecords)
140 (* uvs-non-default-uvs-table-header-size
141 nrecords)
142 (* uvs-uvs-mapping-size total-nmappings))
143 nrecords)
144 (dolist (selector-bgs uvs-alist)
145 (uvs-insert-fields-as-bytes uvs-variation-selector-record-fields
146 (car selector-bgs)
147 0 ; No Default UVS Tables.
148 non-default-offset)
149 (setq non-default-offset
150 (+ non-default-offset
151 uvs-non-default-uvs-table-header-size
152 (* (length (cdr selector-bgs)) uvs-uvs-mapping-size))))
153 (dolist (selector-bgs uvs-alist)
154 (uvs-insert-fields-as-bytes uvs-non-default-uvs-table-header-fields
155 (length (cdr selector-bgs)))
156 (dolist (base-glyph (cdr selector-bgs))
157 (uvs-insert-fields-as-bytes uvs-uvs-mapping-fields
158 (car base-glyph)
159 (cdr base-glyph))))))
160
161 (defun uvs-dump (&optional bytes-per-line separator separator-eol line-prefix)
162 "Print the current buffer as in representation of C array contents."
163 (or bytes-per-line (setq bytes-per-line 8))
164 (or separator (setq separator ", "))
165 (or separator-eol (setq separator-eol ","))
166 (or line-prefix (setq line-prefix " "))
167 (goto-char (point-min))
168 (while (> (- (point-max) (point)) bytes-per-line)
169 (princ line-prefix)
170 (princ (mapconcat (lambda (byte) (format "0x%02x" byte))
171 (string-to-unibyte
172 (buffer-substring (point) (+ (point) bytes-per-line)))
173 separator))
174 (princ separator-eol)
175 (terpri)
176 (forward-char bytes-per-line))
177 (princ line-prefix)
178 (princ (mapconcat (lambda (byte) (format "0x%02x" byte))
179 (string-to-unibyte
180 (buffer-substring (point) (point-max)))
181 separator))
182 (terpri))
183
184 (defun uvs-print-table-ivd (filename collection-id
185 &optional sequence-id-to-glyph-func)
186 "Print a C array definition of a UVS table for IVD Sequences.
187 FILENAME specifies the IVD Sequences file. COLLECTION-ID is a
188 string specifying the identifier of the collection to
189 extract (e.g., \"Adobe-Japan1\"). SEQUENCE-ID-TO-GLYPH-FUNC is a
190 function to convert an identifier string of the sequence to a
191 glyph number, and nil means to convert \"CID\\+[0-9]+\" to the
192 corresponding number."
193 (or sequence-id-to-glyph-func
194 (setq sequence-id-to-glyph-func
195 (lambda (sequence-id)
196 (string-match "\\`CID\\+\\([[:digit:]]+\\)\\'" sequence-id)
197 (string-to-number (match-string 1 sequence-id)))))
198 (let ((uvs-alist
199 (with-temp-buffer
200 (insert-file-contents filename)
201 (uvs-alist-from-ivd collection-id
202 sequence-id-to-glyph-func))))
203 (set-binary-mode 'stdout t)
204 (princ "/* Automatically generated by uvs.el. */\n")
205 (princ
206 (format "static const unsigned char mac_uvs_table_%s_bytes[] =\n {\n"
207 (replace-regexp-in-string "[^_[:alnum:]]" "_"
208 (downcase collection-id))))
209 (with-temp-buffer
210 (set-buffer-multibyte nil)
211 (uvs-insert-alist-as-bytes uvs-alist)
212 (uvs-dump))
213 (princ " };\n")))
214
215 ;;; uvs.el ends here