]> code.delx.au - gnu-emacs/blob - lisp/language/tml-util.el
New directory
[gnu-emacs] / lisp / language / tml-util.el
1 ;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*-
2
3 ;; Copyright (C) 2001 Free Software Foundation, Inc.
4
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6 ;; Keywords: multilingual, Indian, Tamil
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;; Created: Nov. 08. 2002
26
27 ;;; Commentary:
28
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
30 ;; composition of Tamil script characters.
31
32 ;;; Code:
33
34 ;; Tamil Composable Pattern
35 ;; C .. Consonants
36 ;; V .. Vowel
37 ;; H .. Pulli
38 ;; M .. Matra
39 ;; V .. Vowel
40 ;; A .. Anuswar
41 ;; D .. Chandrabindu
42 ;; 1. vowel
43 ;; V
44 ;; 2. syllable : only ligature-formed pattern forms composition.
45 ;; (CkHCs|C)(H|M)?
46 ;; 3. sri special
47 ;; (CsHCrVi)
48
49 ;; oririnal
50 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
51
52 (defconst tamil-consonant
53 "[\e$,1<5\e(B-\e$,1<Y\e(B]")
54
55 (defconst tamil-composable-pattern
56 (concat
57 "\\([\e$,1<%\e(B-\e$,1<4\e(B]\\)\\|"
58 "[\e$,1<"<#\e(B]\\|" ;; vowel modifier considered independent
59 "\\(\\(?:\\(?:\e$,1<5<m<W\e(B\\)\\|[\e$,1<5\e(B-\e$,1<Y\e(B]\\)[\e$,1<m<^\e(B-\e$,1<l\e(B]?\\)\\|"
60 "\\(\e$,1<W<m<P<`\e(B\\)")
61 "Regexp matching a composable sequence of Tamil characters.")
62
63 ;;;###autoload
64 (defun tamil-compose-region (from to)
65 (interactive "r")
66 (save-excursion
67 (save-restriction
68 (narrow-to-region from to)
69 (goto-char (point-min))
70 (while (re-search-forward tamil-composable-pattern nil t)
71 (tamil-compose-syllable-region (match-beginning 0)
72 (match-end 0))))))
73 (defun tamil-compose-string (string)
74 (with-temp-buffer
75 (insert (decompose-string string))
76 (tamil-compose-region (point-min) (point-max))
77 (buffer-string)))
78
79 (defun tamil-post-read-conversion (len)
80 (save-excursion
81 (save-restriction
82 (let ((buffer-modified-p (buffer-modified-p)))
83 (narrow-to-region (point) (+ (point) len))
84 (tamil-compose-region (point-min) (point-max))
85 (set-buffer-modified-p buffer-modified-p)
86 (- (point-max) (point-min))))))
87
88 (defun tamil-range (from to)
89 "Make the list of the integers of range FROM to TO."
90 (let (result)
91 (while (<= from to) (setq result (cons to result) to (1- to))) result))
92
93 (defun tamil-regexp-of-hashtbl-keys (hashtbl)
94 "Return a regular expression that matches all keys in hashtable HASHTBL."
95 (let ((max-specpdl-size 1000))
96 (regexp-opt
97 (sort
98 (let (dummy)
99 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl)
100 dummy)
101 (function (lambda (x y) (> (length x) (length y))))))))
102
103
104 ;;;###autoload
105 (defun tamil-composition-function (from to pattern &optional string)
106 "Compose Tamil characters in REGION, or STRING if specified.
107 Assume that the REGION or STRING must fully match the composable
108 PATTERN regexp."
109 (if string (tamil-compose-syllable-string string)
110 (tamil-compose-syllable-region from to))
111 (- to from))
112
113 ;; Register a function to compose Tamil characters.
114 (mapc
115 (function (lambda (ucs)
116 (aset composition-function-table (decode-char 'ucs ucs)
117 (list (cons tamil-composable-pattern
118 'tamil-composition-function)))))
119 (nconc '(#x0b82 #x0b83) (tamil-range #x0b85 #x0bb9)))
120
121 ;; Notes on conversion steps.
122
123 ;; 1. chars to glyphs
124 ;; Simple replacement of characters to glyphs is done.
125
126 ;; 2. glyphs reordering.
127 ;; following "\e$,4)j\e(B", "\e$,4)k\e(B", "\e$,4)l\e(B" goes to the front.
128
129 ;; 3. glyphs to glyphs
130 ;; reordered vowels are ligatured to consonants.
131
132 ;; 4. Composition.
133 ;; left modifiers will be attached at the left.
134 ;; others will be attached right.
135
136 (defvar tml-char-glyph
137 '(;; various signs
138 ;;("\e$,1<"\e(B" . "")
139 ("\e$,1<#\e(B" . "\e$,4*G\e(B")
140 ;; Independent Vowels
141 ("\e$,1<%\e(B" . "\e$,4*<\e(B")
142 ("\e$,1<&\e(B" . "\e$,4*=\e(B")
143 ("\e$,1<'\e(B" . "\e$,4*>\e(B")
144 ("\e$,1<(\e(B" . "\e$,4*?\e(B")
145 ("\e$,1<)\e(B" . "\e$,4*@\e(B")
146 ("\e$,1<*\e(B" . "\e$,4*A\e(B")
147 ("\e$,1<.\e(B" . "\e$,4*B\e(B")
148 ("\e$,1</\e(B" . "\e$,4*C\e(B")
149 ("\e$,1<0\e(B" . "\e$,4*D\e(B")
150 ("\e$,1<2\e(B" . "\e$,4*E\e(B")
151 ("\e$,1<3\e(B" . "\e$,4*F\e(B")
152 ("\e$,1<4\e(B" . "\e$,4*E*W\e(B")
153 ;; Consonants
154 ("\e$,1<5<m<W<m\e(B" . "\e$,4):\e(B") ; ks.
155 ("\e$,1<5<m<W\e(B" . "\e$,4*^\e(B") ; ks
156 ("\e$,1<5\e(B" . "\e$,4*H\e(B")
157
158 ("\e$,1<9\e(B" . "\e$,4*I\e(B")
159 ("\e$,1<:\e(B" . "\e$,4*J\e(B")
160 ("\e$,1<<\e(B" . "\e$,4*\\e(B")
161 ("\e$,1<<<m\e(B" . "\e$,4)8\e(B")
162 ("\e$,1<>\e(B" . "\e$,4*K\e(B")
163 ("\e$,1<?\e(B" . "\e$,4*L\e(B")
164 ("\e$,1<C\e(B" . "\e$,4*M\e(B")
165 ("\e$,1<D\e(B" . "\e$,4*N\e(B")
166 ("\e$,1<H\e(B" . "\e$,4*O\e(B")
167 ("\e$,1<I\e(B" . "\e$,4*Y\e(B")
168 ("\e$,1<I<m\e(B" . "\e$,4)a\e(B")
169 ("\e$,1<J\e(B" . "\e$,4*P\e(B")
170 ("\e$,1<N\e(B" . "\e$,4*Q\e(B")
171 ("\e$,1<O\e(B" . "\e$,4*R\e(B")
172 ("\e$,1<P\e(B" . "\e$,4*S\e(B")
173 ("\e$,1<Q\e(B" . "\e$,4*X\e(B")
174 ("\e$,1<R\e(B" . "\e$,4*T\e(B")
175 ("\e$,1<S\e(B" . "\e$,4*W\e(B")
176 ("\e$,1<T\e(B" . "\e$,4*V\e(B")
177 ("\e$,1<U\e(B" . "\e$,4*U\e(B")
178 ("\e$,1<W\e(B" . "\e$,4*[\e(B")
179 ("\e$,1<W<m\e(B" . "\e$,4)7\e(B")
180 ("\e$,1<W<m<P<`\e(B" . "\e$,4*_\e(B")
181 ("\e$,1<X\e(B" . "\e$,4*Z\e(B")
182 ("\e$,1<X<m\e(B" . "\e$,4)6\e(B")
183 ("\e$,1<Y\e(B" . "\e$,4*]\e(B")
184 ("\e$,1<Y<m\e(B" . "\e$,4)9\e(B")
185
186 ;; Dependent vowel signs
187 ("\e$,1<^\e(B" . "\e$,4)c\e(B")
188 ("\e$,1<_\e(B" . "\e$,4)d\e(B")
189 ("\e$,1<`\e(B" . "\e$,4)f\e(B")
190 ("\e$,1<a\e(B" . "\e$,4)g\e(B")
191 ("\e$,1<b\e(B" . "\e$,4)h\e(B")
192 ("\e$,1<f\e(B" . "\e$,4)j\e(B")
193 ("\e$,1<g\e(B" . "\e$,4)k\e(B")
194 ("\e$,1<h\e(B" . "\e$,4)l\e(B")
195 ("\e$,1<j\e(B" . "\e$,4)j)c\e(B")
196 ("\e$,1<k\e(B" . "\e$,4)k)c\e(B")
197 ("\e$,1<l\e(B" . "\e$,4)j*W\e(B")
198
199 ;; Various signs
200 ("\e$,1<m\e(B" . "\e$,4)b\e(B")
201 ("\e$,1<w\e(B" . "nil") ;; not supported?
202 ))
203
204 (defvar tml-char-glyph-hash
205 (let* ((hash (make-hash-table :test 'equal)))
206 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
207 tml-char-glyph)
208 hash))
209
210 (defvar tml-char-glyph-regexp
211 (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash))
212
213 ;; Tamil languages needed to be reordered.
214
215 (defvar tml-consonants-regexp
216 "[\e$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*]\e(B]")
217
218 (defvar tml-glyph-reorder-key-glyphs "[\e$,4)j)k)l\e(B]")
219
220 (defvar tml-glyph-reordering-regexp-list
221 (cons
222 (concat "\\(" tml-consonants-regexp "\\)\\([\e$,4)j)k)l\e(B]\\)") "\\2\\1"))
223
224 ;; Tamil vowel modifiers to be ligatured.
225 (defvar tml-glyph-glyph
226 '(
227 ("\e$,4*H)d\e(B" . "\e$,4(a\e(B") ; ki
228 ("\e$,4*^)d\e(B" . "\e$,4(v\e(B") ; ksi
229 ("\e$,4*^)f\e(B" . "\e$,4)2\e(B") ; ksi~
230 ("\e$,4*I)d\e(B" . "\e$,4(b\e(B") ; n^i
231 ("\e$,4*J)d\e(B" . "\e$,4(c\e(B") ; ci
232 ("\e$,4*K)d\e(B" . "\e$,4(d\e(B") ; n~i
233 ("\e$,4*L)d\e(B" . "\e$,4)n\e(B") ; t.i
234 ("\e$,4*M)d\e(B" . "\e$,4(e\e(B") ; n.i
235 ("\e$,4*N)d\e(B" . "\e$,4(f\e(B") ; ti
236 ("\e$,4*O)d\e(B" . "\e$,4(g\e(B") ; ni
237 ("\e$,4*P)d\e(B" . "\e$,4(h\e(B") ; pi
238 ("\e$,4*Q)d\e(B" . "\e$,4(i\e(B") ; mi
239 ("\e$,4*R)d\e(B" . "\e$,4(j\e(B") ; yi
240 ("\e$,4*S)d\e(B" . "\e$,4(k\e(B") ; ri
241 ("\e$,4*T)d\e(B" . "\e$,4(l\e(B") ; li
242 ("\e$,4*U)d\e(B" . "\e$,4(m\e(B") ; vi
243 ("\e$,4*V)d\e(B" . "\e$,4(n\e(B") ; l_i
244 ("\e$,4*W)d\e(B" . "\e$,4(o\e(B") ; l.i
245 ("\e$,4*X)d\e(B" . "\e$,4(p\e(B") ; r_i
246 ("\e$,4*Y)d\e(B" . "\e$,4(q\e(B") ; n_i
247 ("\e$,4*Z)d\e(B" . "\e$,4(r\e(B") ; si
248 ("\e$,4*[)d\e(B" . "\e$,4(s\e(B") ; s'i
249 ("\e$,4*\)d\e(B" . "\e$,4(t\e(B") ; ji
250 ("\e$,4*])d\e(B" . "\e$,4(u\e(B") ; hi
251
252 ("\e$,4*H)f\e(B" . "\e$,4(w\e(B") ; ki~
253 ("\e$,4*I)f\e(B" . "\e$,4(x\e(B") ; n^i~
254 ("\e$,4*J)f\e(B" . "\e$,4(y\e(B") ; ci~
255 ("\e$,4*K)f\e(B" . "\e$,4(z\e(B") ; n~i~
256 ("\e$,4*L)f\e(B" . "\e$,4)o\e(B") ; t.i~
257 ("\e$,4*M)f\e(B" . "\e$,4)!\e(B") ; n.i~
258 ("\e$,4*N)f\e(B" . "\e$,4)"\e(B") ; ti~
259 ("\e$,4*O)f\e(B" . "\e$,4)#\e(B") ; ni~
260 ("\e$,4*P)f\e(B" . "\e$,4)$\e(B") ; pi~
261 ("\e$,4*Q)f\e(B" . "\e$,4)%\e(B") ; mi~
262 ("\e$,4*R)f\e(B" . "\e$,4)&\e(B") ; yi~
263 ("\e$,4*S)f\e(B" . "\e$,4)'\e(B") ; ri~
264 ("\e$,4*T)f\e(B" . "\e$,4)(\e(B") ; li~
265 ("\e$,4*U)f\e(B" . "\e$,4))\e(B") ; vi~
266 ("\e$,4*V)f\e(B" . "\e$,4)*\e(B") ; l_i~
267 ("\e$,4*W)f\e(B" . "\e$,4)+\e(B") ; l.i~
268 ("\e$,4*X)f\e(B" . "\e$,4),\e(B") ; r_i~
269 ("\e$,4*Y)f\e(B" . "\e$,4)-\e(B") ; n_i~
270 ("\e$,4*Z)f\e(B" . "\e$,4).\e(B") ; si~
271 ("\e$,4*[)f\e(B" . "\e$,4)/\e(B") ; s'i~
272 ("\e$,4*\)f\e(B" . "\e$,4)0\e(B") ; ji~
273 ("\e$,4*])f\e(B" . "\e$,4)1\e(B") ; hi~
274
275 ("\e$,4*H)g\e(B" . "\e$,4)p\e(B") ; ku
276 ("\e$,4*I)g\e(B" . "\e$,4)q\e(B") ; n^u
277 ("\e$,4*J)g\e(B" . "\e$,4)r\e(B") ; cu
278 ("\e$,4*K)g\e(B" . "\e$,4)s\e(B") ; n~u
279 ("\e$,4*L)g\e(B" . "\e$,4)t\e(B") ; t.u
280 ("\e$,4*M)g\e(B" . "\e$,4)u\e(B") ; n.u
281 ("\e$,4*N)g\e(B" . "\e$,4)v\e(B") ; tu
282 ("\e$,4*O)g\e(B" . "\e$,4)x\e(B") ; nu
283 ("\e$,4*P)g\e(B" . "\e$,4)y\e(B") ; pu
284 ("\e$,4*Q)g\e(B" . "\e$,4)z\e(B") ; mu
285 ("\e$,4*R)g\e(B" . "\e$,4){\e(B") ; yu
286 ("\e$,4*S)g\e(B" . "\e$,4)|\e(B") ; ru
287 ("\e$,4*T)g\e(B" . "\e$,4)}\e(B") ; lu
288 ("\e$,4*U)g\e(B" . "\e$,4)~\e(B") ; vu
289 ("\e$,4*V)g\e(B" . "\e$,4)\7f\e(B") ; l_u
290 ("\e$,4*W)g\e(B" . "\e$,4* \e(B") ; l.u
291 ("\e$,4*X)g\e(B" . "\e$,4*!\e(B") ; r_u
292 ("\e$,4*Y)g\e(B" . "\e$,4*"\e(B") ; n_u
293
294 ("\e$,4*H)h\e(B" . "\e$,4*#\e(B") ; ku~
295 ("\e$,4*I)h\e(B" . "\e$,4*$\e(B") ; n^u~
296 ("\e$,4*J)h\e(B" . "\e$,4*%\e(B") ; cu~
297 ("\e$,4*K)h\e(B" . "\e$,4*&\e(B") ; n~u~
298 ("\e$,4*L)h\e(B" . "\e$,4*'\e(B") ; t.u~
299 ("\e$,4*M)h\e(B" . "\e$,4*(\e(B") ; n.u~
300 ("\e$,4*N)h\e(B" . "\e$,4*)\e(B") ; tu~
301 ("\e$,4*O)h\e(B" . "\e$,4*+\e(B") ; nu~
302 ("\e$,4*P)h\e(B" . "\e$,4*,\e(B") ; pu~
303 ("\e$,4*Q)h\e(B" . "\e$,4*-\e(B") ; mu~
304 ("\e$,4*R)h\e(B" . "\e$,4*.\e(B") ; yu~
305 ("\e$,4*S)h\e(B" . "\e$,4*/\e(B") ; ru~
306 ("\e$,4*T)h\e(B" . "\e$,4*6\e(B") ; lu~
307 ("\e$,4*U)h\e(B" . "\e$,4*7\e(B") ; vu~
308 ("\e$,4*V)h\e(B" . "\e$,4*8\e(B") ; l_u~
309 ("\e$,4*W)h\e(B" . "\e$,4*9\e(B") ; l.u~
310 ("\e$,4*X)h\e(B" . "\e$,4*:\e(B") ; r_u~
311 ("\e$,4*Y)h\e(B" . "\e$,4*;\e(B") ; n_u~
312 ))
313
314 (defvar tml-glyph-glyph-hash
315 (let* ((hash (make-hash-table :test 'equal)))
316 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
317 tml-glyph-glyph)
318 hash))
319
320 (defvar tml-glyph-glyph-regexp
321 (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash))
322
323 (defun tamil-compose-syllable-string (string)
324 (with-temp-buffer
325 (insert (decompose-string string))
326 (tamil-compose-syllable-region (point-min) (point-max))
327 (buffer-string)))
328
329 (defun tamil-compose-syllable-region (from to)
330 "Compose tamil syllable in region FROM to TO."
331 (let (glyph-str match-str glyph-reorder-regexps)
332 (save-excursion
333 (save-restriction
334 (narrow-to-region from to)
335 (goto-char (point-min))
336 ;; char-glyph-conversion
337 (while (re-search-forward tml-char-glyph-regexp nil t)
338 (setq match-str (match-string 0))
339 (setq glyph-str
340 (concat glyph-str (gethash match-str tml-char-glyph-hash))))
341 ;; glyph reordering
342 (when (string-match tml-glyph-reorder-key-glyphs glyph-str)
343 (if (string-match (car tml-glyph-reordering-regexp-list)
344 glyph-str)
345 (setq glyph-str
346 (replace-match (cdr tml-glyph-reordering-regexp-list)
347 nil nil glyph-str))))
348 ;; glyph-glyph-conversion
349 (when (string-match tml-glyph-glyph-regexp glyph-str)
350 (setq match-str (match-string 0 glyph-str))
351 (setq glyph-str
352 (replace-match (gethash match-str tml-glyph-glyph-hash)
353 nil nil glyph-str)))
354 ;; concatenate and attach reference-points.
355 (setq glyph-str
356 (cdr
357 (apply
358 'nconc
359 (mapcar
360 (function
361 (lambda (x) (list '(5 . 3) x))) ;; default ref. point.
362 glyph-str))))
363 (compose-region from to glyph-str)))))
364
365 (provide 'tml-util)
366
367 ;;; tml-util.el ends here