]> code.delx.au - gnu-emacs/blob - lisp/language/tml-util.el
Merge from emacs--devo--0
[gnu-emacs] / lisp / language / tml-util.el
1 ;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*-
2
3 ;; Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6 ;; Keywords: multilingual, Indian, Tamil
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 ;; Boston, MA 02110-1301, USA.
24
25 ;; Created: Nov. 08. 2002
26
27 ;;; Commentary:
28
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
30 ;; composition of Tamil script characters.
31
32 ;;; Code:
33
34 ;; Tamil Composable Pattern
35 ;; C .. Consonants
36 ;; V .. Vowel
37 ;; H .. Pulli
38 ;; M .. Matra
39 ;; V .. Vowel
40 ;; A .. Anuswar
41 ;; D .. Chandrabindu
42 ;; 1. vowel
43 ;; V
44 ;; 2. syllable : only ligature-formed pattern forms composition.
45 ;; (CkHCs|C)(H|M)?
46 ;; 3. sri special
47 ;; (CsHCrVi)
48
49 ;; original
50 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
51
;; Regexp bracket expression matching one character in a contiguous
;; range of Tamil characters (the "C .. Consonants" class of the pattern
;; notes above).  The two range endpoints are written in the file's
;; iso-2022-7bit coding (see the coding cookie on the first line), so
;; the literal must not be retyped.  The same range appears inline in
;; `tamil-composable-pattern'; this constant is not referenced elsewhere
;; in the visible part of this file.
52 (defconst tamil-consonant
53 "[\e$,1<5\e(B-\e$,1<Y\e(B]")
54
;; Regexp used by `tamil-compose-region' and
;; `tamil-composition-function' to find one composable unit.  It is the
;; concatenation of four alternatives, tried in this order:
;;   1. (group 1) a single character from the independent-vowel range;
;;   2. a single vowel-modifier character, treated as independent;
;;   3. (group 2) either the three-character sequence commented "ks" in
;;      `tml-char-glyph' or a single character from the consonant range,
;;      optionally followed by one character that is the pulli sign or
;;      one of the dependent vowel signs;
;;   4. (group 3) the four-character sequence of the "sri special" case
;;      described in the pattern notes above.
;; The callers in this file only use the whole match (group 0).
;; The literals are in the file's iso-2022-7bit coding and must be kept
;; byte-for-byte.
55 (defconst tamil-composable-pattern
56 (concat
57 "\\([\e$,1<%\e(B-\e$,1<4\e(B]\\)\\|"
58 "[\e$,1<"<#\e(B]\\|" ;; vowel modifier considered independent
59 "\\(\\(?:\\(?:\e$,1<5<m<W\e(B\\)\\|[\e$,1<5\e(B-\e$,1<Y\e(B]\\)[\e$,1<m<^\e(B-\e$,1<l\e(B]?\\)\\|"
60 "\\(\e$,1<W<m<P<`\e(B\\)")
61 "Regexp matching a composable sequence of Tamil characters.")
62
63 ;;;###autoload
(defun tamil-compose-region (from to)
  "Compose each Tamil syllable found between FROM and TO.
The buffer is temporarily narrowed to the region and scanned from its
start for matches of `tamil-composable-pattern'; every match is handed
to `tamil-compose-syllable-region'.  Point and the previous narrowing
are restored afterwards.  Interactively, operate on the region."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (while (re-search-forward tamil-composable-pattern nil t)
        ;; Capture the bounds before composing; the callee runs its own
        ;; regexp matches.
        (let ((syllable-start (match-beginning 0))
              (syllable-end (match-end 0)))
          (tamil-compose-syllable-region syllable-start syllable-end))))))
(defun tamil-compose-string (string)
  "Return STRING with its Tamil syllables composed.
STRING is first passed through `decompose-string'.  The result is
inserted into a temporary buffer, composed there with
`tamil-compose-region', and the buffer contents are returned."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (tamil-compose-region (point-min) (point-max))
      (buffer-string))))
78
79 ;;;###autoload
(defun tamil-post-read-conversion (len)
  "Compose the Tamil text in the LEN characters following point.
The buffer's modified flag is saved before composing and put back
afterwards.  Return the length of the processed region, measured
after composition."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p))
            (start (point)))
        (narrow-to-region start (+ start len))
        (tamil-compose-region (point-min) (point-max))
        (set-buffer-modified-p was-modified)
        (- (point-max) (point-min))))))
88
(defun tamil-range (from to)
  "Make the list of the integers of range FROM to TO.
Both ends are included.  Return nil when FROM is greater than TO."
  (let ((current to)
        (numbers nil))
    ;; Walk downwards from TO so that consing yields ascending order.
    (while (>= current from)
      (push current numbers)
      (setq current (1- current)))
    numbers))
93
(defun tamil-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression that matches all keys in hashtable HASHTBL.
The keys are collected, ordered longest first, and handed to
`regexp-opt'.  `max-specpdl-size' is bound to 1000 for the duration
of the call."
  (let ((max-specpdl-size 1000)
        (keys nil))
    (maphash (lambda (key _value) (push key keys)) hashtbl)
    (regexp-opt
     (sort keys
           (lambda (a b) (> (length a) (length b)))))))
103
104
105 ;; Notes on conversion steps.
106
107 ;; 1. chars to glyphs
108 ;; Simple replacement of characters to glyphs is done.
109
110 ;; 2. glyphs reordering.
111 ;; The glyphs "\e$,4)j\e(B", "\e$,4)k\e(B" and "\e$,4)l\e(B" that follow a consonant are moved in front of it.
112
113 ;; 3. glyphs to glyphs
114 ;; reordered vowels are ligatured to consonants.
115
116 ;; 4. Composition.
117 ;; left modifiers will be attached at the left.
118 ;; others will be attached right.
119
;; Alist for conversion step 1 ("chars to glyphs").  Each element is
;; (CHARS . GLYPHS): CHARS is a string of one or more Tamil characters
;; and GLYPHS is the glyph string that replaces it.  Some keys are
;; multi-character sequences (for example the entries commented "ks");
;; `tamil-regexp-of-hashtbl-keys' orders keys longest first when it
;; builds `tml-char-glyph-regexp' from this table.  The table is loaded
;; into `tml-char-glyph-hash'.
;; All literals are in the file's iso-2022-7bit coding and must be kept
;; byte-for-byte.
;; NOTE(review): the last entry maps to the three-letter string "nil",
;; not to the symbol nil -- confirm whether that is intended.
120 (defvar tml-char-glyph
121 '(;; various signs
122 ("\e$,1<"\e(B" . "\e$,4)b\e(B") ;; not good
123 ("\e$,1<#\e(B" . "\e$,4*G\e(B")
124 ;; Independent Vowels
125 ("\e$,1<%\e(B" . "\e$,4*<\e(B")
126 ("\e$,1<&\e(B" . "\e$,4*=\e(B")
127 ("\e$,1<'\e(B" . "\e$,4*>\e(B")
128 ("\e$,1<(\e(B" . "\e$,4*?\e(B")
129 ("\e$,1<)\e(B" . "\e$,4*@\e(B")
130 ("\e$,1<*\e(B" . "\e$,4*A\e(B")
131 ("\e$,1<.\e(B" . "\e$,4*B\e(B")
132 ("\e$,1</\e(B" . "\e$,4*C\e(B")
133 ("\e$,1<0\e(B" . "\e$,4*D\e(B")
134 ("\e$,1<2\e(B" . "\e$,4*E\e(B")
135 ("\e$,1<3\e(B" . "\e$,4*F\e(B")
136 ("\e$,1<4\e(B" . "\e$,4*E*W\e(B")
137 ;; Consonants
138 ("\e$,1<5<m<W<m\e(B" . "\e$,4):\e(B") ; ks.
139 ("\e$,1<5<m<W\e(B" . "\e$,4*^\e(B") ; ks
140 ("\e$,1<5\e(B" . "\e$,4*H\e(B")
141
142 ("\e$,1<9\e(B" . "\e$,4*I\e(B")
143 ("\e$,1<:\e(B" . "\e$,4*J\e(B")
144 ("\e$,1<<\e(B" . "\e$,4*\\e(B")
145 ("\e$,1<<<m\e(B" . "\e$,4)8\e(B")
146 ("\e$,1<>\e(B" . "\e$,4*K\e(B")
147 ("\e$,1<?\e(B" . "\e$,4*L\e(B")
148 ("\e$,1<C\e(B" . "\e$,4*M\e(B")
149 ("\e$,1<D\e(B" . "\e$,4*N\e(B")
150 ("\e$,1<H\e(B" . "\e$,4*O\e(B")
151 ("\e$,1<I\e(B" . "\e$,4*Y\e(B")
152 ("\e$,1<I<m\e(B" . "\e$,4)a\e(B")
153 ("\e$,1<J\e(B" . "\e$,4*P\e(B")
154 ("\e$,1<N\e(B" . "\e$,4*Q\e(B")
155 ("\e$,1<O\e(B" . "\e$,4*R\e(B")
156 ("\e$,1<P\e(B" . "\e$,4*S\e(B")
157 ("\e$,1<Q\e(B" . "\e$,4*X\e(B")
158 ("\e$,1<R\e(B" . "\e$,4*T\e(B")
159 ("\e$,1<S\e(B" . "\e$,4*W\e(B")
160 ("\e$,1<T\e(B" . "\e$,4*V\e(B")
161 ("\e$,1<U\e(B" . "\e$,4*U\e(B")
162 ("\e$,1<W\e(B" . "\e$,4*[\e(B")
163 ("\e$,1<W<m\e(B" . "\e$,4)7\e(B")
164 ("\e$,1<W<m<P<`\e(B" . "\e$,4*_\e(B")
165 ("\e$,1<X\e(B" . "\e$,4*Z\e(B")
166 ("\e$,1<X<m\e(B" . "\e$,4)6\e(B")
167 ("\e$,1<Y\e(B" . "\e$,4*]\e(B")
168 ("\e$,1<Y<m\e(B" . "\e$,4)9\e(B")
169
170 ;; Dependent vowel signs
171 ("\e$,1<^\e(B" . "\e$,4)c\e(B")
172 ("\e$,1<_\e(B" . "\e$,4)d\e(B")
173 ("\e$,1<`\e(B" . "\e$,4)f\e(B")
174 ("\e$,1<a\e(B" . "\e$,4)g\e(B")
175 ("\e$,1<b\e(B" . "\e$,4)h\e(B")
176 ("\e$,1<f\e(B" . "\e$,4)j\e(B")
177 ("\e$,1<g\e(B" . "\e$,4)k\e(B")
178 ("\e$,1<h\e(B" . "\e$,4)l\e(B")
179 ("\e$,1<j\e(B" . "\e$,4)j)c\e(B")
180 ("\e$,1<k\e(B" . "\e$,4)k)c\e(B")
181 ("\e$,1<l\e(B" . "\e$,4)j*W\e(B")
182
183 ;; Various signs
184 ("\e$,1<m\e(B" . "\e$,4)b\e(B")
185 ("\e$,1<w\e(B" . "nil") ;; not supported?
186 ))
187
(defvar tml-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry tml-char-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table built from `tml-char-glyph'.
Keys are character strings and values are glyph strings; keys are
compared with `equal'.")
193
(defvar tml-char-glyph-regexp
  (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash)
  "Regexp matching any character sequence that is a key of `tml-char-glyph-hash'.")
196
197 ;; Tamil glyphs need to be reordered.
198
;; Regexp bracket expression matching one consonant glyph.  The listed
;; glyphs appear to be the glyph sides of the single-consonant and "ks"
;; entries under "Consonants" in `tml-char-glyph'.  It is used as the
;; first group of the reordering regexp in
;; `tml-glyph-reordering-regexp-list'.  The literal is in the file's
;; iso-2022-7bit coding and must be kept byte-for-byte.
199 (defvar tml-consonants-regexp
200 "[\e$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*]\e(B]")
201
;; Regexp bracket expression matching one of the three glyphs that are
;; moved in front of the consonant (step 2, "glyphs reordering").
;; `tamil-compose-syllable-region' first tests the glyph string against
;; this before trying the full reordering regexp.
202 (defvar tml-glyph-reorder-key-glyphs "[\e$,4)j)k)l\e(B]")
203
(defvar tml-glyph-reordering-regexp-list
  (cons
   ;; Group 1: a consonant glyph; group 2: a glyph that must move to
   ;; its left.  The second class is taken from
   ;; `tml-glyph-reorder-key-glyphs' rather than repeated as a literal,
   ;; so the quick test and the reordering regexp cannot drift apart.
   (concat "\\(" tml-consonants-regexp "\\)"
           "\\(" tml-glyph-reorder-key-glyphs "\\)")
   ;; Replacement text: swap the two groups.
   "\\2\\1")
  "Cons cell (REGEXP . REPLACEMENT) used for glyph reordering.
Despite the name this is a single pair, not a list of pairs.
REGEXP matches a consonant glyph followed by one of the glyphs in
`tml-glyph-reorder-key-glyphs'; REPLACEMENT puts that glyph in front
of the consonant.")
207
208 ;; Tamil vowel modifiers to be ligatured.
;; Alist for conversion step 3 ("glyphs to glyphs").  Each element is
;; (GLYPHS . LIGATURE): GLYPHS appears to be a consonant glyph followed
;; by a vowel-sign glyph, and LIGATURE is the glyph string that replaces
;; the pair.  The trailing comment on each line gives a transliteration
;; of the resulting syllable.  The table is loaded into
;; `tml-glyph-glyph-hash'; `tamil-compose-syllable-region' replaces only
;; the first match in a syllable's glyph string.
;; All literals are in the file's iso-2022-7bit coding and must be kept
;; byte-for-byte.
209 (defvar tml-glyph-glyph
210 '(
211 ("\e$,4*H)d\e(B" . "\e$,4(a\e(B") ; ki
212 ("\e$,4*^)d\e(B" . "\e$,4(v\e(B") ; ksi
213 ("\e$,4*^)f\e(B" . "\e$,4)2\e(B") ; ksi~
214 ("\e$,4*I)d\e(B" . "\e$,4(b\e(B") ; n^i
215 ("\e$,4*J)d\e(B" . "\e$,4(c\e(B") ; ci
216 ("\e$,4*K)d\e(B" . "\e$,4(d\e(B") ; n~i
217 ("\e$,4*L)d\e(B" . "\e$,4)n\e(B") ; t.i
218 ("\e$,4*M)d\e(B" . "\e$,4(e\e(B") ; n.i
219 ("\e$,4*N)d\e(B" . "\e$,4(f\e(B") ; ti
220 ("\e$,4*O)d\e(B" . "\e$,4(g\e(B") ; ni
221 ("\e$,4*P)d\e(B" . "\e$,4(h\e(B") ; pi
222 ("\e$,4*Q)d\e(B" . "\e$,4(i\e(B") ; mi
223 ("\e$,4*R)d\e(B" . "\e$,4(j\e(B") ; yi
224 ("\e$,4*S)d\e(B" . "\e$,4(k\e(B") ; ri
225 ("\e$,4*T)d\e(B" . "\e$,4(l\e(B") ; li
226 ("\e$,4*U)d\e(B" . "\e$,4(m\e(B") ; vi
227 ("\e$,4*V)d\e(B" . "\e$,4(n\e(B") ; l_i
228 ("\e$,4*W)d\e(B" . "\e$,4(o\e(B") ; l.i
229 ("\e$,4*X)d\e(B" . "\e$,4(p\e(B") ; r_i
230 ("\e$,4*Y)d\e(B" . "\e$,4(q\e(B") ; n_i
231 ("\e$,4*Z)d\e(B" . "\e$,4(r\e(B") ; si
232 ("\e$,4*[)d\e(B" . "\e$,4(s\e(B") ; s'i
233 ("\e$,4*\)d\e(B" . "\e$,4(t\e(B") ; ji
234 ("\e$,4*])d\e(B" . "\e$,4(u\e(B") ; hi
235
236 ("\e$,4*H)f\e(B" . "\e$,4(w\e(B") ; ki~
237 ("\e$,4*I)f\e(B" . "\e$,4(x\e(B") ; n^i~
238 ("\e$,4*J)f\e(B" . "\e$,4(y\e(B") ; ci~
239 ("\e$,4*K)f\e(B" . "\e$,4(z\e(B") ; n~i~
240 ("\e$,4*L)f\e(B" . "\e$,4)o\e(B") ; t.i~
241 ("\e$,4*M)f\e(B" . "\e$,4)!\e(B") ; n.i~
242 ("\e$,4*N)f\e(B" . "\e$,4)"\e(B") ; ti~
243 ("\e$,4*O)f\e(B" . "\e$,4)#\e(B") ; ni~
244 ("\e$,4*P)f\e(B" . "\e$,4)$\e(B") ; pi~
245 ("\e$,4*Q)f\e(B" . "\e$,4)%\e(B") ; mi~
246 ("\e$,4*R)f\e(B" . "\e$,4)&\e(B") ; yi~
247 ("\e$,4*S)f\e(B" . "\e$,4)'\e(B") ; ri~
248 ("\e$,4*T)f\e(B" . "\e$,4)(\e(B") ; li~
249 ("\e$,4*U)f\e(B" . "\e$,4))\e(B") ; vi~
250 ("\e$,4*V)f\e(B" . "\e$,4)*\e(B") ; l_i~
251 ("\e$,4*W)f\e(B" . "\e$,4)+\e(B") ; l.i~
252 ("\e$,4*X)f\e(B" . "\e$,4),\e(B") ; r_i~
253 ("\e$,4*Y)f\e(B" . "\e$,4)-\e(B") ; n_i~
254 ("\e$,4*Z)f\e(B" . "\e$,4).\e(B") ; si~
255 ("\e$,4*[)f\e(B" . "\e$,4)/\e(B") ; s'i~
256 ("\e$,4*\)f\e(B" . "\e$,4)0\e(B") ; ji~
257 ("\e$,4*])f\e(B" . "\e$,4)1\e(B") ; hi~
258
259 ("\e$,4*H)g\e(B" . "\e$,4)p\e(B") ; ku
260 ("\e$,4*I)g\e(B" . "\e$,4)q\e(B") ; n^u
261 ("\e$,4*J)g\e(B" . "\e$,4)r\e(B") ; cu
262 ("\e$,4*K)g\e(B" . "\e$,4)s\e(B") ; n~u
263 ("\e$,4*L)g\e(B" . "\e$,4)t\e(B") ; t.u
264 ("\e$,4*M)g\e(B" . "\e$,4)u\e(B") ; n.u
265 ("\e$,4*N)g\e(B" . "\e$,4)v\e(B") ; tu
266 ("\e$,4*O)g\e(B" . "\e$,4)x\e(B") ; nu
267 ("\e$,4*P)g\e(B" . "\e$,4)y\e(B") ; pu
268 ("\e$,4*Q)g\e(B" . "\e$,4)z\e(B") ; mu
269 ("\e$,4*R)g\e(B" . "\e$,4){\e(B") ; yu
270 ("\e$,4*S)g\e(B" . "\e$,4)|\e(B") ; ru
271 ("\e$,4*T)g\e(B" . "\e$,4)}\e(B") ; lu
272 ("\e$,4*U)g\e(B" . "\e$,4)~\e(B") ; vu
273 ("\e$,4*V)g\e(B" . "\e$,4)\7f\e(B") ; l_u
274 ("\e$,4*W)g\e(B" . "\e$,4* \e(B") ; l.u
275 ("\e$,4*X)g\e(B" . "\e$,4*!\e(B") ; r_u
276 ("\e$,4*Y)g\e(B" . "\e$,4*"\e(B") ; n_u
277
278 ("\e$,4*H)h\e(B" . "\e$,4*#\e(B") ; ku~
279 ("\e$,4*I)h\e(B" . "\e$,4*$\e(B") ; n^u~
280 ("\e$,4*J)h\e(B" . "\e$,4*%\e(B") ; cu~
281 ("\e$,4*K)h\e(B" . "\e$,4*&\e(B") ; n~u~
282 ("\e$,4*L)h\e(B" . "\e$,4*'\e(B") ; t.u~
283 ("\e$,4*M)h\e(B" . "\e$,4*(\e(B") ; n.u~
284 ("\e$,4*N)h\e(B" . "\e$,4*)\e(B") ; tu~
285 ("\e$,4*O)h\e(B" . "\e$,4*+\e(B") ; nu~
286 ("\e$,4*P)h\e(B" . "\e$,4*,\e(B") ; pu~
287 ("\e$,4*Q)h\e(B" . "\e$,4*-\e(B") ; mu~
288 ("\e$,4*R)h\e(B" . "\e$,4*.\e(B") ; yu~
289 ("\e$,4*S)h\e(B" . "\e$,4*/\e(B") ; ru~
290 ("\e$,4*T)h\e(B" . "\e$,4*6\e(B") ; lu~
291 ("\e$,4*U)h\e(B" . "\e$,4*7\e(B") ; vu~
292 ("\e$,4*V)h\e(B" . "\e$,4*8\e(B") ; l_u~
293 ("\e$,4*W)h\e(B" . "\e$,4*9\e(B") ; l.u~
294 ("\e$,4*X)h\e(B" . "\e$,4*:\e(B") ; r_u~
295 ("\e$,4*Y)h\e(B" . "\e$,4*;\e(B") ; n_u~
296 ))
297
(defvar tml-glyph-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry tml-glyph-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table built from `tml-glyph-glyph'.
Keys are glyph strings and values are the glyph strings that replace
them; keys are compared with `equal'.")
303
(defvar tml-glyph-glyph-regexp
  (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash)
  "Regexp matching any glyph sequence that is a key of `tml-glyph-glyph-hash'.")
306
(defun tamil-compose-syllable-string (string)
  "Return STRING composed as a single Tamil syllable.
STRING is first passed through `decompose-string'.  The result is
inserted into a temporary buffer, the whole buffer is composed with
`tamil-compose-syllable-region', and the buffer contents are returned."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (tamil-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
312
(defun tamil-compose-syllable-region (from to)
  "Compose tamil syllable in region FROM to TO.
The text between FROM and TO is turned into a glyph string in three
steps:
 1. each character sequence matching `tml-char-glyph-regexp' is
    replaced by its value in `tml-char-glyph-hash'; any other
    character is copied unchanged;
 2. if the glyph string contains one of `tml-glyph-reorder-key-glyphs'
    directly after a consonant glyph, the two are swapped;
 3. the first glyph sequence matching `tml-glyph-glyph-regexp' is
    replaced by its value in `tml-glyph-glyph-hash'.
The glyphs are then interleaved with the composition rule (5 . 3) and
installed on the region with `compose-region', whose value is returned.
If the region is empty, do nothing and return nil."
  (let (glyph-str match-str)
    (save-excursion
      (save-restriction
        (narrow-to-region from to)
        (goto-char (point-min))
        ;; char-glyph-conversion
        (while (not (eobp))
          (if (looking-at tml-char-glyph-regexp)
              (progn
                (setq match-str (match-string 0)
                      glyph-str
                      (concat glyph-str
                              (gethash match-str tml-char-glyph-hash)))
                (goto-char (match-end 0)))
            ;; No table entry: keep the character itself as its glyph.
            (setq glyph-str (concat glyph-str (string (following-char))))
            (forward-char 1)))
        ;; GLYPH-STR is still nil only when the region was empty.  The
        ;; old code then called `aset' on nil and signalled an error;
        ;; there is nothing to compose, so skip the remaining steps.
        (when glyph-str
          ;; glyph reordering
          (when (and (string-match tml-glyph-reorder-key-glyphs glyph-str)
                     (string-match (car tml-glyph-reordering-regexp-list)
                                   glyph-str))
            (setq glyph-str
                  (replace-match (cdr tml-glyph-reordering-regexp-list)
                                 nil nil glyph-str)))
          ;; glyph-glyph-conversion
          (when (string-match tml-glyph-glyph-regexp glyph-str)
            (setq match-str (match-string 0 glyph-str))
            (setq glyph-str
                  (replace-match (gethash match-str tml-glyph-glyph-hash)
                                 nil nil glyph-str)))
          ;; concatenate and attach reference-points.
          ;; Build (RULE G1 RULE G2 ...) and drop the leading RULE, so
          ;; the result alternates glyph, rule, glyph, ...
          (setq glyph-str
                (cdr
                 (apply
                  'nconc
                  (mapcar
                   (function
                    (lambda (x) (list '(5 . 3) x))) ;; default ref. point.
                   glyph-str))))
          (compose-region from to glyph-str))))))
356
357 ;;;###autoload
(defun tamil-composition-function (pos &optional string)
  "Compose Tamil characters after the position POS.
If STRING is not nil, it is a string, and POS is an index to the string.
In this case, compose characters after POS of the string.

Composing inside STRING is not yet implemented; nil is returned in
that case.  Otherwise point is moved to POS and, if the text there
matches `tamil-composable-pattern', the match is composed and its end
position is returned.  Return nil when nothing matches."
  (unless string
    (goto-char pos)
    (when (looking-at tamil-composable-pattern)
      ;; Remember the end first: composing runs further regexp matches.
      (let ((end (match-end 0)))
        (tamil-compose-syllable-region pos end)
        end))))
369
370 (provide 'tml-util)
371
372 ;;; arch-tag: 4d1c9737-e7b1-44cf-a040-4f64c50e773e
373 ;;; tml-util.el ends here