1 ;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*-
3 ;; Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6 ;; Keywords: multilingual, Indian, Tamil
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 ;; Boston, MA 02110-1301, USA.
25 ;; Created: Nov. 08. 2002
29 ;; This file provides character (Unicode) to glyph (CDAC) conversion and
30 ;; composition of Tamil script characters.
34 ;; Tamil Composable Pattern
44 ;; 2. syllable : only ligature-formed pattern forms composition.
50 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
;; Regexp character class for one Tamil consonant, going by the name.
;; It is a single bracket expression spanning one contiguous character
;; range; the same two endpoints delimit the consonant range used inside
;; `tamil-composable-pattern'.
52 (defconst tamil-consonant
53 "[
\e$,1<5
\e(B-
\e$,1<Y
\e(B]")
;; The pattern is an alternation of four branches:
;;   1. one character from a contiguous range (the entries for that
;;      range are labelled Independent Vowels in `tml-char-glyph');
;;   2. one of two characters that are treated as independent (see the
;;      inline note on that branch);
;;   3. either a fixed three-character sequence or one character from
;;      the range also used by `tamil-consonant', optionally followed by
;;      one more character taken from a set made of a single character
;;      plus a contiguous range (the entries for that range are labelled
;;      Dependent vowel signs in `tml-char-glyph');
;;   4. a fixed four-character sequence.
;; Branches 1, 3 and 4 are wrapped in capturing groups; branch 2 is not.
55 (defconst tamil-composable-pattern
57 "\\([
\e$,1<%
\e(B-
\e$,1<4
\e(B]\\)\\|"
58 "[
\e$,1<"<#
\e(B]\\|" ;; vowel modifier considered independent
59 "\\(\\(?:\\(?:
\e$,1<5<m<W
\e(B\\)\\|[
\e$,1<5
\e(B-
\e$,1<Y
\e(B]\\)[
\e$,1<m<^
\e(B-
\e$,1<l
\e(B]?\\)\\|"
60 "\\(
\e$,1<W<m<P<`
\e(B\\)")
61 "Regexp matching a composable sequence of Tamil characters.")
;; Restricts the buffer to FROM..TO, moves to the start of that
;; restriction and repeatedly searches forward for
;; `tamil-composable-pattern'.  Every match is passed to
;; `tamil-compose-syllable-region', beginning at the start of the match.
64 (defun tamil-compose-region (from to)
68 (narrow-to-region from to)
69 (goto-char (point-min))
70 (while (re-search-forward tamil-composable-pattern nil t)
71 (tamil-compose-syllable-region (match-beginning 0)
;; Inserts the decomposed form of STRING (via `decompose-string') and
;; runs `tamil-compose-region' over the whole accessible buffer.
;; NOTE(review): presumably this runs in a temporary buffer whose
;; contents are returned as the result -- confirm.
73 (defun tamil-compose-string (string)
75 (insert (decompose-string string))
76 (tamil-compose-region (point-min) (point-max))
;; LEN is the length, counted from point, of the text to process: the
;; buffer is narrowed to point .. point+LEN and composed with
;; `tamil-compose-region'.  The buffer's modified flag is read before
;; composing and restored afterwards, so the conversion itself does not
;; leave the buffer marked as modified.  The last form yields the size
;; of the narrowed region after composition.
80 (defun tamil-post-read-conversion (len)
83 (let ((buffer-modified-p (buffer-modified-p)))
84 (narrow-to-region (point) (+ (point) len))
85 (tamil-compose-region (point-min) (point-max))
86 (set-buffer-modified-p buffer-modified-p)
87 (- (point-max) (point-min))))))
;; The list is built from the high end: TO is pushed onto RESULT and
;; then decremented until it falls below FROM, so RESULT comes out in
;; ascending order and includes both FROM and TO.  When FROM is greater
;; than TO the loop body never runs.
89 (defun tamil-range (from to)
90 "Make the list of the integers of range FROM to TO."
92 (while (<= from to) (setq result (cons to result) to (1- to))) result))
;; `max-specpdl-size' is bound to 1000 around the work.  The keys are
;; gathered with `maphash' (the values are ignored) into DUMMY, and the
;; comparison function visible at the end orders longer items before
;; shorter ones.
;; NOTE(review): presumably longer keys come first so that a multi-
;; character key wins over a key that is its prefix -- confirm.
94 (defun tamil-regexp-of-hashtbl-keys (hashtbl)
95 "Return a regular expression that matches all keys in hashtable HASHTBL."
96 (let ((max-specpdl-size 1000))
100 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl)
102 (function (lambda (x y) (> (length x) (length y))))))))
105 ;; Notes on conversion steps.
107 ;; 1. chars to glyphs
108 ;; Characters are simply replaced by their glyphs.
110 ;; 2. glyph reordering.
111 ;; following "
\e$,4)j
\e(B", "
\e$,4)k
\e(B", "
\e$,4)l
\e(B" goes to the front.
113 ;; 3. glyphs to glyphs
114 ;; reordered vowels are ligatured to consonants.
117 ;; left modifiers will be attached at the left.
118 ;; others will be attached at the right.
;; Table of (CHARACTERS . GLYPHS) string pairs for the chars-to-glyphs
;; step.  Some keys are sequences of several characters rather than a
;; single character, and some values are sequences of several glyphs.
;; NOTE(review): presumably this is the list loaded into
;; `tml-char-glyph-hash' -- confirm.
120 (defvar tml-char-glyph
122 ("
\e$,1<"
\e(B" . "
\e$,4)b
\e(B") ;; not good
123 ("
\e$,1<#
\e(B" . "
\e$,4*G
\e(B")
124 ;; Independent Vowels
125 ("
\e$,1<%
\e(B" . "
\e$,4*<
\e(B")
126 ("
\e$,1<&
\e(B" . "
\e$,4*=
\e(B")
127 ("
\e$,1<'
\e(B" . "
\e$,4*>
\e(B")
128 ("
\e$,1<(
\e(B" . "
\e$,4*?
\e(B")
129 ("
\e$,1<)
\e(B" . "
\e$,4*@
\e(B")
130 ("
\e$,1<*
\e(B" . "
\e$,4*A
\e(B")
131 ("
\e$,1<.
\e(B" . "
\e$,4*B
\e(B")
132 ("
\e$,1</
\e(B" . "
\e$,4*C
\e(B")
133 ("
\e$,1<0
\e(B" . "
\e$,4*D
\e(B")
134 ("
\e$,1<2
\e(B" . "
\e$,4*E
\e(B")
135 ("
\e$,1<3
\e(B" . "
\e$,4*F
\e(B")
136 ("
\e$,1<4
\e(B" . "
\e$,4*E*W
\e(B")
138 ("
\e$,1<5<m<W<m
\e(B" . "
\e$,4):
\e(B") ; ks.
139 ("
\e$,1<5<m<W
\e(B" . "
\e$,4*^
\e(B") ; ks
140 ("
\e$,1<5
\e(B" . "
\e$,4*H
\e(B")
142 ("
\e$,1<9
\e(B" . "
\e$,4*I
\e(B")
143 ("
\e$,1<:
\e(B" . "
\e$,4*J
\e(B")
144 ("
\e$,1<<
\e(B" . "
\e$,4*\
\e(B")
145 ("
\e$,1<<<m
\e(B" . "
\e$,4)8
\e(B")
146 ("
\e$,1<>
\e(B" . "
\e$,4*K
\e(B")
147 ("
\e$,1<?
\e(B" . "
\e$,4*L
\e(B")
148 ("
\e$,1<C
\e(B" . "
\e$,4*M
\e(B")
149 ("
\e$,1<D
\e(B" . "
\e$,4*N
\e(B")
150 ("
\e$,1<H
\e(B" . "
\e$,4*O
\e(B")
151 ("
\e$,1<I
\e(B" . "
\e$,4*Y
\e(B")
152 ("
\e$,1<I<m
\e(B" . "
\e$,4)a
\e(B")
153 ("
\e$,1<J
\e(B" . "
\e$,4*P
\e(B")
154 ("
\e$,1<N
\e(B" . "
\e$,4*Q
\e(B")
155 ("
\e$,1<O
\e(B" . "
\e$,4*R
\e(B")
156 ("
\e$,1<P
\e(B" . "
\e$,4*S
\e(B")
157 ("
\e$,1<Q
\e(B" . "
\e$,4*X
\e(B")
158 ("
\e$,1<R
\e(B" . "
\e$,4*T
\e(B")
159 ("
\e$,1<S
\e(B" . "
\e$,4*W
\e(B")
160 ("
\e$,1<T
\e(B" . "
\e$,4*V
\e(B")
161 ("
\e$,1<U
\e(B" . "
\e$,4*U
\e(B")
162 ("
\e$,1<W
\e(B" . "
\e$,4*[
\e(B")
163 ("
\e$,1<W<m
\e(B" . "
\e$,4)7
\e(B")
164 ("
\e$,1<W<m<P<`
\e(B" . "
\e$,4*_
\e(B")
165 ("
\e$,1<X
\e(B" . "
\e$,4*Z
\e(B")
166 ("
\e$,1<X<m
\e(B" . "
\e$,4)6
\e(B")
167 ("
\e$,1<Y
\e(B" . "
\e$,4*]
\e(B")
168 ("
\e$,1<Y<m
\e(B" . "
\e$,4)9
\e(B")
170 ;; Dependent vowel signs
171 ("
\e$,1<^
\e(B" . "
\e$,4)c
\e(B")
172 ("
\e$,1<_
\e(B" . "
\e$,4)d
\e(B")
173 ("
\e$,1<`
\e(B" . "
\e$,4)f
\e(B")
174 ("
\e$,1<a
\e(B" . "
\e$,4)g
\e(B")
175 ("
\e$,1<b
\e(B" . "
\e$,4)h
\e(B")
176 ("
\e$,1<f
\e(B" . "
\e$,4)j
\e(B")
177 ("
\e$,1<g
\e(B" . "
\e$,4)k
\e(B")
178 ("
\e$,1<h
\e(B" . "
\e$,4)l
\e(B")
179 ("
\e$,1<j
\e(B" . "
\e$,4)j)c
\e(B")
180 ("
\e$,1<k
\e(B" . "
\e$,4)k)c
\e(B")
181 ("
\e$,1<l
\e(B" . "
\e$,4)j*W
\e(B")
184 ("
\e$,1<m
\e(B" . "
\e$,4)b
\e(B")
185 ("
\e$,1<w
\e(B" . "nil") ;; not supported?
;; Hash table using `equal' as its test, so string keys compare by
;; content.  Each element X of the mapped list is entered with (car X)
;; as the key and (cdr X) as the value.
;; NOTE(review): the list being mapped is presumably `tml-char-glyph' --
;; confirm.
188 (defvar tml-char-glyph-hash
189 (let* ((hash (make-hash-table :test 'equal)))
190 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
;; Regexp built by `tamil-regexp-of-hashtbl-keys' from the keys of
;; `tml-char-glyph-hash'.  `tamil-compose-syllable-region' tests it with
;; `looking-at' during its char-glyph-conversion step.
194 (defvar tml-char-glyph-regexp
195 (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash))
197 ;; Tamil glyphs that need to be reordered.
;; Bracket expression listing the individual consonant glyphs (the
;; glyph values that `tml-char-glyph' assigns to its consonant entries).
;; It forms the first group of the regexp built in
;; `tml-glyph-reordering-regexp-list'.
199 (defvar tml-consonants-regexp
200 "[
\e$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*]
\e(B]")
;; Bracket expression of the three glyphs that trigger reordering.
;; `tamil-compose-syllable-region' checks the glyph string against it
;; before it attempts the reordering replacement.
202 (defvar tml-glyph-reorder-key-glyphs "[
\e$,4)j)k)l
\e(B]")
;; A regexp together with its replacement template;
;; `tamil-compose-syllable-region' reads them with `car' and `cdr'.
;; The regexp captures a consonant glyph as group 1 followed by one of
;; the three reorder-key glyphs as group 2, and the template swaps the
;; two groups so the key glyph ends up in front of the consonant.
204 (defvar tml-glyph-reordering-regexp-list
206 (concat "\\(" tml-consonants-regexp "\\)\\([
\e$,4)j)k)l
\e(B]\\)") "\\2\\1"))
208 ;; Consonant glyph + vowel-sign glyph pairs to be replaced by a ligature.
;; Table of (GLYPHS . LIGATURE) string pairs for the glyphs-to-glyphs
;; step.  Every key visible here is two glyphs, a consonant glyph
;; followed by one of four vowel-sign glyphs, and every value is a
;; single glyph.  The entries come in four runs, one per vowel sign; the
;; trailing comment on each entry is a transliteration-style label.
;; NOTE(review): presumably this is the list loaded into
;; `tml-glyph-glyph-hash' -- confirm.
209 (defvar tml-glyph-glyph
211 ("
\e$,4*H)d
\e(B" . "
\e$,4(a
\e(B") ; ki
212 ("
\e$,4*^)d
\e(B" . "
\e$,4(v
\e(B") ; ksi
213 ("
\e$,4*^)f
\e(B" . "
\e$,4)2
\e(B") ; ksi~
214 ("
\e$,4*I)d
\e(B" . "
\e$,4(b
\e(B") ; n^i
215 ("
\e$,4*J)d
\e(B" . "
\e$,4(c
\e(B") ; ci
216 ("
\e$,4*K)d
\e(B" . "
\e$,4(d
\e(B") ; n~i
217 ("
\e$,4*L)d
\e(B" . "
\e$,4)n
\e(B") ; t.i
218 ("
\e$,4*M)d
\e(B" . "
\e$,4(e
\e(B") ; n.i
219 ("
\e$,4*N)d
\e(B" . "
\e$,4(f
\e(B") ; ti
220 ("
\e$,4*O)d
\e(B" . "
\e$,4(g
\e(B") ; ni
221 ("
\e$,4*P)d
\e(B" . "
\e$,4(h
\e(B") ; pi
222 ("
\e$,4*Q)d
\e(B" . "
\e$,4(i
\e(B") ; mi
223 ("
\e$,4*R)d
\e(B" . "
\e$,4(j
\e(B") ; yi
224 ("
\e$,4*S)d
\e(B" . "
\e$,4(k
\e(B") ; ri
225 ("
\e$,4*T)d
\e(B" . "
\e$,4(l
\e(B") ; li
226 ("
\e$,4*U)d
\e(B" . "
\e$,4(m
\e(B") ; vi
227 ("
\e$,4*V)d
\e(B" . "
\e$,4(n
\e(B") ; l_i
228 ("
\e$,4*W)d
\e(B" . "
\e$,4(o
\e(B") ; l.i
229 ("
\e$,4*X)d
\e(B" . "
\e$,4(p
\e(B") ; r_i
230 ("
\e$,4*Y)d
\e(B" . "
\e$,4(q
\e(B") ; n_i
231 ("
\e$,4*Z)d
\e(B" . "
\e$,4(r
\e(B") ; si
232 ("
\e$,4*[)d
\e(B" . "
\e$,4(s
\e(B") ; s'i
233 ("
\e$,4*\)d
\e(B" . "
\e$,4(t
\e(B") ; ji
234 ("
\e$,4*])d
\e(B" . "
\e$,4(u
\e(B") ; hi
236 ("
\e$,4*H)f
\e(B" . "
\e$,4(w
\e(B") ; ki~
237 ("
\e$,4*I)f
\e(B" . "
\e$,4(x
\e(B") ; n^i~
238 ("
\e$,4*J)f
\e(B" . "
\e$,4(y
\e(B") ; ci~
239 ("
\e$,4*K)f
\e(B" . "
\e$,4(z
\e(B") ; n~i~
240 ("
\e$,4*L)f
\e(B" . "
\e$,4)o
\e(B") ; t.i~
241 ("
\e$,4*M)f
\e(B" . "
\e$,4)!
\e(B") ; n.i~
242 ("
\e$,4*N)f
\e(B" . "
\e$,4)"
\e(B") ; ti~
243 ("
\e$,4*O)f
\e(B" . "
\e$,4)#
\e(B") ; ni~
244 ("
\e$,4*P)f
\e(B" . "
\e$,4)$
\e(B") ; pi~
245 ("
\e$,4*Q)f
\e(B" . "
\e$,4)%
\e(B") ; mi~
246 ("
\e$,4*R)f
\e(B" . "
\e$,4)&
\e(B") ; yi~
247 ("
\e$,4*S)f
\e(B" . "
\e$,4)'
\e(B") ; ri~
248 ("
\e$,4*T)f
\e(B" . "
\e$,4)(
\e(B") ; li~
249 ("
\e$,4*U)f
\e(B" . "
\e$,4))
\e(B") ; vi~
250 ("
\e$,4*V)f
\e(B" . "
\e$,4)*
\e(B") ; l_i~
251 ("
\e$,4*W)f
\e(B" . "
\e$,4)+
\e(B") ; l.i~
252 ("
\e$,4*X)f
\e(B" . "
\e$,4),
\e(B") ; r_i~
253 ("
\e$,4*Y)f
\e(B" . "
\e$,4)-
\e(B") ; n_i~
254 ("
\e$,4*Z)f
\e(B" . "
\e$,4).
\e(B") ; si~
255 ("
\e$,4*[)f
\e(B" . "
\e$,4)/
\e(B") ; s'i~
256 ("
\e$,4*\)f
\e(B" . "
\e$,4)0
\e(B") ; ji~
257 ("
\e$,4*])f
\e(B" . "
\e$,4)1
\e(B") ; hi~
259 ("
\e$,4*H)g
\e(B" . "
\e$,4)p
\e(B") ; ku
260 ("
\e$,4*I)g
\e(B" . "
\e$,4)q
\e(B") ; n^u
261 ("
\e$,4*J)g
\e(B" . "
\e$,4)r
\e(B") ; cu
262 ("
\e$,4*K)g
\e(B" . "
\e$,4)s
\e(B") ; n~u
263 ("
\e$,4*L)g
\e(B" . "
\e$,4)t
\e(B") ; t.u
264 ("
\e$,4*M)g
\e(B" . "
\e$,4)u
\e(B") ; n.u
265 ("
\e$,4*N)g
\e(B" . "
\e$,4)v
\e(B") ; tu
266 ("
\e$,4*O)g
\e(B" . "
\e$,4)x
\e(B") ; nu
267 ("
\e$,4*P)g
\e(B" . "
\e$,4)y
\e(B") ; pu
268 ("
\e$,4*Q)g
\e(B" . "
\e$,4)z
\e(B") ; mu
269 ("
\e$,4*R)g
\e(B" . "
\e$,4){
\e(B") ; yu
270 ("
\e$,4*S)g
\e(B" . "
\e$,4)|
\e(B") ; ru
271 ("
\e$,4*T)g
\e(B" . "
\e$,4)}
\e(B") ; lu
272 ("
\e$,4*U)g
\e(B" . "
\e$,4)~
\e(B") ; vu
273 ("
\e$,4*V)g
\e(B" . "
\e$,4)
\7f\e(B") ; l_u
274 ("
\e$,4*W)g
\e(B" . "
\e$,4*
\e(B") ; l.u
275 ("
\e$,4*X)g
\e(B" . "
\e$,4*!
\e(B") ; r_u
276 ("
\e$,4*Y)g
\e(B" . "
\e$,4*"
\e(B") ; n_u
278 ("
\e$,4*H)h
\e(B" . "
\e$,4*#
\e(B") ; ku~
279 ("
\e$,4*I)h
\e(B" . "
\e$,4*$
\e(B") ; n^u~
280 ("
\e$,4*J)h
\e(B" . "
\e$,4*%
\e(B") ; cu~
281 ("
\e$,4*K)h
\e(B" . "
\e$,4*&
\e(B") ; n~u~
282 ("
\e$,4*L)h
\e(B" . "
\e$,4*'
\e(B") ; t.u~
283 ("
\e$,4*M)h
\e(B" . "
\e$,4*(
\e(B") ; n.u~
284 ("
\e$,4*N)h
\e(B" . "
\e$,4*)
\e(B") ; tu~
285 ("
\e$,4*O)h
\e(B" . "
\e$,4*+
\e(B") ; nu~
286 ("
\e$,4*P)h
\e(B" . "
\e$,4*,
\e(B") ; pu~
287 ("
\e$,4*Q)h
\e(B" . "
\e$,4*-
\e(B") ; mu~
288 ("
\e$,4*R)h
\e(B" . "
\e$,4*.
\e(B") ; yu~
289 ("
\e$,4*S)h
\e(B" . "
\e$,4*/
\e(B") ; ru~
290 ("
\e$,4*T)h
\e(B" . "
\e$,4*6
\e(B") ; lu~
291 ("
\e$,4*U)h
\e(B" . "
\e$,4*7
\e(B") ; vu~
292 ("
\e$,4*V)h
\e(B" . "
\e$,4*8
\e(B") ; l_u~
293 ("
\e$,4*W)h
\e(B" . "
\e$,4*9
\e(B") ; l.u~
294 ("
\e$,4*X)h
\e(B" . "
\e$,4*:
\e(B") ; r_u~
295 ("
\e$,4*Y)h
\e(B" . "
\e$,4*;
\e(B") ; n_u~
;; Hash table using `equal' as its test, so string keys compare by
;; content.  Each element X of the mapped list is entered with (car X)
;; as the key and (cdr X) as the value.
;; NOTE(review): the list being mapped is presumably `tml-glyph-glyph'
;; -- confirm.
298 (defvar tml-glyph-glyph-hash
299 (let* ((hash (make-hash-table :test 'equal)))
300 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
;; Regexp built by `tamil-regexp-of-hashtbl-keys' from the keys of
;; `tml-glyph-glyph-hash'.  `tamil-compose-syllable-region' matches it
;; against the glyph string in its glyph-glyph-conversion step.
304 (defvar tml-glyph-glyph-regexp
305 (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash))
;; Inserts the decomposed form of STRING (via `decompose-string') and
;; runs `tamil-compose-syllable-region' over the whole accessible
;; buffer, treating the text as one syllable.
;; NOTE(review): presumably this runs in a temporary buffer whose
;; contents are returned as the result -- confirm.
307 (defun tamil-compose-syllable-string (string)
309 (insert (decompose-string string))
310 (tamil-compose-syllable-region (point-min) (point-max))
;; Steps that can be read off the forms below:
;;   - the buffer is narrowed to FROM..TO and point moved to its start;
;;   - char-glyph conversion: where the text at point matches
;;     `tml-char-glyph-regexp', the matched string is looked up in
;;     `tml-char-glyph-hash' and point moves past the match; there are
;;     also forms that take the character after point as it is;
;;   - reordering: only when GLYPH-STR contains one of the glyphs in
;;     `tml-glyph-reorder-key-glyphs' is the regexp in the car of
;;     `tml-glyph-reordering-regexp-list' tried, and a match is
;;     replaced using the template in its cdr;
;;   - glyph-glyph conversion: a match of `tml-glyph-glyph-regexp' in
;;     GLYPH-STR is replaced by its value in `tml-glyph-glyph-hash';
;;   - each item is paired with the default reference point (5 . 3)
;;     and the result is handed to `compose-region' for FROM..TO.
;; NOTE(review): how the individual forms nest and loop is assumed from
;; their order -- confirm against the complete definition.
313 (defun tamil-compose-syllable-region (from to)
314 "Compose tamil syllable in region FROM to TO."
315 (let (glyph-str match-str glyph-reorder-regexps)
318 (narrow-to-region from to)
319 (goto-char (point-min))
320 ;; char-glyph-conversion
322 (if (looking-at tml-char-glyph-regexp)
324 (setq match-str (match-string 0)
327 (gethash match-str tml-char-glyph-hash)))
328 (goto-char (match-end 0)))
329 (setq glyph-str (concat glyph-str (string (following-char))))
332 (aset glyph-str 0 (following-char)))
334 (when (string-match tml-glyph-reorder-key-glyphs glyph-str)
335 (if (string-match (car tml-glyph-reordering-regexp-list)
338 (replace-match (cdr tml-glyph-reordering-regexp-list)
339 nil nil glyph-str))))
340 ;; glyph-glyph-conversion
341 (when (string-match tml-glyph-glyph-regexp glyph-str)
342 (setq match-str (match-string 0 glyph-str))
344 (replace-match (gethash match-str tml-glyph-glyph-hash)
346 ;; concatenate and attach reference-points.
353 (lambda (x) (list '(5 . 3) x))) ;; default ref. point.
355 (compose-region from to glyph-str)))))
;; When the text at point matches `tamil-composable-pattern', the
;; stretch from POS to the end of that match is composed with
;; `tamil-compose-syllable-region'.
;; NOTE(review): the `Not yet implemented' remark presumably refers to
;; the STRING case described in the docstring -- confirm.
358 (defun tamil-composition-function (pos &optional string)
359 "Compose Tamil characters after the position POS.
360 If STRING is not nil, it is a string, and POS is an index to the string.
361 In this case, compose characters after POS of the string."
363 ;; Not yet implemented.
366 (if (looking-at tamil-composable-pattern)
368 (tamil-compose-syllable-region pos (match-end 0))))))
372 ;;; arch-tag: 4d1c9737-e7b1-44cf-a040-4f64c50e773e
373 ;;; tml-util.el ends here