1 ;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*-
3 ;; Copyright (C) 2001 Free Software Foundation, Inc.
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6 ;; Keywords: multilingual, Indian, Tamil
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
25 ;; Created: Nov. 08. 2002
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
30 ;; composition of Tamil script characters.
34 ;; Tamil Composable Pattern
44 ;; 2. syllable : only ligature-formed pattern forms composition.
50 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
52 (defconst tamil-consonant
53 "[
\e$,1<5
\e(B-
\e$,1<Y
\e(B]")
55 (defconst tamil-composable-pattern
57 "\\([
\e$,1<%
\e(B-
\e$,1<4
\e(B]\\)\\|"
58 "[
\e$,1<"<#
\e(B]\\|" ;; vowel modifier considered independent
59 "\\(\\(?:\\(?:
\e$,1<5<m<W
\e(B\\)\\|[
\e$,1<5
\e(B-
\e$,1<Y
\e(B]\\)[
\e$,1<m<^
\e(B-
\e$,1<l
\e(B]?\\)\\|"
60 "\\(
\e$,1<W<m<P<`
\e(B\\)")
61 "Regexp matching a composable sequence of Tamil characters.")
64 (defun tamil-compose-region (from to)
68 (narrow-to-region from to)
69 (goto-char (point-min))
70 (while (re-search-forward tamil-composable-pattern nil t)
71 (tamil-compose-syllable-region (match-beginning 0)
73 (defun tamil-compose-string (string)
75 (insert (decompose-string string))
76 (tamil-compose-region (point-min) (point-max))
79 (defun tamil-post-read-conversion (len)
82 (let ((buffer-modified-p (buffer-modified-p)))
83 (narrow-to-region (point) (+ (point) len))
84 (tamil-compose-region (point-min) (point-max))
85 (set-buffer-modified-p buffer-modified-p)
86 (- (point-max) (point-min))))))
88 (defun tamil-range (from to)
89 "Make the list of the integers of range FROM to TO."
91 (while (<= from to) (setq result (cons to result) to (1- to))) result))
93 (defun tamil-regexp-of-hashtbl-keys (hashtbl)
94 "Return a regular expression that matches all keys in hashtable HASHTBL."
95 (let ((max-specpdl-size 1000))
99 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl)
101 (function (lambda (x y) (> (length x) (length y))))))))
105 (defun tamil-composition-function (from to pattern &optional string)
106 "Compose Tamil characters in REGION, or STRING if specified.
107 Assume that the REGION or STRING must fully match the composable
109 (if string (tamil-compose-syllable-string string)
110 (tamil-compose-syllable-region from to))
113 ;; Register a function to compose Tamil characters.
115 (function (lambda (ucs)
116 (aset composition-function-table (decode-char 'ucs ucs)
117 (list (cons tamil-composable-pattern
118 'tamil-composition-function)))))
119 (nconc '(#x0b82 #x0b83) (tamil-range #x0b85 #x0bb9)))
121 ;; Notes on conversion steps.
123 ;; 1. chars to glyphs
124 ;; Simple replacement of characters to glyphs is done.
126 ;; 2. glyphs reordering.
127 ;; following "
\e$,4)j
\e(B", "
\e$,4)k
\e(B", "
\e$,4)l
\e(B" goes to the front.
129 ;; 3. glyphs to glyphs
130 ;; reordered vowels are ligatured to consonants.
133 ;; left modifiers will be attached at the left.
134 ;; others will be attached right.
136 (defvar tml-char-glyph
138 ;;("
\e$,1<"
\e(B" . "")
139 ("
\e$,1<#
\e(B" . "
\e$,4*G
\e(B")
140 ;; Independent Vowels
141 ("
\e$,1<%
\e(B" . "
\e$,4*<
\e(B")
142 ("
\e$,1<&
\e(B" . "
\e$,4*=
\e(B")
143 ("
\e$,1<'
\e(B" . "
\e$,4*>
\e(B")
144 ("
\e$,1<(
\e(B" . "
\e$,4*?
\e(B")
145 ("
\e$,1<)
\e(B" . "
\e$,4*@
\e(B")
146 ("
\e$,1<*
\e(B" . "
\e$,4*A
\e(B")
147 ("
\e$,1<.
\e(B" . "
\e$,4*B
\e(B")
148 ("
\e$,1</
\e(B" . "
\e$,4*C
\e(B")
149 ("
\e$,1<0
\e(B" . "
\e$,4*D
\e(B")
150 ("
\e$,1<2
\e(B" . "
\e$,4*E
\e(B")
151 ("
\e$,1<3
\e(B" . "
\e$,4*F
\e(B")
152 ("
\e$,1<4
\e(B" . "
\e$,4*E*W
\e(B")
154 ("
\e$,1<5<m<W<m
\e(B" . "
\e$,4):
\e(B") ; ks.
155 ("
\e$,1<5<m<W
\e(B" . "
\e$,4*^
\e(B") ; ks
156 ("
\e$,1<5
\e(B" . "
\e$,4*H
\e(B")
158 ("
\e$,1<9
\e(B" . "
\e$,4*I
\e(B")
159 ("
\e$,1<:
\e(B" . "
\e$,4*J
\e(B")
160 ("
\e$,1<<
\e(B" . "
\e$,4*\
\e(B")
161 ("
\e$,1<<<m
\e(B" . "
\e$,4)8
\e(B")
162 ("
\e$,1<>
\e(B" . "
\e$,4*K
\e(B")
163 ("
\e$,1<?
\e(B" . "
\e$,4*L
\e(B")
164 ("
\e$,1<C
\e(B" . "
\e$,4*M
\e(B")
165 ("
\e$,1<D
\e(B" . "
\e$,4*N
\e(B")
166 ("
\e$,1<H
\e(B" . "
\e$,4*O
\e(B")
167 ("
\e$,1<I
\e(B" . "
\e$,4*Y
\e(B")
168 ("
\e$,1<I<m
\e(B" . "
\e$,4)a
\e(B")
169 ("
\e$,1<J
\e(B" . "
\e$,4*P
\e(B")
170 ("
\e$,1<N
\e(B" . "
\e$,4*Q
\e(B")
171 ("
\e$,1<O
\e(B" . "
\e$,4*R
\e(B")
172 ("
\e$,1<P
\e(B" . "
\e$,4*S
\e(B")
173 ("
\e$,1<Q
\e(B" . "
\e$,4*X
\e(B")
174 ("
\e$,1<R
\e(B" . "
\e$,4*T
\e(B")
175 ("
\e$,1<S
\e(B" . "
\e$,4*W
\e(B")
176 ("
\e$,1<T
\e(B" . "
\e$,4*V
\e(B")
177 ("
\e$,1<U
\e(B" . "
\e$,4*U
\e(B")
178 ("
\e$,1<W
\e(B" . "
\e$,4*[
\e(B")
179 ("
\e$,1<W<m
\e(B" . "
\e$,4)7
\e(B")
180 ("
\e$,1<W<m<P<`
\e(B" . "
\e$,4*_
\e(B")
181 ("
\e$,1<X
\e(B" . "
\e$,4*Z
\e(B")
182 ("
\e$,1<X<m
\e(B" . "
\e$,4)6
\e(B")
183 ("
\e$,1<Y
\e(B" . "
\e$,4*]
\e(B")
184 ("
\e$,1<Y<m
\e(B" . "
\e$,4)9
\e(B")
186 ;; Dependent vowel signs
187 ("
\e$,1<^
\e(B" . "
\e$,4)c
\e(B")
188 ("
\e$,1<_
\e(B" . "
\e$,4)d
\e(B")
189 ("
\e$,1<`
\e(B" . "
\e$,4)f
\e(B")
190 ("
\e$,1<a
\e(B" . "
\e$,4)g
\e(B")
191 ("
\e$,1<b
\e(B" . "
\e$,4)h
\e(B")
192 ("
\e$,1<f
\e(B" . "
\e$,4)j
\e(B")
193 ("
\e$,1<g
\e(B" . "
\e$,4)k
\e(B")
194 ("
\e$,1<h
\e(B" . "
\e$,4)l
\e(B")
195 ("
\e$,1<j
\e(B" . "
\e$,4)j)c
\e(B")
196 ("
\e$,1<k
\e(B" . "
\e$,4)k)c
\e(B")
197 ("
\e$,1<l
\e(B" . "
\e$,4)j*W
\e(B")
200 ("
\e$,1<m
\e(B" . "
\e$,4)b
\e(B")
201 ("
\e$,1<w
\e(B" . "nil") ;; not supported?
204 (defvar tml-char-glyph-hash
205 (let* ((hash (make-hash-table :test 'equal)))
206 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
210 (defvar tml-char-glyph-regexp
211 (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash))
213 ;; Tamil languages needed to be reordered.
215 (defvar tml-consonants-regexp
216 "[
\e$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*]
\e(B]")
218 (defvar tml-glyph-reorder-key-glyphs "[
\e$,4)j)k)l
\e(B]")
220 (defvar tml-glyph-reordering-regexp-list
222 (concat "\\(" tml-consonants-regexp "\\)\\([
\e$,4)j)k)l
\e(B]\\)") "\\2\\1"))
224 ;; Tamil vowel modifiers to be ligatured.
225 (defvar tml-glyph-glyph
227 ("
\e$,4*H)d
\e(B" . "
\e$,4(a
\e(B") ; ki
228 ("
\e$,4*^)d
\e(B" . "
\e$,4(v
\e(B") ; ksi
229 ("
\e$,4*^)f
\e(B" . "
\e$,4)2
\e(B") ; ksi~
230 ("
\e$,4*I)d
\e(B" . "
\e$,4(b
\e(B") ; n^i
231 ("
\e$,4*J)d
\e(B" . "
\e$,4(c
\e(B") ; ci
232 ("
\e$,4*K)d
\e(B" . "
\e$,4(d
\e(B") ; n~i
233 ("
\e$,4*L)d
\e(B" . "
\e$,4)n
\e(B") ; t.i
234 ("
\e$,4*M)d
\e(B" . "
\e$,4(e
\e(B") ; n.i
235 ("
\e$,4*N)d
\e(B" . "
\e$,4(f
\e(B") ; ti
236 ("
\e$,4*O)d
\e(B" . "
\e$,4(g
\e(B") ; ni
237 ("
\e$,4*P)d
\e(B" . "
\e$,4(h
\e(B") ; pi
238 ("
\e$,4*Q)d
\e(B" . "
\e$,4(i
\e(B") ; mi
239 ("
\e$,4*R)d
\e(B" . "
\e$,4(j
\e(B") ; yi
240 ("
\e$,4*S)d
\e(B" . "
\e$,4(k
\e(B") ; ri
241 ("
\e$,4*T)d
\e(B" . "
\e$,4(l
\e(B") ; li
242 ("
\e$,4*U)d
\e(B" . "
\e$,4(m
\e(B") ; vi
243 ("
\e$,4*V)d
\e(B" . "
\e$,4(n
\e(B") ; l_i
244 ("
\e$,4*W)d
\e(B" . "
\e$,4(o
\e(B") ; l.i
245 ("
\e$,4*X)d
\e(B" . "
\e$,4(p
\e(B") ; r_i
246 ("
\e$,4*Y)d
\e(B" . "
\e$,4(q
\e(B") ; n_i
247 ("
\e$,4*Z)d
\e(B" . "
\e$,4(r
\e(B") ; si
248 ("
\e$,4*[)d
\e(B" . "
\e$,4(s
\e(B") ; s'i
249 ("
\e$,4*\)d
\e(B" . "
\e$,4(t
\e(B") ; ji
250 ("
\e$,4*])d
\e(B" . "
\e$,4(u
\e(B") ; hi
252 ("
\e$,4*H)f
\e(B" . "
\e$,4(w
\e(B") ; ki~
253 ("
\e$,4*I)f
\e(B" . "
\e$,4(x
\e(B") ; n^i~
254 ("
\e$,4*J)f
\e(B" . "
\e$,4(y
\e(B") ; ci~
255 ("
\e$,4*K)f
\e(B" . "
\e$,4(z
\e(B") ; n~i~
256 ("
\e$,4*L)f
\e(B" . "
\e$,4)o
\e(B") ; t.i~
257 ("
\e$,4*M)f
\e(B" . "
\e$,4)!
\e(B") ; n.i~
258 ("
\e$,4*N)f
\e(B" . "
\e$,4)"
\e(B") ; ti~
259 ("
\e$,4*O)f
\e(B" . "
\e$,4)#
\e(B") ; ni~
260 ("
\e$,4*P)f
\e(B" . "
\e$,4)$
\e(B") ; pi~
261 ("
\e$,4*Q)f
\e(B" . "
\e$,4)%
\e(B") ; mi~
262 ("
\e$,4*R)f
\e(B" . "
\e$,4)&
\e(B") ; yi~
263 ("
\e$,4*S)f
\e(B" . "
\e$,4)'
\e(B") ; ri~
264 ("
\e$,4*T)f
\e(B" . "
\e$,4)(
\e(B") ; li~
265 ("
\e$,4*U)f
\e(B" . "
\e$,4))
\e(B") ; vi~
266 ("
\e$,4*V)f
\e(B" . "
\e$,4)*
\e(B") ; l_i~
267 ("
\e$,4*W)f
\e(B" . "
\e$,4)+
\e(B") ; l.i~
268 ("
\e$,4*X)f
\e(B" . "
\e$,4),
\e(B") ; r_i~
269 ("
\e$,4*Y)f
\e(B" . "
\e$,4)-
\e(B") ; n_i~
270 ("
\e$,4*Z)f
\e(B" . "
\e$,4).
\e(B") ; si~
271 ("
\e$,4*[)f
\e(B" . "
\e$,4)/
\e(B") ; s'i~
272 ("
\e$,4*\)f
\e(B" . "
\e$,4)0
\e(B") ; ji~
273 ("
\e$,4*])f
\e(B" . "
\e$,4)1
\e(B") ; hi~
275 ("
\e$,4*H)g
\e(B" . "
\e$,4)p
\e(B") ; ku
276 ("
\e$,4*I)g
\e(B" . "
\e$,4)q
\e(B") ; n^u
277 ("
\e$,4*J)g
\e(B" . "
\e$,4)r
\e(B") ; cu
278 ("
\e$,4*K)g
\e(B" . "
\e$,4)s
\e(B") ; n~u
279 ("
\e$,4*L)g
\e(B" . "
\e$,4)t
\e(B") ; t.u
280 ("
\e$,4*M)g
\e(B" . "
\e$,4)u
\e(B") ; n.u
281 ("
\e$,4*N)g
\e(B" . "
\e$,4)v
\e(B") ; tu
282 ("
\e$,4*O)g
\e(B" . "
\e$,4)x
\e(B") ; nu
283 ("
\e$,4*P)g
\e(B" . "
\e$,4)y
\e(B") ; pu
284 ("
\e$,4*Q)g
\e(B" . "
\e$,4)z
\e(B") ; mu
285 ("
\e$,4*R)g
\e(B" . "
\e$,4){
\e(B") ; yu
286 ("
\e$,4*S)g
\e(B" . "
\e$,4)|
\e(B") ; ru
287 ("
\e$,4*T)g
\e(B" . "
\e$,4)}
\e(B") ; lu
288 ("
\e$,4*U)g
\e(B" . "
\e$,4)~
\e(B") ; vu
289 ("
\e$,4*V)g
\e(B" . "
\e$,4)
\7f\e(B") ; l_u
290 ("
\e$,4*W)g
\e(B" . "
\e$,4*
\e(B") ; l.u
291 ("
\e$,4*X)g
\e(B" . "
\e$,4*!
\e(B") ; r_u
292 ("
\e$,4*Y)g
\e(B" . "
\e$,4*"
\e(B") ; n_u
294 ("
\e$,4*H)h
\e(B" . "
\e$,4*#
\e(B") ; ku~
295 ("
\e$,4*I)h
\e(B" . "
\e$,4*$
\e(B") ; n^u~
296 ("
\e$,4*J)h
\e(B" . "
\e$,4*%
\e(B") ; cu~
297 ("
\e$,4*K)h
\e(B" . "
\e$,4*&
\e(B") ; n~u~
298 ("
\e$,4*L)h
\e(B" . "
\e$,4*'
\e(B") ; t.u~
299 ("
\e$,4*M)h
\e(B" . "
\e$,4*(
\e(B") ; n.u~
300 ("
\e$,4*N)h
\e(B" . "
\e$,4*)
\e(B") ; tu~
301 ("
\e$,4*O)h
\e(B" . "
\e$,4*+
\e(B") ; nu~
302 ("
\e$,4*P)h
\e(B" . "
\e$,4*,
\e(B") ; pu~
303 ("
\e$,4*Q)h
\e(B" . "
\e$,4*-
\e(B") ; mu~
304 ("
\e$,4*R)h
\e(B" . "
\e$,4*.
\e(B") ; yu~
305 ("
\e$,4*S)h
\e(B" . "
\e$,4*/
\e(B") ; ru~
306 ("
\e$,4*T)h
\e(B" . "
\e$,4*6
\e(B") ; lu~
307 ("
\e$,4*U)h
\e(B" . "
\e$,4*7
\e(B") ; vu~
308 ("
\e$,4*V)h
\e(B" . "
\e$,4*8
\e(B") ; l_u~
309 ("
\e$,4*W)h
\e(B" . "
\e$,4*9
\e(B") ; l.u~
310 ("
\e$,4*X)h
\e(B" . "
\e$,4*:
\e(B") ; r_u~
311 ("
\e$,4*Y)h
\e(B" . "
\e$,4*;
\e(B") ; n_u~
314 (defvar tml-glyph-glyph-hash
315 (let* ((hash (make-hash-table :test 'equal)))
316 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
320 (defvar tml-glyph-glyph-regexp
321 (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash))
323 (defun tamil-compose-syllable-string (string)
325 (insert (decompose-string string))
326 (tamil-compose-syllable-region (point-min) (point-max))
329 (defun tamil-compose-syllable-region (from to)
330 "Compose tamil syllable in region FROM to TO."
331 (let (glyph-str match-str glyph-reorder-regexps)
334 (narrow-to-region from to)
335 (goto-char (point-min))
336 ;; char-glyph-conversion
337 (while (re-search-forward tml-char-glyph-regexp nil t)
338 (setq match-str (match-string 0))
340 (concat glyph-str (gethash match-str tml-char-glyph-hash))))
342 (when (string-match tml-glyph-reorder-key-glyphs glyph-str)
343 (if (string-match (car tml-glyph-reordering-regexp-list)
346 (replace-match (cdr tml-glyph-reordering-regexp-list)
347 nil nil glyph-str))))
348 ;; glyph-glyph-conversion
349 (when (string-match tml-glyph-glyph-regexp glyph-str)
350 (setq match-str (match-string 0 glyph-str))
352 (replace-match (gethash match-str tml-glyph-glyph-hash)
354 ;; concatenate and attach reference-points.
361 (lambda (x) (list '(5 . 3) x))) ;; default ref. point.
363 (compose-region from to glyph-str)))))
367 ;;; tml-util.el ends here