code.delx.au - pulseaudio/blob - src/pulse/utf8.c
Sending translation for Serbian (Latin)
[pulseaudio] / src / pulse / utf8.c
1 /***
2 This file is part of PulseAudio.
3
4 Copyright 2006 Lennart Poettering
5 Copyright 2006 Pierre Ossman <ossman@cendio.se> for Cendio AB
6
7 PulseAudio is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as
9 published by the Free Software Foundation; either version 2.1 of the
10 License, or (at your option) any later version.
11
12 PulseAudio is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with PulseAudio; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
20 USA.
21 ***/
22
23 /* This file is based on the GLIB utf8 validation functions. The
24 * original license text follows. */
25
26 /* gutf8.c - Operations on UTF-8 strings.
27 *
28 * Copyright (C) 1999 Tom Tromey
29 * Copyright (C) 2000 Red Hat, Inc.
30 *
31 * This library is free software; you can redistribute it and/or
32 * modify it under the terms of the GNU Lesser General Public
33 * License as published by the Free Software Foundation; either
34 * version 2 of the License, or (at your option) any later version.
35 *
36 * This library is distributed in the hope that it will be useful,
37 * but WITHOUT ANY WARRANTY; without even the implied warranty of
38 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
39 * Lesser General Public License for more details.
40 *
41 * You should have received a copy of the GNU Lesser General Public
42 * License along with this library; if not, write to the
43 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
44 * Boston, MA 02111-1307, USA.
45 */
46
47 #ifdef HAVE_CONFIG_H
48 #include <config.h>
49 #endif
50
51 #include <errno.h>
52 #include <stdlib.h>
53 #include <inttypes.h>
54 #include <string.h>
55
56 #ifdef HAVE_ICONV
57 #include <iconv.h>
58 #endif
59
60 #include <pulse/xmalloc.h>
61 #include <pulsecore/macro.h>
62
63 #include "utf8.h"
64
65 #define FILTER_CHAR '_'
66
67 static inline pa_bool_t is_unicode_valid(uint32_t ch) {
68
69 if (ch >= 0x110000) /* End of unicode space */
70 return FALSE;
71 if ((ch & 0xFFFFF800) == 0xD800) /* Reserved area for UTF-16 */
72 return FALSE;
73 if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) /* Reserved */
74 return FALSE;
75 if ((ch & 0xFFFE) == 0xFFFE) /* BOM (Byte Order Mark) */
76 return FALSE;
77
78 return TRUE;
79 }
80
81 static inline pa_bool_t is_continuation_char(uint8_t ch) {
82 if ((ch & 0xc0) != 0x80) /* 10xxxxxx */
83 return FALSE;
84 return TRUE;
85 }
86
/* Append the six payload bits of the continuation byte ch to the
 * code point being accumulated in *u_ch. */
static inline void merge_continuation_char(uint32_t *u_ch, uint8_t ch) {
    const uint32_t payload = (uint32_t) (ch & 0x3f);

    *u_ch = (*u_ch << 6) | payload;
}
91
92 static char* utf8_validate(const char *str, char *output) {
93 uint32_t val = 0;
94 uint32_t min = 0;
95 const uint8_t *p, *last;
96 int size;
97 uint8_t *o;
98
99 pa_assert(str);
100
101 o = (uint8_t*) output;
102 for (p = (const uint8_t*) str; *p; p++) {
103 if (*p < 128) {
104 if (o)
105 *o = *p;
106 } else {
107 last = p;
108
109 if ((*p & 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */
110 size = 2;
111 min = 128;
112 val = (uint32_t) (*p & 0x1e);
113 goto ONE_REMAINING;
114 } else if ((*p & 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/
115 size = 3;
116 min = (1 << 11);
117 val = (uint32_t) (*p & 0x0f);
118 goto TWO_REMAINING;
119 } else if ((*p & 0xf8) == 0xf0) { /* 11110xxx four-char seq */
120 size = 4;
121 min = (1 << 16);
122 val = (uint32_t) (*p & 0x07);
123 } else {
124 size = 1;
125 goto error;
126 }
127
128 p++;
129 if (!is_continuation_char(*p))
130 goto error;
131 merge_continuation_char(&val, *p);
132
133 TWO_REMAINING:
134 p++;
135 if (!is_continuation_char(*p))
136 goto error;
137 merge_continuation_char(&val, *p);
138
139 ONE_REMAINING:
140 p++;
141 if (!is_continuation_char(*p))
142 goto error;
143 merge_continuation_char(&val, *p);
144
145 if (val < min)
146 goto error;
147
148 if (!is_unicode_valid(val))
149 goto error;
150
151 if (o) {
152 memcpy(o, last, (size_t) size);
153 o += size - 1;
154 }
155
156 if (o)
157 o++;
158
159 continue;
160
161 error:
162 if (o) {
163 *o = FILTER_CHAR;
164 p = last; /* We retry at the next character */
165 } else
166 goto failure;
167 }
168
169 if (o)
170 o++;
171 }
172
173 if (o) {
174 *o = '\0';
175 return output;
176 }
177
178 return (char*) str;
179
180 failure:
181 return NULL;
182 }
183
184 char* pa_utf8_valid (const char *str) {
185 return utf8_validate(str, NULL);
186 }
187
/* Produce a cleaned copy of str in which the lead byte of every
 * offending sequence has been replaced by FILTER_CHAR.
 *
 * The copy is obtained from pa_xmalloc(); releasing it is left to the
 * caller (presumably via pa_xfree() - confirm against pulse/xmalloc.h). */
char *pa_utf8_filter(const char *str) {
    size_t capacity;
    char *cleaned;

    pa_assert(str);

    /* One byte per input byte plus the terminator. */
    capacity = strlen(str) + 1;
    cleaned = pa_xmalloc(capacity);

    return utf8_validate(str, cleaned);
}
195
196 #ifdef HAVE_ICONV
197
198 static char* iconv_simple(const char *str, const char *to, const char *from) {
199 char *new_str;
200 size_t len, inlen;
201 iconv_t cd;
202 ICONV_CONST char *inbuf;
203 char *outbuf;
204 size_t res, inbytes, outbytes;
205
206 pa_assert(str);
207 pa_assert(to);
208 pa_assert(from);
209
210 cd = iconv_open(to, from);
211 if (cd == (iconv_t)-1)
212 return NULL;
213
214 inlen = len = strlen(str) + 1;
215 new_str = pa_xmalloc(len);
216
217 for (;;) {
218 inbuf = (ICONV_CONST char*) str; /* Brain dead prototype for iconv() */
219 inbytes = inlen;
220 outbuf = new_str;
221 outbytes = len;
222
223 res = iconv(cd, &inbuf, &inbytes, &outbuf, &outbytes);
224
225 if (res != (size_t)-1)
226 break;
227
228 if (errno != E2BIG) {
229 pa_xfree(new_str);
230 new_str = NULL;
231 break;
232 }
233
234 pa_assert(inbytes != 0);
235
236 len += inbytes;
237 new_str = pa_xrealloc(new_str, len);
238 }
239
240 iconv_close(cd);
241
242 return new_str;
243 }
244
/* Convert a UTF-8 string with iconv_simple(), using "UTF-8" as source
 * encoding and the empty encoding name as target (presumably resolved
 * by iconv_open() to the current locale's charset - confirm for the
 * iconv implementation in use).
 *
 * Returns a newly allocated string, or NULL if the conversion failed. */
char *pa_utf8_to_locale(const char *str) {
    static const char source_charset[] = "UTF-8";
    static const char target_charset[] = "";

    return iconv_simple(str, target_charset, source_charset);
}
248
/* Convert a string to UTF-8 with iconv_simple(), using the empty
 * encoding name as source (presumably resolved by iconv_open() to the
 * current locale's charset - confirm for the iconv implementation in
 * use) and "UTF-8" as target.
 *
 * Returns a newly allocated string, or NULL if the conversion failed. */
char *pa_locale_to_utf8(const char *str) {
    static const char source_charset[] = "";
    static const char target_charset[] = "UTF-8";

    return iconv_simple(str, target_charset, source_charset);
}
252
253 #else
254
/* Variant built when HAVE_ICONV is not defined: no conversion is
 * available, so after checking the argument this always reports
 * failure by returning NULL. */
char *pa_utf8_to_locale(const char *str) {
    char *converted = NULL;

    pa_assert(str);

    return converted;
}
259
/* Variant built when HAVE_ICONV is not defined: no conversion is
 * available, so after checking the argument this always reports
 * failure by returning NULL. */
char *pa_locale_to_utf8(const char *str) {
    char *converted = NULL;

    pa_assert(str);

    return converted;
}
264
265 #endif
266
/* Check that str consists solely of 7-bit ASCII bytes.
 *
 * Returns str (cast to non-const) if no byte is 128 or above, NULL as
 * soon as such a byte is encountered. */
char *pa_ascii_valid(const char *str) {
    const unsigned char *cursor;

    pa_assert(str);

    cursor = (const unsigned char*) str;
    while (*cursor != 0) {
        if (*cursor > 127)
            return NULL;
        cursor++;
    }

    return (char*) str;
}
277
/* Return a copy of str from which every byte with a value of 128 or
 * above has been dropped.
 *
 * The copy is created with pa_xstrdup() and compacted in place; the
 * result is NUL-terminated. */
char *pa_ascii_filter(const char *str) {
    char *copy;
    char *dst;
    const char *src;

    pa_assert(str);

    copy = pa_xstrdup(str);
    dst = copy;

    /* dst never runs ahead of src, so in-place compaction is safe. */
    for (src = copy; *src != '\0'; src++) {
        if ((unsigned char) *src >= 128)
            continue;

        *dst = *src;
        dst++;
    }

    *dst = '\0';

    return copy;
}