]> code.delx.au - gnu-emacs/blob - src/casefiddle.c
Convert consecutive FSF copyright years to ranges.
[gnu-emacs] / src / casefiddle.c
1 /* GNU Emacs case conversion functions.
2 Copyright (C) 1985, 1994, 1997-1999, 2001-2011 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
18
19
20 #include <config.h>
21 #include <setjmp.h>
22 #include "lisp.h"
23 #include "buffer.h"
24 #include "character.h"
25 #include "commands.h"
26 #include "syntax.h"
27 #include "composite.h"
28 #include "keymap.h"
29
30 enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
31
32 Lisp_Object Qidentity;
33 \f
34 Lisp_Object
35 casify_object (enum case_action flag, Lisp_Object obj)
36 {
37 register int c, c1;
38 register int inword = flag == CASE_DOWN;
39
40 /* If the case table is flagged as modified, rescan it. */
41 if (NILP (XCHAR_TABLE (current_buffer->downcase_table)->extras[1]))
42 Fset_case_table (current_buffer->downcase_table);
43
44 if (INTEGERP (obj))
45 {
46 int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
47 | CHAR_SHIFT | CHAR_CTL | CHAR_META);
48 int flags = XINT (obj) & flagbits;
49 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
50
51 /* If the character has higher bits set
52 above the flags, return it unchanged.
53 It is not a real character. */
54 if ((unsigned) XFASTINT (obj) > (unsigned) flagbits)
55 return obj;
56
57 c1 = XFASTINT (obj) & ~flagbits;
58 /* FIXME: Even if enable-multibyte-characters is nil, we may
59 manipulate multibyte chars. This means we have a bug for latin-1
60 chars since when we receive an int 128-255 we can't tell whether
61 it's an eight-bit byte or a latin-1 char. */
62 if (c1 >= 256)
63 multibyte = 1;
64 if (! multibyte)
65 MAKE_CHAR_MULTIBYTE (c1);
66 c = DOWNCASE (c1);
67 if (inword)
68 XSETFASTINT (obj, c | flags);
69 else if (c == (XFASTINT (obj) & ~flagbits))
70 {
71 if (! inword)
72 c = UPCASE1 (c1);
73 if (! multibyte)
74 MAKE_CHAR_UNIBYTE (c);
75 XSETFASTINT (obj, c | flags);
76 }
77 return obj;
78 }
79
80 if (!STRINGP (obj))
81 wrong_type_argument (Qchar_or_string_p, obj);
82 else if (!STRING_MULTIBYTE (obj))
83 {
84 EMACS_INT i;
85 EMACS_INT size = SCHARS (obj);
86
87 obj = Fcopy_sequence (obj);
88 for (i = 0; i < size; i++)
89 {
90 c = SREF (obj, i);
91 MAKE_CHAR_MULTIBYTE (c);
92 c1 = c;
93 if (inword && flag != CASE_CAPITALIZE_UP)
94 c = DOWNCASE (c);
95 else if (!UPPERCASEP (c)
96 && (!inword || flag != CASE_CAPITALIZE_UP))
97 c = UPCASE1 (c1);
98 if ((int) flag >= (int) CASE_CAPITALIZE)
99 inword = (SYNTAX (c) == Sword);
100 if (c != c1)
101 {
102 MAKE_CHAR_UNIBYTE (c);
103 /* If the char can't be converted to a valid byte, just don't
104 change it. */
105 if (c >= 0 && c < 256)
106 SSET (obj, i, c);
107 }
108 }
109 return obj;
110 }
111 else
112 {
113 EMACS_INT i, i_byte, size = SCHARS (obj);
114 int len;
115 USE_SAFE_ALLOCA;
116 unsigned char *dst, *o;
117 /* Over-allocate by 12%: this is a minor overhead, but should be
118 sufficient in 99.999% of the cases to avoid a reallocation. */
119 EMACS_INT o_size = SBYTES (obj) + SBYTES (obj) / 8 + MAX_MULTIBYTE_LENGTH;
120 SAFE_ALLOCA (dst, void *, o_size);
121 o = dst;
122
123 for (i = i_byte = 0; i < size; i++, i_byte += len)
124 {
125 if ((o - dst) + MAX_MULTIBYTE_LENGTH > o_size)
126 { /* Not enough space for the next char: grow the destination. */
127 unsigned char *old_dst = dst;
128 o_size += o_size; /* Probably overkill, but extremely rare. */
129 SAFE_ALLOCA (dst, void *, o_size);
130 memcpy (dst, old_dst, o - old_dst);
131 o = dst + (o - old_dst);
132 }
133 c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len);
134 if (inword && flag != CASE_CAPITALIZE_UP)
135 c = DOWNCASE (c);
136 else if (!UPPERCASEP (c)
137 && (!inword || flag != CASE_CAPITALIZE_UP))
138 c = UPCASE1 (c);
139 if ((int) flag >= (int) CASE_CAPITALIZE)
140 inword = (SYNTAX (c) == Sword);
141 o += CHAR_STRING (c, o);
142 }
143 eassert (o - dst <= o_size);
144 obj = make_multibyte_string (dst, size, o - dst);
145 SAFE_FREE ();
146 return obj;
147 }
148 }
149
150 DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
151 doc: /* Convert argument to upper case and return that.
152 The argument may be a character or string. The result has the same type.
153 The argument object is not altered--the value is a copy.
154 See also `capitalize', `downcase' and `upcase-initials'. */)
155 (Lisp_Object obj)
156 {
157 return casify_object (CASE_UP, obj);
158 }
159
160 DEFUN ("downcase", Fdowncase, Sdowncase, 1, 1, 0,
161 doc: /* Convert argument to lower case and return that.
162 The argument may be a character or string. The result has the same type.
163 The argument object is not altered--the value is a copy. */)
164 (Lisp_Object obj)
165 {
166 return casify_object (CASE_DOWN, obj);
167 }
168
169 DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
170 doc: /* Convert argument to capitalized form and return that.
171 This means that each word's first character is upper case
172 and the rest is lower case.
173 The argument may be a character or string. The result has the same type.
174 The argument object is not altered--the value is a copy. */)
175 (Lisp_Object obj)
176 {
177 return casify_object (CASE_CAPITALIZE, obj);
178 }
179
180 /* Like Fcapitalize but change only the initials. */
181
182 DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
183 doc: /* Convert the initial of each word in the argument to upper case.
184 Do not change the other letters of each word.
185 The argument may be a character or string. The result has the same type.
186 The argument object is not altered--the value is a copy. */)
187 (Lisp_Object obj)
188 {
189 return casify_object (CASE_CAPITALIZE_UP, obj);
190 }
191 \f
192 /* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP.
193 b and e specify range of buffer to operate on. */
194
195 void
196 casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
197 {
198 register int c;
199 register int inword = flag == CASE_DOWN;
200 register int multibyte = !NILP (current_buffer->enable_multibyte_characters);
201 EMACS_INT start, end;
202 EMACS_INT start_byte, end_byte;
203 EMACS_INT first = -1, last; /* Position of first and last changes. */
204 EMACS_INT opoint = PT;
205 EMACS_INT opoint_byte = PT_BYTE;
206
207 if (EQ (b, e))
208 /* Not modifying because nothing marked */
209 return;
210
211 /* If the case table is flagged as modified, rescan it. */
212 if (NILP (XCHAR_TABLE (current_buffer->downcase_table)->extras[1]))
213 Fset_case_table (current_buffer->downcase_table);
214
215 validate_region (&b, &e);
216 start = XFASTINT (b);
217 end = XFASTINT (e);
218 modify_region (current_buffer, start, end, 0);
219 record_change (start, end - start);
220 start_byte = CHAR_TO_BYTE (start);
221 end_byte = CHAR_TO_BYTE (end);
222
223 SETUP_BUFFER_SYNTAX_TABLE(); /* For syntax_prefix_flag_p. */
224
225 while (start < end)
226 {
227 int c2, len;
228
229 if (multibyte)
230 {
231 c = FETCH_MULTIBYTE_CHAR (start_byte);
232 len = CHAR_BYTES (c);
233 }
234 else
235 {
236 c = FETCH_BYTE (start_byte);
237 MAKE_CHAR_MULTIBYTE (c);
238 len = 1;
239 }
240 c2 = c;
241 if (inword && flag != CASE_CAPITALIZE_UP)
242 c = DOWNCASE (c);
243 else if (!UPPERCASEP (c)
244 && (!inword || flag != CASE_CAPITALIZE_UP))
245 c = UPCASE1 (c);
246 if ((int) flag >= (int) CASE_CAPITALIZE)
247 inword = ((SYNTAX (c) == Sword)
248 && (inword || !syntax_prefix_flag_p (c)));
249 if (c != c2)
250 {
251 last = start;
252 if (first < 0)
253 first = start;
254
255 if (! multibyte)
256 {
257 MAKE_CHAR_UNIBYTE (c);
258 FETCH_BYTE (start_byte) = c;
259 }
260 else if (ASCII_CHAR_P (c2) && ASCII_CHAR_P (c))
261 FETCH_BYTE (start_byte) = c;
262 else
263 {
264 int tolen = CHAR_BYTES (c);
265 int j;
266 unsigned char str[MAX_MULTIBYTE_LENGTH];
267
268 CHAR_STRING (c, str);
269 if (len == tolen)
270 {
271 /* Length is unchanged. */
272 for (j = 0; j < len; ++j)
273 FETCH_BYTE (start_byte + j) = str[j];
274 }
275 else
276 {
277 /* Replace one character with the other,
278 keeping text properties the same. */
279 replace_range_2 (start, start_byte,
280 start + 1, start_byte + len,
281 str, 1, tolen,
282 0);
283 len = tolen;
284 }
285 }
286 }
287 start++;
288 start_byte += len;
289 }
290
291 if (PT != opoint)
292 TEMP_SET_PT_BOTH (opoint, opoint_byte);
293
294 if (first >= 0)
295 {
296 signal_after_change (first, last + 1 - first, last + 1 - first);
297 update_compositions (first, last + 1, CHECK_ALL);
298 }
299 }
300
301 DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 2, "r",
302 doc: /* Convert the region to upper case. In programs, wants two arguments.
303 These arguments specify the starting and ending character numbers of
304 the region to operate on. When used as a command, the text between
305 point and the mark is operated on.
306 See also `capitalize-region'. */)
307 (Lisp_Object beg, Lisp_Object end)
308 {
309 casify_region (CASE_UP, beg, end);
310 return Qnil;
311 }
312
313 DEFUN ("downcase-region", Fdowncase_region, Sdowncase_region, 2, 2, "r",
314 doc: /* Convert the region to lower case. In programs, wants two arguments.
315 These arguments specify the starting and ending character numbers of
316 the region to operate on. When used as a command, the text between
317 point and the mark is operated on. */)
318 (Lisp_Object beg, Lisp_Object end)
319 {
320 casify_region (CASE_DOWN, beg, end);
321 return Qnil;
322 }
323
324 DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
325 doc: /* Convert the region to capitalized form.
326 Capitalized form means each word's first character is upper case
327 and the rest of it is lower case.
328 In programs, give two arguments, the starting and ending
329 character positions to operate on. */)
330 (Lisp_Object beg, Lisp_Object end)
331 {
332 casify_region (CASE_CAPITALIZE, beg, end);
333 return Qnil;
334 }
335
336 /* Like Fcapitalize_region but change only the initials. */
337
338 DEFUN ("upcase-initials-region", Fupcase_initials_region,
339 Supcase_initials_region, 2, 2, "r",
340 doc: /* Upcase the initial of each word in the region.
341 Subsequent letters of each word are not changed.
342 In programs, give two arguments, the starting and ending
343 character positions to operate on. */)
344 (Lisp_Object beg, Lisp_Object end)
345 {
346 casify_region (CASE_CAPITALIZE_UP, beg, end);
347 return Qnil;
348 }
349 \f
350 static Lisp_Object
351 operate_on_word (Lisp_Object arg, EMACS_INT *newpoint)
352 {
353 Lisp_Object val;
354 EMACS_INT farend;
355 EMACS_INT iarg;
356
357 CHECK_NUMBER (arg);
358 iarg = XINT (arg);
359 farend = scan_words (PT, iarg);
360 if (!farend)
361 farend = iarg > 0 ? ZV : BEGV;
362
363 *newpoint = PT > farend ? PT : farend;
364 XSETFASTINT (val, farend);
365
366 return val;
367 }
368
369 DEFUN ("upcase-word", Fupcase_word, Supcase_word, 1, 1, "p",
370 doc: /* Convert following word (or ARG words) to upper case, moving over.
371 With negative argument, convert previous words but do not move.
372 See also `capitalize-word'. */)
373 (Lisp_Object arg)
374 {
375 Lisp_Object beg, end;
376 EMACS_INT newpoint;
377 XSETFASTINT (beg, PT);
378 end = operate_on_word (arg, &newpoint);
379 casify_region (CASE_UP, beg, end);
380 SET_PT (newpoint);
381 return Qnil;
382 }
383
384 DEFUN ("downcase-word", Fdowncase_word, Sdowncase_word, 1, 1, "p",
385 doc: /* Convert following word (or ARG words) to lower case, moving over.
386 With negative argument, convert previous words but do not move. */)
387 (Lisp_Object arg)
388 {
389 Lisp_Object beg, end;
390 EMACS_INT newpoint;
391 XSETFASTINT (beg, PT);
392 end = operate_on_word (arg, &newpoint);
393 casify_region (CASE_DOWN, beg, end);
394 SET_PT (newpoint);
395 return Qnil;
396 }
397
398 DEFUN ("capitalize-word", Fcapitalize_word, Scapitalize_word, 1, 1, "p",
399 doc: /* Capitalize the following word (or ARG words), moving over.
400 This gives the word(s) a first character in upper case
401 and the rest lower case.
402 With negative argument, capitalize previous words but do not move. */)
403 (Lisp_Object arg)
404 {
405 Lisp_Object beg, end;
406 EMACS_INT newpoint;
407 XSETFASTINT (beg, PT);
408 end = operate_on_word (arg, &newpoint);
409 casify_region (CASE_CAPITALIZE, beg, end);
410 SET_PT (newpoint);
411 return Qnil;
412 }
413 \f
414 void
415 syms_of_casefiddle (void)
416 {
417 Qidentity = intern_c_string ("identity");
418 staticpro (&Qidentity);
419 defsubr (&Supcase);
420 defsubr (&Sdowncase);
421 defsubr (&Scapitalize);
422 defsubr (&Supcase_initials);
423 defsubr (&Supcase_region);
424 defsubr (&Sdowncase_region);
425 defsubr (&Scapitalize_region);
426 defsubr (&Supcase_initials_region);
427 defsubr (&Supcase_word);
428 defsubr (&Sdowncase_word);
429 defsubr (&Scapitalize_word);
430 }
431
432 void
433 keys_of_casefiddle (void)
434 {
435 initial_define_key (control_x_map, Ctl('U'), "upcase-region");
436 Fput (intern ("upcase-region"), Qdisabled, Qt);
437 initial_define_key (control_x_map, Ctl('L'), "downcase-region");
438 Fput (intern ("downcase-region"), Qdisabled, Qt);
439
440 initial_define_key (meta_map, 'u', "upcase-word");
441 initial_define_key (meta_map, 'l', "downcase-word");
442 initial_define_key (meta_map, 'c', "capitalize-word");
443 }
444