code.delx.au - gnu-emacs/blob - src/doprnt.c

   1 /* Output like sprintf to a buffer of specified size.
   2    Also takes args differently: pass one pointer to the end
   3    of the format string in addition to the format string itself.
   4    Copyright (C) 1985, 2001-2012  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21 /* If you think about replacing this with some similar standard C function of
  22    the printf family (such as vsnprintf), please note that this function
  23    supports the following Emacs-specific features:
  24
  25    . For %c conversions, it produces a string with the multibyte representation
  26      of the (`int') argument, suitable for display in an Emacs buffer.
  27
  28    . For %s and %c, when field width is specified (e.g., %25s), it accounts for
  29      the display width of each character, according to char-width-table.  That
  30      is, it does not assume that each character takes one column on display.
  31
  32    . If the size of the buffer is not enough to produce the formatted string in
  33      its entirety, it makes sure that truncation does not chop the last
  34      character in the middle of its multibyte sequence, producing an invalid
  35      sequence.
  36
  37    . It accepts a pointer to the end of the format string, so the format string
  38      could include embedded null characters.
  39
  40    . It signals an error if the length of the formatted string is about to
  41      overflow MOST_POSITIVE_FIXNUM, to avoid producing strings longer than what
  42      Emacs can handle.
  43
  44    OTOH, this function supports only a small subset of the standard C formatted
  45    output facilities.  E.g., %u and %ll are not supported, and precision is
   46    ignored for %s and %c conversions.  (See below for the detailed documentation of
  47    what is supported.)  However, this is okay, as this function is supposed to
  48    be called from `error' and similar functions, and thus does not need to
  49    support features beyond those in `Fformat', which is used by `error' on the
  50    Lisp level.  */
  51
  52 /* This function supports the following %-sequences in the `format'
  53    argument:
  54
  55    %s means print a string argument.
  56    %S is silently treated as %s, for loose compatibility with `Fformat'.
  57    %d means print a `signed int' argument in decimal.
  58    %o means print an `unsigned int' argument in octal.
  59    %x means print an `unsigned int' argument in hex.
  60    %e means print a `double' argument in exponential notation.
  61    %f means print a `double' argument in decimal-point notation.
  62    %g means print a `double' argument in exponential notation
  63       or in decimal-point notation, whichever uses fewer characters.
  64    %c means print a `signed int' argument as a single character.
  65    %% means produce a literal % character.
  66
  67    A %-sequence may contain optional flag, width, and precision specifiers, and
  68    a length modifier, as follows:
  69
  70      %<flags><width><precision><length>character
  71
  72    where flags is [+ -0], width is [0-9]+, precision is .[0-9]+, and length
  73    is empty or l or the value of the pD or pI or pMd (sans "d") macros.
  74    Also, %% in a format stands for a single % in the output.  A % that
  75    does not introduce a valid %-sequence causes undefined behavior.
  76
  77    The + flag character inserts a + before any positive number, while a space
  78    inserts a space before any positive number; these flags only affect %d, %o,
  79    %x, %e, %f, and %g sequences.  The - and 0 flags affect the width specifier,
  80    as described below.  For signed numerical arguments only, the ` ' (space)
  81    flag causes the result to be prefixed with a space character if it does not
  82    start with a sign (+ or -).
  83
  84    The l (lower-case letter ell) length modifier is a `long' data type
  85    modifier: it is supported for %d, %o, and %x conversions of integral
  86    arguments, must immediately precede the conversion specifier, and means that
  87    the respective argument is to be treated as `long int' or `unsigned long
  88    int'.  Similarly, the value of the pD macro means to use ptrdiff_t,
  89    the value of the pI macro means to use EMACS_INT or EMACS_UINT, the
  90    value of the pMd etc. macros means to use intmax_t or uintmax_t,
  91    and the empty length modifier means `int' or `unsigned int'.
  92
  93    The width specifier supplies a lower limit for the length of the printed
  94    representation.  The padding, if any, normally goes on the left, but it goes
  95    on the right if the - flag is present.  The padding character is normally a
  96    space, but (for numerical arguments only) it is 0 if the 0 flag is present.
  97    The - flag takes precedence over the 0 flag.
  98
  99    For %e, %f, and %g sequences, the number after the "." in the precision
 100    specifier says how many decimal places to show; if zero, the decimal point
 101    itself is omitted.  For %s and %S, the precision specifier is ignored.  */
 102
 103 #include <config.h>
 104 #include <stdio.h>
 105 #include <setjmp.h>
 106 #include <float.h>
 107 #include <unistd.h>
 108 #include <limits.h>
 109
 110 #include "lisp.h"
 111
 112 /* Since we use the macro CHAR_HEAD_P, we have to include this, but
  113    don't have to include others because CHAR_HEAD_P does not contain
 114    another macro.  */
 115 #include "character.h"
 116
 117 /* Generate output from a format-spec FORMAT,
 118    terminated at position FORMAT_END.
 119    (*FORMAT_END is not part of the format, but must exist and be readable.)
 120    Output goes in BUFFER, which has room for BUFSIZE chars.
 121    BUFSIZE must be positive.  If the output does not fit, truncate it
 122    to fit and return BUFSIZE - 1; if this truncates a multibyte
 123    sequence, store '\0' into the sequence's first byte.
 124    Returns the number of bytes stored into BUFFER, excluding
 125    the terminating null byte.  Output is always null-terminated.
 126    String arguments are passed as C strings.
 127    Integers are passed as C integers.  */
 128
 129 ptrdiff_t
 130 doprnt (char *buffer, ptrdiff_t bufsize, const char *format,
 131         const char *format_end, va_list ap)
 132 {
 133   const char *fmt = format;     /* Pointer into format string.  */
 134   char *bufptr = buffer;        /* Pointer into output buffer.  */
 135
 136   /* Use this for sprintf unless we need something really big.  */
 137   char tembuf[DBL_MAX_10_EXP + 100];
 138
 139   /* Size of sprintf_buffer.  */
 140   ptrdiff_t size_allocated = sizeof (tembuf);
 141
 142   /* Buffer to use for sprintf.  Either tembuf or same as BIG_BUFFER.  */
 143   char *sprintf_buffer = tembuf;
 144
 145   /* Buffer we have got with malloc.  */
 146   char *big_buffer = NULL;
 147
 148   ptrdiff_t tem = -1;
 149   char *string;
 150   char fixed_buffer[20];        /* Default buffer for small formatting. */
 151   char *fmtcpy;
 152   int minlen;
 153   char charbuf[MAX_MULTIBYTE_LENGTH + 1];       /* Used for %c.  */
 154   USE_SAFE_ALLOCA;
 155
 156   if (format_end == 0)
 157     format_end = format + strlen (format);
 158
 159   fmtcpy = (format_end - format < sizeof (fixed_buffer) - 1
 160             ? fixed_buffer
 161             : SAFE_ALLOCA (format_end - format + 1));
 162
 163   bufsize--;
 164
 165   /* Loop until end of format string or buffer full. */
 166   while (fmt < format_end && bufsize > 0)
 167     {
 168       if (*fmt == '%')  /* Check for a '%' character */
 169         {
 170           ptrdiff_t size_bound = 0;
 171           ptrdiff_t width;  /* Columns occupied by STRING on display.  */
 172           enum {
 173             pDlen = sizeof pD - 1,
 174             pIlen = sizeof pI - 1,
 175             pMlen = sizeof pMd - 2
 176           };
 177           enum {
 178             no_modifier, long_modifier, pD_modifier, pI_modifier, pM_modifier
 179           } length_modifier = no_modifier;
 180           static char const modifier_len[] = { 0, 1, pDlen, pIlen, pMlen };
 181           int maxmlen = max (max (1, pDlen), max (pIlen, pMlen));
 182           int mlen;
 183
 184           fmt++;
 185           /* Copy this one %-spec into fmtcpy.  */
 186           string = fmtcpy;
 187           *string++ = '%';
 188           while (fmt < format_end)
 189             {
 190               *string++ = *fmt;
 191               if ('0' <= *fmt && *fmt <= '9')
 192                 {
 193                   /* Get an idea of how much space we might need.
 194                      This might be a field width or a precision; e.g.
 195                      %1.1000f and %1000.1f both might need 1000+ bytes.
 196                      Parse the width or precision, checking for overflow.  */
 197                   ptrdiff_t n = *fmt - '0';
 198                   while (fmt + 1 < format_end
 199                          && '0' <= fmt[1] && fmt[1] <= '9')
 200                     {
 201                       /* Avoid ptrdiff_t, size_t, and int overflow, as
 202                          many sprintfs mishandle widths greater than INT_MAX.
 203                          This test is simple but slightly conservative: e.g.,
 204                          (INT_MAX - INT_MAX % 10) is reported as an overflow
 205                          even when it's not.  */
 206                       if (n >= min (INT_MAX, min (PTRDIFF_MAX, SIZE_MAX)) / 10)
 207                         error ("Format width or precision too large");
 208                       n = n * 10 + fmt[1] - '0';
 209                       *string++ = *++fmt;
 210                     }
 211
 212                   if (size_bound < n)
 213                     size_bound = n;
 214                 }
 215               else if (! (*fmt == '-' || *fmt == ' ' || *fmt == '.'
 216                           || *fmt == '+'))
 217                 break;
 218               fmt++;
 219             }
 220
 221           /* Check for the length modifiers in textual length order, so
 222              that longer modifiers override shorter ones.  */
 223           for (mlen = 1; mlen <= maxmlen; mlen++)
 224             {
 225               if (format_end - fmt < mlen)
 226                 break;
 227               if (mlen == 1 && *fmt == 'l')
 228                 length_modifier = long_modifier;
 229               if (mlen == pDlen && memcmp (fmt, pD, pDlen) == 0)
 230                 length_modifier = pD_modifier;
 231               if (mlen == pIlen && memcmp (fmt, pI, pIlen) == 0)
 232                 length_modifier = pI_modifier;
 233               if (mlen == pMlen && memcmp (fmt, pMd, pMlen) == 0)
 234                 length_modifier = pM_modifier;
 235             }
 236
 237           mlen = modifier_len[length_modifier];
 238           memcpy (string, fmt + 1, mlen);
 239           string += mlen;
 240           fmt += mlen;
 241           *string = 0;
 242
 243           /* Make the size bound large enough to handle floating point formats
 244              with large numbers.  */
 245           if (size_bound > min (PTRDIFF_MAX, SIZE_MAX) - DBL_MAX_10_EXP - 50)
 246             error ("Format width or precision too large");
 247           size_bound += DBL_MAX_10_EXP + 50;
 248
 249           /* Make sure we have that much.  */
 250           if (size_bound > size_allocated)
 251             {
 252               if (big_buffer)
 253                 xfree (big_buffer);
 254               big_buffer = xmalloc (size_bound);
 255               sprintf_buffer = big_buffer;
 256               size_allocated = size_bound;
 257             }
 258           minlen = 0;
 259           switch (*fmt++)
 260             {
 261             default:
 262               error ("Invalid format operation %s", fmtcpy);
 263
 264 /*          case 'b': */
 265             case 'l':
 266             case 'd':
 267               switch (length_modifier)
 268                 {
 269                 case no_modifier:
 270                   {
 271                     int v = va_arg (ap, int);
 272                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 273                   }
 274                   break;
 275                 case long_modifier:
 276                   {
 277                     long v = va_arg (ap, long);
 278                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 279                   }
 280                   break;
 281                 case pD_modifier:
 282                 signed_pD_modifier:
 283                   {
 284                     ptrdiff_t v = va_arg (ap, ptrdiff_t);
 285                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 286                   }
 287                   break;
 288                 case pI_modifier:
 289                   {
 290                     EMACS_INT v = va_arg (ap, EMACS_INT);
 291                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 292                   }
 293                   break;
 294                 case pM_modifier:
 295                   {
 296                     intmax_t v = va_arg (ap, intmax_t);
 297                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 298                   }
 299                   break;
 300                 }
 301               /* Now copy into final output, truncating as necessary.  */
 302               string = sprintf_buffer;
 303               goto doit;
 304
 305             case 'o':
 306             case 'x':
 307               switch (length_modifier)
 308                 {
 309                 case no_modifier:
 310                   {
 311                     unsigned v = va_arg (ap, unsigned);
 312                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 313                   }
 314                   break;
 315                 case long_modifier:
 316                   {
 317                     unsigned long v = va_arg (ap, unsigned long);
 318                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 319                   }
 320                   break;
 321                 case pD_modifier:
 322                   goto signed_pD_modifier;
 323                 case pI_modifier:
 324                   {
 325                     EMACS_UINT v = va_arg (ap, EMACS_UINT);
 326                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 327                   }
 328                   break;
 329                 case pM_modifier:
 330                   {
 331                     uintmax_t v = va_arg (ap, uintmax_t);
 332                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 333                   }
 334                   break;
 335                 }
 336               /* Now copy into final output, truncating as necessary.  */
 337               string = sprintf_buffer;
 338               goto doit;
 339
 340             case 'f':
 341             case 'e':
 342             case 'g':
 343               {
 344                 double d = va_arg (ap, double);
 345                 tem = sprintf (sprintf_buffer, fmtcpy, d);
 346                 /* Now copy into final output, truncating as necessary.  */
 347                 string = sprintf_buffer;
 348                 goto doit;
 349               }
 350
 351             case 'S':
 352               string[-1] = 's';
 353             case 's':
 354               if (fmtcpy[1] != 's')
 355                 minlen = atoi (&fmtcpy[1]);
 356               string = va_arg (ap, char *);
 357               tem = strlen (string);
 358               if (STRING_BYTES_BOUND < tem)
 359                 error ("String for %%s or %%S format is too long");
 360               width = strwidth (string, tem);
 361               goto doit1;
 362
 363               /* Copy string into final output, truncating if no room.  */
 364             doit:
 365               eassert (0 <= tem);
 366               /* Coming here means STRING contains ASCII only.  */
 367               if (STRING_BYTES_BOUND < tem)
 368                 error ("Format width or precision too large");
 369               width = tem;
 370             doit1:
 371               /* We have already calculated:
 372                  TEM -- length of STRING,
 373                  WIDTH -- columns occupied by STRING when displayed, and
 374                  MINLEN -- minimum columns of the output.  */
 375               if (minlen > 0)
 376                 {
 377                   while (minlen > width && bufsize > 0)
 378                     {
 379                       *bufptr++ = ' ';
 380                       bufsize--;
 381                       minlen--;
 382                     }
 383                   minlen = 0;
 384                 }
 385               if (tem > bufsize)
 386                 {
 387                   /* Truncate the string at character boundary.  */
 388                   tem = bufsize;
 389                   do
 390                     {
 391                       tem--;
 392                       if (CHAR_HEAD_P (string[tem]))
 393                         {
 394                           if (BYTES_BY_CHAR_HEAD (string[tem]) <= bufsize - tem)
 395                             tem = bufsize;
 396                           break;
 397                         }
 398                     }
 399                   while (tem != 0);
 400
 401                   memcpy (bufptr, string, tem);
 402                   bufptr[tem] = 0;
 403                   /* Trigger exit from the loop, but make sure we
 404                      return to the caller a value which will indicate
 405                      that the buffer was too small.  */
 406                   bufptr += bufsize;
 407                   bufsize = 0;
 408                   continue;
 409                 }
 410               memcpy (bufptr, string, tem);
 411               bufptr += tem;
 412               bufsize -= tem;
 413               if (minlen < 0)
 414                 {
 415                   while (minlen < - width && bufsize > 0)
 416                     {
 417                       *bufptr++ = ' ';
 418                       bufsize--;
 419                       minlen++;
 420                     }
 421                   minlen = 0;
 422                 }
 423               continue;
 424
 425             case 'c':
 426               {
 427                 int chr = va_arg (ap, int);
 428                 tem = CHAR_STRING (chr, (unsigned char *) charbuf);
 429                 string = charbuf;
 430                 string[tem] = 0;
 431                 width = strwidth (string, tem);
 432                 if (fmtcpy[1] != 'c')
 433                   minlen = atoi (&fmtcpy[1]);
 434                 goto doit1;
 435               }
 436
 437             case '%':
 438               fmt--;    /* Drop thru and this % will be treated as normal */
 439             }
 440         }
 441
 442       {
 443         /* Just some character; Copy it if the whole multi-byte form
 444            fit in the buffer.  */
 445         char *save_bufptr = bufptr;
 446
 447         do { *bufptr++ = *fmt++; }
 448         while (fmt < format_end && --bufsize > 0 && !CHAR_HEAD_P (*fmt));
 449         if (!CHAR_HEAD_P (*fmt))
 450           {
 451             /* Truncate, but return value that will signal to caller
 452                that the buffer was too small.  */
 453             *save_bufptr = 0;
 454             break;
 455           }
 456       }
 457     };
 458
 459   /* If we had to malloc something, free it.  */
 460   xfree (big_buffer);
 461
 462   *bufptr = 0;          /* Make sure our string ends with a '\0' */
 463
 464   SAFE_FREE ();
 465   return bufptr - buffer;
 466 }
 467
 468 /* Format to an unbounded buffer BUF.  This is like sprintf, except it
 469    is not limited to returning an 'int' so it doesn't have a silly 2
 470    GiB limit on typical 64-bit hosts.  However, it is limited to the
 471    Emacs-style formats that doprnt supports.
 472
 473    Return the number of bytes put into BUF, excluding the terminating
 474    '\0'.  */
 475 ptrdiff_t
 476 esprintf (char *buf, char const *format, ...)
 477 {
 478   ptrdiff_t nbytes;
 479   va_list ap;
 480   va_start (ap, format);
 481   nbytes = doprnt (buf, TYPE_MAXIMUM (ptrdiff_t), format, 0, ap);
 482   va_end (ap);
 483   return nbytes;
 484 }
 485
 486 #if defined HAVE_X_WINDOWS && defined USE_X_TOOLKIT
 487
 488 /* Format to buffer *BUF of positive size *BUFSIZE, reallocating *BUF
 489    and updating *BUFSIZE if the buffer is too small, and otherwise
 490    behaving line esprintf.  When reallocating, free *BUF unless it is
 491    equal to NONHEAPBUF, and if BUFSIZE_MAX is nonnegative then signal
 492    memory exhaustion instead of growing the buffer size past
 493    BUFSIZE_MAX.  */
 494 ptrdiff_t
 495 exprintf (char **buf, ptrdiff_t *bufsize,
 496           char const *nonheapbuf, ptrdiff_t bufsize_max,
 497           char const *format, ...)
 498 {
 499   ptrdiff_t nbytes;
 500   va_list ap;
 501   va_start (ap, format);
 502   nbytes = evxprintf (buf, bufsize, nonheapbuf, bufsize_max, format, ap);
 503   va_end (ap);
 504   return nbytes;
 505 }
 506
 507 #endif
 508
 509 /* Act like exprintf, except take a va_list.  */
 510 ptrdiff_t
 511 evxprintf (char **buf, ptrdiff_t *bufsize,
 512            char const *nonheapbuf, ptrdiff_t bufsize_max,
 513            char const *format, va_list ap)
 514 {
 515   for (;;)
 516     {
 517       ptrdiff_t nbytes;
 518       va_list ap_copy;
 519       va_copy (ap_copy, ap);
 520       nbytes = doprnt (*buf, *bufsize, format, 0, ap_copy);
 521       va_end (ap_copy);
 522       if (nbytes < *bufsize - 1)
 523         return nbytes;
 524       if (*buf != nonheapbuf)
 525         xfree (*buf);
 526       *buf = xpalloc (NULL, bufsize, 1, bufsize_max, 1);
 527     }
 528 }