/* Profiler implementation.

Copyright (C) 2012-2016 Free Software Foundation, Inc.

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>
#include "lisp.h"
#include "syssignal.h"
#include "systime.h"

/* Return A + B, but return the maximum fixnum if the result would overflow.
   Assume A and B are nonnegative and in fixnum range.  */

static EMACS_INT
saturated_add (EMACS_INT a, EMACS_INT b)
{
  return min (a + b, MOST_POSITIVE_FIXNUM);
}
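
/* For instance, saturated_add (MOST_POSITIVE_FIXNUM, 1) yields
   MOST_POSITIVE_FIXNUM: since both arguments are in fixnum range, the
   intermediate sum still fits in an EMACS_INT, so min can clamp it
   safely.  */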

/* Logs.  */

typedef struct Lisp_Hash_Table log_t;

static struct hash_table_test hashtest_profiler;

static Lisp_Object
make_log (EMACS_INT heap_size, EMACS_INT max_stack_depth)
{
  /* We use a standard Elisp hash-table object, but we use it in
     a special way.  This is OK as long as the object is not exposed
     to Elisp, i.e. until it is returned by *-profiler-log, after which
     it can't be used any more.  */
  Lisp_Object log = make_hash_table (hashtest_profiler,
                                     make_number (heap_size),
                                     make_float (DEFAULT_REHASH_SIZE),
                                     make_float (DEFAULT_REHASH_THRESHOLD),
                                     Qnil);
  struct Lisp_Hash_Table *h = XHASH_TABLE (log);

  /* What is special about our hash-tables is that the keys are pre-filled
     with the vectors we'll put in them.  */
  ptrdiff_t i = ASIZE (h->key_and_value) >> 1;
  while (i > 0)
    set_hash_key_slot (h, --i,
                       Fmake_vector (make_number (max_stack_depth), Qnil));
  return log;
}

/* Evict the least used half of the hash_table.

   When the table is full, we have to evict someone.
   The easiest and most efficient option is to evict the value we're
   about to add (i.e. once the table is full, stop sampling).

   We could also pick the element with the lowest count and evict it,
   but finding it is O(N) and for that amount of work we get very
   little in return: for the next sample, this latest sample will have
   count==1 and will hence be a prime candidate for eviction :-(

   So instead, we take O(N) time to eliminate more or less half of the
   entries (the half with the lowest counts).  This gives us an
   amortized cost of O(1), and a new entry gets O(N) time to grow
   larger than the other least counts before a new round of
   eviction.  */

static EMACS_INT approximate_median (log_t *log,
                                     ptrdiff_t start, ptrdiff_t size)
{
  eassert (size > 0);
  if (size < 2)
    return XINT (HASH_VALUE (log, start));
  if (size < 3)
    /* Not an actual median, but better for our application than
       choosing either of the two numbers.  */
    return ((XINT (HASH_VALUE (log, start))
             + XINT (HASH_VALUE (log, start + 1)))
            / 2);
  else
    {
      ptrdiff_t newsize = size / 3;
      ptrdiff_t start2 = start + newsize;
      EMACS_INT i1 = approximate_median (log, start, newsize);
      EMACS_INT i2 = approximate_median (log, start2, newsize);
      EMACS_INT i3 = approximate_median (log, start2 + newsize,
                                         size - 2 * newsize);
      return (i1 < i2
              ? (i2 < i3 ? i2 : (i1 < i3 ? i3 : i1))
              : (i1 < i3 ? i1 : (i2 < i3 ? i3 : i2)));
    }
}
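
/* A worked illustration of the recursion above: given the six counts
   [1 9 2 8 3 7], the thirds are [1 9], [2 8] and [3 7]; their
   two-element pseudo-medians are 5, 5 and 5, whose median is 5, which
   here equals the true median even though nothing was sorted.  */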

static void evict_lower_half (log_t *log)
{
  ptrdiff_t size = ASIZE (log->key_and_value) / 2;
  EMACS_INT median = approximate_median (log, 0, size);
  ptrdiff_t i;

  for (i = 0; i < size; i++)
    /* Evict not only values smaller but also values equal to the median,
       so as to make sure we evict something no matter what.  */
    if (XINT (HASH_VALUE (log, i)) <= median)
      {
        Lisp_Object key = HASH_KEY (log, i);
        { /* FIXME: we could make this more efficient.  */
          Lisp_Object tmp;
          XSET_HASH_TABLE (tmp, log); /* FIXME: Use make_lisp_ptr.  */
          Fremhash (key, tmp);
        }
        eassert (EQ (log->next_free, make_number (i)));

        eassert (VECTORP (key));
        for (ptrdiff_t j = 0; j < ASIZE (key); j++)
          ASET (key, j, Qnil);

        set_hash_key_slot (log, i, key);
      }
}

/* Record the current backtrace in LOG.  COUNT is the weight of this
   current backtrace: interrupt counts for CPU, and the allocation
   size for memory.  */

static void
record_backtrace (log_t *log, EMACS_INT count)
{
  Lisp_Object backtrace;
  ptrdiff_t index;

  if (!INTEGERP (log->next_free))
    /* FIXME: transfer the evicted counts to a special entry rather
       than dropping them on the floor.  */
    evict_lower_half (log);
  index = XINT (log->next_free);

  /* Get a "working memory" vector.  */
  backtrace = HASH_KEY (log, index);
  get_backtrace (backtrace);

  { /* We basically do a `gethash+puthash' here, except that we have to be
       careful to avoid memory allocation since we're in a signal
       handler, and we optimize the code to try and avoid computing the
       hash+lookup twice.  See fns.c:Fputhash for reference.  */
    EMACS_UINT hash;
    ptrdiff_t j = hash_lookup (log, backtrace, &hash);
    if (j >= 0)
      {
        EMACS_INT old_val = XINT (HASH_VALUE (log, j));
        EMACS_INT new_val = saturated_add (old_val, count);
        set_hash_value_slot (log, j, make_number (new_val));
      }
    else
      { /* BEWARE!  hash_put in general can allocate memory.
           But currently it only does that if log->next_free is nil.  */
        eassert (!NILP (log->next_free));
        ptrdiff_t j = hash_put (log, backtrace, make_number (count), hash);
        /* Let's make sure we've put `backtrace' right where it
           already was to start with.  */
        eassert (index == j);

        /* FIXME: If the hash-table is almost full, we should set
           some global flag so that some Elisp code can offload its
           data elsewhere, so as to avoid the eviction code.
           There are 2 ways to do that, AFAICT:
           - Set a flag checked in QUIT, such that QUIT can then call
             Fprofiler_cpu_log and stash the full log for later use.
           - Set a flag checked in post-gc-hook, so that Elisp code can call
             profiler-cpu-log.  That gives us more flexibility since that
             Elisp code can then do all kinds of fun stuff like write
             the log to disk.  Or turn it right away into a call tree.
           Of course, using Elisp is generally preferable, but it may
           take longer until we get a chance to run the Elisp code, so
           there's more risk that the table will get full before we
           get there.  */
      }
  }
}
\f
/* Sampling profiler.  */

#ifdef PROFILER_CPU_SUPPORT

/* The profiler timer and whether it was properly initialized, if
   POSIX timers are available.  */
#ifdef HAVE_ITIMERSPEC
static timer_t profiler_timer;
static bool profiler_timer_ok;
#endif

/* Status of sampling profiler.  */
static enum profiler_cpu_running
  { NOT_RUNNING,
#ifdef HAVE_ITIMERSPEC
    TIMER_SETTIME_RUNNING,
#endif
    SETITIMER_RUNNING
  }
  profiler_cpu_running;

/* Hash-table log of CPU profiler.  */
static Lisp_Object cpu_log;

/* Separate counter for the time spent in the GC.  */
static EMACS_INT cpu_gc_count;

/* The current sampling interval in nanoseconds.  */
static EMACS_INT current_sampling_interval;

/* Signal handler for sampling profiler.  */

/* timer_getoverrun is not implemented on Cygwin, but the following
   seems to be good enough for profiling.  */
#ifdef CYGWIN
#define timer_getoverrun(x) 0
#endif

static void
handle_profiler_signal (int signal)
{
  if (EQ (backtrace_top_function (), QAutomatic_GC))
    /* Special case the time-count inside GC because the hash-table
       code is not prepared to be used while the GC is running.
       More specifically it uses ASIZE at many places where it does
       not expect the ARRAY_MARK_FLAG to be set.  We could try and
       harden the hash-table code, but it doesn't seem worth the
       effort.  */
    cpu_gc_count = saturated_add (cpu_gc_count, 1);
  else
    {
      EMACS_INT count = 1;
#ifdef HAVE_ITIMERSPEC
      if (profiler_timer_ok)
        {
          int overruns = timer_getoverrun (profiler_timer);
          eassert (overruns >= 0);
          count += overruns;
        }
#endif
      eassert (HASH_TABLE_P (cpu_log));
      record_backtrace (XHASH_TABLE (cpu_log), count);
    }
}

static void
deliver_profiler_signal (int signal)
{
  deliver_process_signal (signal, handle_profiler_signal);
}

static int
setup_cpu_timer (Lisp_Object sampling_interval)
{
  struct sigaction action;
  struct itimerval timer;
  struct timespec interval;
  int billion = 1000000000;

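  /* The check below rejects any interval outside [1, N] nanoseconds,
     where N is the largest value that fits both in an EMACS_INT and,
     as whole seconds plus a sub-second remainder, in a time_t.  For
     instance, with a 32-bit time_t the bound is
     (2^31 - 1) * 10^9 + (10^9 - 1) ns, roughly 68 years.  */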
  if (! RANGED_INTEGERP (1, sampling_interval,
                         (TYPE_MAXIMUM (time_t) < EMACS_INT_MAX / billion
                          ? ((EMACS_INT) TYPE_MAXIMUM (time_t) * billion
                             + (billion - 1))
                          : EMACS_INT_MAX)))
    return -1;

  current_sampling_interval = XINT (sampling_interval);
  interval = make_timespec (current_sampling_interval / billion,
                            current_sampling_interval % billion);
  emacs_sigaction_init (&action, deliver_profiler_signal);
  sigaction (SIGPROF, &action, 0);

#ifdef HAVE_ITIMERSPEC
  if (! profiler_timer_ok)
    {
      /* System clocks to try, in decreasing order of desirability.  */
      static clockid_t const system_clock[] = {
#ifdef CLOCK_THREAD_CPUTIME_ID
        CLOCK_THREAD_CPUTIME_ID,
#endif
#ifdef CLOCK_PROCESS_CPUTIME_ID
        CLOCK_PROCESS_CPUTIME_ID,
#endif
#ifdef CLOCK_MONOTONIC
        CLOCK_MONOTONIC,
#endif
        CLOCK_REALTIME
      };
      int i;
      struct sigevent sigev;
      sigev.sigev_value.sival_ptr = &profiler_timer;
      sigev.sigev_signo = SIGPROF;
      sigev.sigev_notify = SIGEV_SIGNAL;

      for (i = 0; i < ARRAYELTS (system_clock); i++)
        if (timer_create (system_clock[i], &sigev, &profiler_timer) == 0)
          {
            profiler_timer_ok = 1;
            break;
          }
    }

  if (profiler_timer_ok)
    {
      struct itimerspec ispec;
      ispec.it_value = ispec.it_interval = interval;
      if (timer_settime (profiler_timer, 0, &ispec, 0) == 0)
        return TIMER_SETTIME_RUNNING;
    }
#endif

#ifdef HAVE_SETITIMER
  timer.it_value = timer.it_interval = make_timeval (interval);
  if (setitimer (ITIMER_PROF, &timer, 0) == 0)
    return SETITIMER_RUNNING;
#endif

  return NOT_RUNNING;
}

DEFUN ("profiler-cpu-start", Fprofiler_cpu_start, Sprofiler_cpu_start,
       1, 1, 0,
       doc: /* Start or restart the cpu profiler.
It takes call-stack samples approximately every SAMPLING-INTERVAL nanoseconds.
See also `profiler-log-size' and `profiler-max-stack-depth'.  */)
  (Lisp_Object sampling_interval)
{
  if (profiler_cpu_running)
    error ("CPU profiler is already running");

  if (NILP (cpu_log))
    {
      cpu_gc_count = 0;
      cpu_log = make_log (profiler_log_size,
                          profiler_max_stack_depth);
    }

  int status = setup_cpu_timer (sampling_interval);
  if (status == -1)
    {
      profiler_cpu_running = NOT_RUNNING;
      error ("Invalid sampling interval");
    }
  else
    {
      profiler_cpu_running = status;
      if (! profiler_cpu_running)
        error ("Unable to start profiler timer");
    }

  return Qt;
}
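
/* A typical Elisp-level session, for illustration (the interactive
   `profiler-start' and `profiler-report' wrappers in lisp/profiler.el
   are the usual entry points):

     (profiler-cpu-start 1000000)   ; sample roughly every millisecond
     ;; ... run the code being profiled ...
     (profiler-cpu-stop)  */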

DEFUN ("profiler-cpu-stop", Fprofiler_cpu_stop, Sprofiler_cpu_stop,
       0, 0, 0,
       doc: /* Stop the cpu profiler.  The profiler log is not affected.
Return non-nil if the profiler was running.  */)
  (void)
{
  switch (profiler_cpu_running)
    {
    case NOT_RUNNING:
      return Qnil;

#ifdef HAVE_ITIMERSPEC
    case TIMER_SETTIME_RUNNING:
      {
        struct itimerspec disable;
        memset (&disable, 0, sizeof disable);
        timer_settime (profiler_timer, 0, &disable, 0);
      }
      break;
#endif

#ifdef HAVE_SETITIMER
    case SETITIMER_RUNNING:
      {
        struct itimerval disable;
        memset (&disable, 0, sizeof disable);
        setitimer (ITIMER_PROF, &disable, 0);
      }
      break;
#endif
    }

  signal (SIGPROF, SIG_IGN);
  profiler_cpu_running = NOT_RUNNING;
  return Qt;
}

DEFUN ("profiler-cpu-running-p",
       Fprofiler_cpu_running_p, Sprofiler_cpu_running_p,
       0, 0, 0,
       doc: /* Return non-nil if cpu profiler is running.  */)
  (void)
{
  return profiler_cpu_running ? Qt : Qnil;
}

DEFUN ("profiler-cpu-log", Fprofiler_cpu_log, Sprofiler_cpu_log,
       0, 0, 0,
       doc: /* Return the current cpu profiler log.
The log is a hash-table mapping backtraces to counters which represent
the amount of time spent at those points.  Every backtrace is a vector
of functions, where the last few elements may be nil.
Before returning, a new log is allocated for future samples.  */)
  (void)
{
  Lisp_Object result = cpu_log;
  /* Here we're making the log visible to Elisp, so it's not safe any
     more for our use afterwards since we can't rely on its special
     pre-allocated keys anymore.  So we have to allocate a new one.  */
  cpu_log = (profiler_cpu_running
             ? make_log (profiler_log_size, profiler_max_stack_depth)
             : Qnil);
  Fputhash (Fmake_vector (make_number (1), QAutomatic_GC),
            make_number (cpu_gc_count),
            result);
  cpu_gc_count = 0;
  return result;
}
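
/* The returned log is an ordinary hash-table from Elisp's point of
   view, so it can be inspected directly; for instance (a sketch):

     (maphash (lambda (backtrace count)
                (message "%S: %d" backtrace count))
              (profiler-cpu-log))  */
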
#endif /* PROFILER_CPU_SUPPORT */
\f
/* Memory profiler.  */

/* True if memory profiler is running.  */
bool profiler_memory_running;

static Lisp_Object memory_log;

DEFUN ("profiler-memory-start", Fprofiler_memory_start, Sprofiler_memory_start,
       0, 0, 0,
       doc: /* Start/restart the memory profiler.
The memory profiler will take samples of the call-stack whenever a new
allocation takes place.  Note that most small allocations only trigger
the profiler occasionally.
See also `profiler-log-size' and `profiler-max-stack-depth'.  */)
  (void)
{
  if (profiler_memory_running)
    error ("Memory profiler is already running");

  if (NILP (memory_log))
    memory_log = make_log (profiler_log_size,
                           profiler_max_stack_depth);

  profiler_memory_running = true;

  return Qt;
}
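
/* For illustration, the memory profiler is driven the same way from
   Elisp:

     (profiler-memory-start)
     ;; ... allocate ...
     (profiler-memory-stop)
     (profiler-memory-log)  */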

DEFUN ("profiler-memory-stop",
       Fprofiler_memory_stop, Sprofiler_memory_stop,
       0, 0, 0,
       doc: /* Stop the memory profiler.  The profiler log is not affected.
Return non-nil if the profiler was running.  */)
  (void)
{
  if (!profiler_memory_running)
    return Qnil;
  profiler_memory_running = false;
  return Qt;
}

DEFUN ("profiler-memory-running-p",
       Fprofiler_memory_running_p, Sprofiler_memory_running_p,
       0, 0, 0,
       doc: /* Return non-nil if memory profiler is running.  */)
  (void)
{
  return profiler_memory_running ? Qt : Qnil;
}

DEFUN ("profiler-memory-log",
       Fprofiler_memory_log, Sprofiler_memory_log,
       0, 0, 0,
       doc: /* Return the current memory profiler log.
The log is a hash-table mapping backtraces to counters which represent
the amount of memory allocated at those points.  Every backtrace is a vector
of functions, where the last few elements may be nil.
Before returning, a new log is allocated for future samples.  */)
  (void)
{
  Lisp_Object result = memory_log;
  /* Here we're making the log visible to Elisp, so it's not safe any
     more for our use afterwards since we can't rely on its special
     pre-allocated keys anymore.  So we have to allocate a new one.  */
  memory_log = (profiler_memory_running
                ? make_log (profiler_log_size, profiler_max_stack_depth)
                : Qnil);
  return result;
}

\f
/* Signals and probes.  */

/* Record that the current backtrace allocated SIZE bytes.  */
void
malloc_probe (size_t size)
{
  eassert (HASH_TABLE_P (memory_log));
  record_backtrace (XHASH_TABLE (memory_log), min (size, MOST_POSITIVE_FIXNUM));
}

DEFUN ("function-equal", Ffunction_equal, Sfunction_equal, 2, 2, 0,
       doc: /* Return non-nil if F1 and F2 come from the same source.
Used to determine if different closures are just different instances of
the same lambda expression, or are really unrelated functions.  */)
  (Lisp_Object f1, Lisp_Object f2)
{
  bool res;
  if (EQ (f1, f2))
    res = true;
  else if (COMPILEDP (f1) && COMPILEDP (f2))
    res = EQ (AREF (f1, COMPILED_BYTECODE), AREF (f2, COMPILED_BYTECODE));
  else if (CONSP (f1) && CONSP (f2) && CONSP (XCDR (f1)) && CONSP (XCDR (f2))
           && EQ (Qclosure, XCAR (f1))
           && EQ (Qclosure, XCAR (f2)))
    res = EQ (XCDR (XCDR (f1)), XCDR (XCDR (f2)));
  else
    res = false;
  return res ? Qt : Qnil;
}
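
/* For example, (function-equal #'car #'car) is t by simple EQ, while
   two distinct instances of the same byte-compiled lambda share their
   COMPILED_BYTECODE slot and so match the second branch; two unrelated
   functions such as #'car and #'cdr match no branch and yield nil.  */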

static bool
cmpfn_profiler (struct hash_table_test *t,
                Lisp_Object bt1, Lisp_Object bt2)
{
  if (VECTORP (bt1) && VECTORP (bt2))
    {
      ptrdiff_t i, l = ASIZE (bt1);
      if (l != ASIZE (bt2))
        return false;
      for (i = 0; i < l; i++)
        if (NILP (Ffunction_equal (AREF (bt1, i), AREF (bt2, i))))
          return false;
      return true;
    }
  else
    return EQ (bt1, bt2);
}

static EMACS_UINT
hashfn_profiler (struct hash_table_test *ht, Lisp_Object bt)
{
  if (VECTORP (bt))
    {
      EMACS_UINT hash = 0;
      ptrdiff_t i, l = ASIZE (bt);
      for (i = 0; i < l; i++)
        {
          Lisp_Object f = AREF (bt, i);
          EMACS_UINT hash1
            = (COMPILEDP (f) ? XHASH (AREF (f, COMPILED_BYTECODE))
               : (CONSP (f) && CONSP (XCDR (f)) && EQ (Qclosure, XCAR (f)))
               ? XHASH (XCDR (XCDR (f))) : XHASH (f));
          hash = sxhash_combine (hash, hash1);
        }
      return SXHASH_REDUCE (hash);
    }
  else
    return XHASH (bt);
}
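
/* Note that hashfn_profiler mirrors cmpfn_profiler's use of
   Ffunction_equal: frames that compare equal (same bytecode object, or
   same closure body) are hashed through the same representative
   object, so equal backtraces always land in the same bucket, the
   invariant any custom hash-table test must maintain.  */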

void
syms_of_profiler (void)
{
  DEFVAR_INT ("profiler-max-stack-depth", profiler_max_stack_depth,
              doc: /* Number of elements from the call-stack recorded in the log.  */);
  profiler_max_stack_depth = 16;
  DEFVAR_INT ("profiler-log-size", profiler_log_size,
              doc: /* Number of distinct call-stacks that can be recorded in a profiler log.
If the log gets full, some of the least-seen call-stacks will be evicted
to make room for new entries.  */);
  profiler_log_size = 10000;

  DEFSYM (Qprofiler_backtrace_equal, "profiler-backtrace-equal");

  hashtest_profiler.name = Qprofiler_backtrace_equal;
  hashtest_profiler.user_hash_function = Qnil;
  hashtest_profiler.user_cmp_function = Qnil;
  hashtest_profiler.cmpfn = cmpfn_profiler;
  hashtest_profiler.hashfn = hashfn_profiler;

  defsubr (&Sfunction_equal);

#ifdef PROFILER_CPU_SUPPORT
  profiler_cpu_running = NOT_RUNNING;
  cpu_log = Qnil;
  staticpro (&cpu_log);
  defsubr (&Sprofiler_cpu_start);
  defsubr (&Sprofiler_cpu_stop);
  defsubr (&Sprofiler_cpu_running_p);
  defsubr (&Sprofiler_cpu_log);
#endif
  profiler_memory_running = false;
  memory_log = Qnil;
  staticpro (&memory_log);
  defsubr (&Sprofiler_memory_start);
  defsubr (&Sprofiler_memory_stop);
  defsubr (&Sprofiler_memory_running_p);
  defsubr (&Sprofiler_memory_log);
}