/* Profiler implementation.

Copyright (C) 2012-2015 Free Software Foundation, Inc.

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>
#include "lisp.h"
#include "syssignal.h"
#include "systime.h"

/* Return A + B, but return the maximum fixnum if the result would overflow.
   Assume A and B are nonnegative and in fixnum range.  */

static EMACS_INT
saturated_add (EMACS_INT a, EMACS_INT b)
{
  return min (a + b, MOST_POSITIVE_FIXNUM);
}
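
/* For example, saturated_add (MOST_POSITIVE_FIXNUM, 1) returns
   MOST_POSITIVE_FIXNUM instead of overflowing: since both arguments
   are in fixnum range, the sum a + b itself always fits in an
   EMACS_INT, so only the fixnum clamp is needed.  */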

/* Logs.  */

typedef struct Lisp_Hash_Table log_t;

static struct hash_table_test hashtest_profiler;

static Lisp_Object
make_log (int heap_size, int max_stack_depth)
{
  /* We use a standard Elisp hash-table object, but we use it in
     a special way.  This is OK as long as the object is not exposed
     to Elisp, i.e. until it is returned by *-profiler-log, after which
     it can't be used any more.  */
  Lisp_Object log = make_hash_table (hashtest_profiler,
				     make_number (heap_size),
				     make_float (DEFAULT_REHASH_SIZE),
				     make_float (DEFAULT_REHASH_THRESHOLD),
				     Qnil);
  struct Lisp_Hash_Table *h = XHASH_TABLE (log);

  /* What is special about our hash-tables is that the keys are pre-filled
     with the vectors we'll put in them.  */
  int i = ASIZE (h->key_and_value) / 2;
  while (i > 0)
    set_hash_key_slot (h, --i,
		       Fmake_vector (make_number (max_stack_depth), Qnil));
  return log;
}

/* Evict the least used half of the hash_table.

   When the table is full, we have to evict someone.
   The easiest and most efficient is to evict the value we're about to add
   (i.e. once the table is full, stop sampling).

   We could also pick the element with the lowest count and evict it,
   but finding it is O(N) and for that amount of work we get very
   little in return: for the next sample, this latest sample will have
   count==1 and will hence be a prime candidate for eviction :-(

   So instead, we take O(N) time to eliminate more or less half of the
   entries (the half with the lowest counts).  So we get an amortized
   cost of O(1) and we get O(N) time for a new entry to grow larger
   than the other least counts before a new round of eviction.  */

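/* Illustrative example (hypothetical counts, not part of the build):
   if the eight log slots hold the counts {1, 1, 2, 3, 5, 8, 13, 21},
   approximate_median below computes 2, so evict_lower_half clears the
   three slots holding counts <= 2 and sampling resumes with that much
   free room.  */
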
static EMACS_INT approximate_median (log_t *log,
				     ptrdiff_t start, ptrdiff_t size)
{
  eassert (size > 0);
  if (size < 2)
    return XINT (HASH_VALUE (log, start));
  if (size < 3)
    /* Not an actual median, but better for our application than
       choosing either of the two numbers.  */
    return ((XINT (HASH_VALUE (log, start))
	     + XINT (HASH_VALUE (log, start + 1)))
	    / 2);
  else
    {
      ptrdiff_t newsize = size / 3;
      ptrdiff_t start2 = start + newsize;
      EMACS_INT i1 = approximate_median (log, start, newsize);
      EMACS_INT i2 = approximate_median (log, start2, newsize);
      EMACS_INT i3 = approximate_median (log, start2 + newsize,
					 size - 2 * newsize);
      return (i1 < i2
	      ? (i2 < i3 ? i2 : (i1 < i3 ? i3 : i1))
	      : (i1 < i3 ? i1 : (i2 < i3 ? i3 : i2)));
    }
}
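
/* Worked example (hypothetical values): for nine counts stored as
   {7, 1, 4, 9, 2, 6, 3, 8, 5}, each third of size 3 yields the
   sub-medians 4, 6 and 5, and the result is their median, 5, which
   here happens to equal the true median.  In general the result is
   only an approximation, which is all evict_lower_half needs.  */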

static void evict_lower_half (log_t *log)
{
  ptrdiff_t size = ASIZE (log->key_and_value) / 2;
  EMACS_INT median = approximate_median (log, 0, size);
  ptrdiff_t i;

  for (i = 0; i < size; i++)
    /* Evict not only values smaller but also values equal to the median,
       so as to make sure we evict something no matter what.  */
    if (XINT (HASH_VALUE (log, i)) <= median)
      {
	Lisp_Object key = HASH_KEY (log, i);
	{ /* FIXME: we could make this more efficient.  */
	  Lisp_Object tmp;
	  XSET_HASH_TABLE (tmp, log); /* FIXME: Use make_lisp_ptr.  */
	  Fremhash (key, tmp);
	}
	eassert (EQ (log->next_free, make_number (i)));
	{
	  int j;
	  eassert (VECTORP (key));
	  for (j = 0; j < ASIZE (key); j++)
	    ASET (key, j, Qnil);
	}
	set_hash_key_slot (log, i, key);
      }
}

/* Record the current backtrace in LOG.  COUNT is the weight of this
   current backtrace: interrupt counts for CPU, and the allocation
   size for memory.  */

static void
record_backtrace (log_t *log, EMACS_INT count)
{
  Lisp_Object backtrace;
  ptrdiff_t index;

  if (!INTEGERP (log->next_free))
    /* FIXME: transfer the evicted counts to a special entry rather
       than dropping them on the floor.  */
    evict_lower_half (log);
  index = XINT (log->next_free);

  /* Get a "working memory" vector.  */
  backtrace = HASH_KEY (log, index);
  get_backtrace (backtrace);

  { /* We basically do a `gethash+puthash' here, except that we have to be
       careful to avoid memory allocation since we're in a signal
       handler, and we optimize the code to try and avoid computing the
       hash+lookup twice.  See fns.c:Fputhash for reference.  */
    EMACS_UINT hash;
    ptrdiff_t j = hash_lookup (log, backtrace, &hash);
    if (j >= 0)
      {
	EMACS_INT old_val = XINT (HASH_VALUE (log, j));
	EMACS_INT new_val = saturated_add (old_val, count);
	set_hash_value_slot (log, j, make_number (new_val));
      }
    else
      { /* BEWARE!  hash_put in general can allocate memory.
	   But currently it only does that if log->next_free is nil.  */
	eassert (!NILP (log->next_free));
	j = hash_put (log, backtrace, make_number (count), hash);
	/* Let's make sure we've put `backtrace' right where it
	   already was to start with.  */
	eassert (index == j);

	/* FIXME: If the hash-table is almost full, we should set
	   some global flag so that some Elisp code can offload its
	   data elsewhere, so as to avoid the eviction code.
	   There are 2 ways to do that, AFAICT:
	   - Set a flag checked in QUIT, such that QUIT can then call
	     Fprofiler_cpu_log and stash the full log for later use.
	   - Set a flag checked in post-gc-hook, so that Elisp code can call
	     profiler-cpu-log.  That gives us more flexibility since that
	     Elisp code can then do all kinds of fun stuff like write
	     the log to disk.  Or turn it right away into a call tree.
	   Of course, using Elisp is generally preferable, but it may
	   take longer until we get a chance to run the Elisp code, so
	   there's more risk that the table will get full before we
	   get there.  */
      }
  }
}
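
/* Sketch of the resulting log contents (hypothetical names): after
   three timer signals delivered under the same call-stack, the log
   maps the pre-filled key vector [my-loop my-helper nil ...] to the
   count 3 (plus any timer overruns; see handle_profiler_signal).  */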
\f
/* Sampling profiler.  */

#ifdef PROFILER_CPU_SUPPORT

/* The profiler timer and whether it was properly initialized, if
   POSIX timers are available.  */
#ifdef HAVE_ITIMERSPEC
static timer_t profiler_timer;
static bool profiler_timer_ok;
#endif

/* Status of sampling profiler.  */
static enum profiler_cpu_running
  { NOT_RUNNING, TIMER_SETTIME_RUNNING, SETITIMER_RUNNING }
  profiler_cpu_running;

/* Hash-table log of CPU profiler.  */
static Lisp_Object cpu_log;

/* Separate counter for the time spent in the GC.  */
static EMACS_INT cpu_gc_count;

/* The current sampling interval in nanoseconds.  */
static EMACS_INT current_sampling_interval;

/* Signal handler for sampling profiler.  */

static void
handle_profiler_signal (int signal)
{
  if (EQ (backtrace_top_function (), Qautomatic_gc))
    /* Special case the time-count inside GC because the hash-table
       code is not prepared to be used while the GC is running.
       More specifically it uses ASIZE at many places where it does
       not expect the ARRAY_MARK_FLAG to be set.  We could try and
       harden the hash-table code, but it doesn't seem worth the
       effort.  */
    cpu_gc_count = saturated_add (cpu_gc_count, 1);
  else
    {
      EMACS_INT count = 1;
#ifdef HAVE_ITIMERSPEC
      if (profiler_timer_ok)
	{
	  int overruns = timer_getoverrun (profiler_timer);
	  eassert (overruns >= 0);
	  count += overruns;
	}
#endif
      eassert (HASH_TABLE_P (cpu_log));
      record_backtrace (XHASH_TABLE (cpu_log), count);
    }
}

static void
deliver_profiler_signal (int signal)
{
  deliver_process_signal (signal, handle_profiler_signal);
}

static int
setup_cpu_timer (Lisp_Object sampling_interval)
{
  struct sigaction action;
  struct itimerval timer;
  struct timespec interval;
  int billion = 1000000000;

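  /* Reject SAMPLING-INTERVAL unless 1 <= SAMPLING-INTERVAL <= B, where
     B is the largest nanosecond count whose whole-seconds part still
     fits in time_t (or EMACS_INT_MAX if even that doesn't fit).  For
     example, with 32-bit time_t, B is 2147483647 * 10^9 + (10^9 - 1)
     nanoseconds, roughly 68 years.  */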
  if (! RANGED_INTEGERP (1, sampling_interval,
			 (TYPE_MAXIMUM (time_t) < EMACS_INT_MAX / billion
			  ? ((EMACS_INT) TYPE_MAXIMUM (time_t) * billion
			     + (billion - 1))
			  : EMACS_INT_MAX)))
    return -1;

  current_sampling_interval = XINT (sampling_interval);
  interval = make_timespec (current_sampling_interval / billion,
			    current_sampling_interval % billion);
  emacs_sigaction_init (&action, deliver_profiler_signal);
  sigaction (SIGPROF, &action, 0);

#ifdef HAVE_ITIMERSPEC
  if (! profiler_timer_ok)
    {
      /* System clocks to try, in decreasing order of desirability.  */
      static clockid_t const system_clock[] = {
#ifdef CLOCK_THREAD_CPUTIME_ID
	CLOCK_THREAD_CPUTIME_ID,
#endif
#ifdef CLOCK_PROCESS_CPUTIME_ID
	CLOCK_PROCESS_CPUTIME_ID,
#endif
#ifdef CLOCK_MONOTONIC
	CLOCK_MONOTONIC,
#endif
	CLOCK_REALTIME
      };
      int i;
      struct sigevent sigev;
      sigev.sigev_value.sival_ptr = &profiler_timer;
      sigev.sigev_signo = SIGPROF;
      sigev.sigev_notify = SIGEV_SIGNAL;

      for (i = 0; i < ARRAYELTS (system_clock); i++)
	if (timer_create (system_clock[i], &sigev, &profiler_timer) == 0)
	  {
	    profiler_timer_ok = 1;
	    break;
	  }
    }

  if (profiler_timer_ok)
    {
      struct itimerspec ispec;
      ispec.it_value = ispec.it_interval = interval;
      if (timer_settime (profiler_timer, 0, &ispec, 0) == 0)
	return TIMER_SETTIME_RUNNING;
    }
#endif

#ifdef HAVE_SETITIMER
  timer.it_value = timer.it_interval = make_timeval (interval);
  if (setitimer (ITIMER_PROF, &timer, 0) == 0)
    return SETITIMER_RUNNING;
#endif

  return NOT_RUNNING;
}

DEFUN ("profiler-cpu-start", Fprofiler_cpu_start, Sprofiler_cpu_start,
       1, 1, 0,
       doc: /* Start or restart the cpu profiler.
It takes call-stack samples approximately every SAMPLING-INTERVAL nanoseconds.
See also `profiler-log-size' and `profiler-max-stack-depth'.  */)
  (Lisp_Object sampling_interval)
{
  if (profiler_cpu_running)
    error ("CPU profiler is already running");

  if (NILP (cpu_log))
    {
      cpu_gc_count = 0;
      cpu_log = make_log (profiler_log_size,
			  profiler_max_stack_depth);
    }

  int status = setup_cpu_timer (sampling_interval);
  if (status == -1)
    {
      profiler_cpu_running = NOT_RUNNING;
      error ("Invalid sampling interval");
    }
  else
    {
      profiler_cpu_running = status;
      if (! profiler_cpu_running)
	error ("Unable to start profiler timer");
    }

  return Qt;
}
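
/* Example usage from Lisp (sampling roughly once per millisecond):

     (profiler-cpu-start 1000000)

   The interval is only a hint: the effective resolution depends on
   which timer facility setup_cpu_timer managed to set up.  */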

DEFUN ("profiler-cpu-stop", Fprofiler_cpu_stop, Sprofiler_cpu_stop,
       0, 0, 0,
       doc: /* Stop the cpu profiler.  The profiler log is not affected.
Return non-nil if the profiler was running.  */)
  (void)
{
  switch (profiler_cpu_running)
    {
    case NOT_RUNNING:
      return Qnil;

#ifdef HAVE_ITIMERSPEC
    case TIMER_SETTIME_RUNNING:
      {
	struct itimerspec disable;
	memset (&disable, 0, sizeof disable);
	timer_settime (profiler_timer, 0, &disable, 0);
      }
      break;
#endif

#ifdef HAVE_SETITIMER
    case SETITIMER_RUNNING:
      {
	struct itimerval disable;
	memset (&disable, 0, sizeof disable);
	setitimer (ITIMER_PROF, &disable, 0);
      }
      break;
#endif
    }

  signal (SIGPROF, SIG_IGN);
  profiler_cpu_running = NOT_RUNNING;
  return Qt;
}

DEFUN ("profiler-cpu-running-p",
       Fprofiler_cpu_running_p, Sprofiler_cpu_running_p,
       0, 0, 0,
       doc: /* Return non-nil if cpu profiler is running.  */)
  (void)
{
  return profiler_cpu_running ? Qt : Qnil;
}

DEFUN ("profiler-cpu-log", Fprofiler_cpu_log, Sprofiler_cpu_log,
       0, 0, 0,
       doc: /* Return the current cpu profiler log.
The log is a hash-table mapping backtraces to counters which represent
the amount of time spent at those points.  Every backtrace is a vector
of functions, where the last few elements may be nil.
Before returning, a new log is allocated for future samples.  */)
  (void)
{
  Lisp_Object result = cpu_log;
  /* Here we're making the log visible to Elisp, so it's not safe any
     more for our use afterwards since we can't rely on its special
     pre-allocated keys anymore.  So we have to allocate a new one.  */
  cpu_log = (profiler_cpu_running
	     ? make_log (profiler_log_size, profiler_max_stack_depth)
	     : Qnil);
  Fputhash (Fmake_vector (make_number (1), Qautomatic_gc),
	    make_number (cpu_gc_count),
	    result);
  cpu_gc_count = 0;
  return result;
}
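
/* A returned log is an ordinary Elisp hash-table whose keys are
   backtrace vectors and whose values are sample counts, e.g.
   (hypothetical shape) { [my-loop my-helper nil ...] -> 27, ... };
   time spent in GC appears under the one-element vector containing
   Qautomatic_gc that is added just above.  */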
#endif /* PROFILER_CPU_SUPPORT */
\f
/* Memory profiler.  */

/* True if memory profiler is running.  */
bool profiler_memory_running;

static Lisp_Object memory_log;

DEFUN ("profiler-memory-start", Fprofiler_memory_start, Sprofiler_memory_start,
       0, 0, 0,
       doc: /* Start/restart the memory profiler.
The memory profiler will take samples of the call-stack whenever a new
allocation takes place.  Note that most small allocations only trigger
the profiler occasionally.
See also `profiler-log-size' and `profiler-max-stack-depth'.  */)
  (void)
{
  if (profiler_memory_running)
    error ("Memory profiler is already running");

  if (NILP (memory_log))
    memory_log = make_log (profiler_log_size,
			   profiler_max_stack_depth);

  profiler_memory_running = true;

  return Qt;
}

DEFUN ("profiler-memory-stop",
       Fprofiler_memory_stop, Sprofiler_memory_stop,
       0, 0, 0,
       doc: /* Stop the memory profiler.  The profiler log is not affected.
Return non-nil if the profiler was running.  */)
  (void)
{
  if (!profiler_memory_running)
    return Qnil;
  profiler_memory_running = false;
  return Qt;
}

DEFUN ("profiler-memory-running-p",
       Fprofiler_memory_running_p, Sprofiler_memory_running_p,
       0, 0, 0,
       doc: /* Return non-nil if memory profiler is running.  */)
  (void)
{
  return profiler_memory_running ? Qt : Qnil;
}

DEFUN ("profiler-memory-log",
       Fprofiler_memory_log, Sprofiler_memory_log,
       0, 0, 0,
       doc: /* Return the current memory profiler log.
The log is a hash-table mapping backtraces to counters which represent
the amount of memory allocated at those points.  Every backtrace is a vector
of functions, where the last few elements may be nil.
Before returning, a new log is allocated for future samples.  */)
  (void)
{
  Lisp_Object result = memory_log;
  /* Here we're making the log visible to Elisp, so it's not safe any
     more for our use afterwards since we can't rely on its special
     pre-allocated keys anymore.  So we have to allocate a new one.  */
  memory_log = (profiler_memory_running
		? make_log (profiler_log_size, profiler_max_stack_depth)
		: Qnil);
  return result;
}

\f
/* Signals and probes.  */

/* Record that the current backtrace allocated SIZE bytes.  */
void
malloc_probe (size_t size)
{
  eassert (HASH_TABLE_P (memory_log));
  record_backtrace (XHASH_TABLE (memory_log), min (size, MOST_POSITIVE_FIXNUM));
}

DEFUN ("function-equal", Ffunction_equal, Sfunction_equal, 2, 2, 0,
       doc: /* Return non-nil if F1 and F2 come from the same source.
Used to determine if different closures are just different instances of
the same lambda expression, or are really unrelated functions.  */)
  (Lisp_Object f1, Lisp_Object f2)
{
  bool res;
  if (EQ (f1, f2))
    res = true;
  else if (COMPILEDP (f1) && COMPILEDP (f2))
    res = EQ (AREF (f1, COMPILED_BYTECODE), AREF (f2, COMPILED_BYTECODE));
  else if (CONSP (f1) && CONSP (f2) && CONSP (XCDR (f1)) && CONSP (XCDR (f2))
	   && EQ (Qclosure, XCAR (f1))
	   && EQ (Qclosure, XCAR (f2)))
    res = EQ (XCDR (XCDR (f1)), XCDR (XCDR (f2)));
  else
    res = false;
  return res ? Qt : Qnil;
}
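
/* Illustrative Lisp-level behavior (hypothetical closures): two
   closures obtained from a single evaluation site of
   (lambda (x) (+ x y)) share their arg/body tail, so `function-equal'
   returns non-nil on them even though their captured values of `y'
   differ; two textually identical lambdas read separately share no
   structure and compare unequal.  */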

static bool
cmpfn_profiler (struct hash_table_test *t,
		Lisp_Object bt1, Lisp_Object bt2)
{
  if (VECTORP (bt1) && VECTORP (bt2))
    {
      ptrdiff_t i, l = ASIZE (bt1);
      if (l != ASIZE (bt2))
	return false;
      for (i = 0; i < l; i++)
	if (NILP (Ffunction_equal (AREF (bt1, i), AREF (bt2, i))))
	  return false;
      return true;
    }
  else
    return EQ (bt1, bt2);
}

static EMACS_UINT
hashfn_profiler (struct hash_table_test *ht, Lisp_Object bt)
{
  if (VECTORP (bt))
    {
      EMACS_UINT hash = 0;
      ptrdiff_t i, l = ASIZE (bt);
      for (i = 0; i < l; i++)
	{
	  Lisp_Object f = AREF (bt, i);
	  EMACS_UINT hash1
	    = (COMPILEDP (f) ? XHASH (AREF (f, COMPILED_BYTECODE))
	       : (CONSP (f) && CONSP (XCDR (f)) && EQ (Qclosure, XCAR (f)))
	       ? XHASH (XCDR (XCDR (f))) : XHASH (f));
	  hash = sxhash_combine (hash, hash1);
	}
      return SXHASH_REDUCE (hash);
    }
  else
    return XHASH (bt);
}
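
/* hashfn_profiler is deliberately consistent with cmpfn_profiler:
   frames that `function-equal' treats as equal (same bytecode object,
   or same closure arg/body tail) hash alike, because the hash keys
   off that shared substructure rather than off the whole object.
   Breaking this invariant would scatter equal backtraces across
   different hash buckets.  */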

void
syms_of_profiler (void)
{
  DEFVAR_INT ("profiler-max-stack-depth", profiler_max_stack_depth,
	      doc: /* Number of elements from the call-stack recorded in the log.  */);
  profiler_max_stack_depth = 16;
  DEFVAR_INT ("profiler-log-size", profiler_log_size,
	      doc: /* Number of distinct call-stacks that can be recorded in a profiler log.
If the log gets full, some of the least-seen call-stacks will be evicted
to make room for new entries.  */);
  profiler_log_size = 10000;

  DEFSYM (Qprofiler_backtrace_equal, "profiler-backtrace-equal");

  hashtest_profiler.name = Qprofiler_backtrace_equal;
  hashtest_profiler.user_hash_function = Qnil;
  hashtest_profiler.user_cmp_function = Qnil;
  hashtest_profiler.cmpfn = cmpfn_profiler;
  hashtest_profiler.hashfn = hashfn_profiler;

  defsubr (&Sfunction_equal);

#ifdef PROFILER_CPU_SUPPORT
  profiler_cpu_running = NOT_RUNNING;
  cpu_log = Qnil;
  staticpro (&cpu_log);
  defsubr (&Sprofiler_cpu_start);
  defsubr (&Sprofiler_cpu_stop);
  defsubr (&Sprofiler_cpu_running_p);
  defsubr (&Sprofiler_cpu_log);
#endif
  profiler_memory_running = false;
  memory_log = Qnil;
  staticpro (&memory_log);
  defsubr (&Sprofiler_memory_start);
  defsubr (&Sprofiler_memory_stop);
  defsubr (&Sprofiler_memory_running_p);
  defsubr (&Sprofiler_memory_log);
}