1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 /*
30 * based on usr/src/uts/common/os/kmem.c r1.64 from 2001/12/18
31 *
32 * The slab allocator, as described in the following two papers:
33 *
34 * Jeff Bonwick,
35 * The Slab Allocator: An Object-Caching Kernel Memory Allocator.
36 * Proceedings of the Summer 1994 Usenix Conference.
37 * Available as /shared/sac/PSARC/1994/028/materials/kmem.pdf.
38 *
39 * Jeff Bonwick and Jonathan Adams,
40 * Magazines and vmem: Extending the Slab Allocator to Many CPUs and
41 * Arbitrary Resources.
42 * Proceedings of the 2001 Usenix Conference.
43 * Available as /shared/sac/PSARC/2000/550/materials/vmem.pdf.
44 *
45 * 1. Overview
46 * -----------
47 * umem is very close to kmem in implementation. There are five major
48 * areas of divergence:
49 *
50 * * Initialization
51 *
52 * * CPU handling
53 *
54 * * umem_update()
55 *
56 * * KM_SLEEP vs. UMEM_NOFAIL
57 *
58 * * lock ordering
59 *
60 * 2. Initialization
61 * -----------------
62 * kmem is initialized early on in boot, and knows that no one will call
63 * into it before it is ready. umem does not have these luxuries. Instead,
64 * initialization is divided into two phases:
65 *
66 * * library initialization, and
67 *
68 * * first use
69 *
70 * umem's full initialization happens at the time of the first allocation
71 * request (via malloc() and friends, umem_alloc(), or umem_zalloc()),
72 * or the first call to umem_cache_create().
73 *
74 * umem_free() and umem_cache_alloc() do not require special handling,
75 * since the only way to get valid arguments for them is to successfully
76 * call a function from the first group.
77 *
78 * 2.1. Library Initialization: umem_startup()
79 * -------------------------------------------
80 * umem_startup() is libumem.so's .init section. It calls pthread_atfork()
81 * to install the handlers necessary for umem's Fork1-Safety. Because of
82 * race condition issues, all other pre-umem_init() initialization is done
83 * statically (i.e. by the dynamic linker).
84 *
85 * For standalone use, umem_startup() returns everything to its initial
86 * state.
87 *
88 * 2.2. First use: umem_init()
89 * ------------------------------
90 * The first time any memory allocation function is used, we have to
91 * create the backing caches and vmem arenas which are needed for it.
92 * umem_init() is the central point for that task. When it completes,
93 * umem_ready is either UMEM_READY (all set) or UMEM_READY_INIT_FAILED (unable
94 * to initialize, probably due to lack of memory).
95 *
96 * There are four different paths from which umem_init() is called:
97 *
98 * * from umem_alloc() or umem_zalloc(), with 0 < size < UMEM_MAXBUF,
99 *
100 * * from umem_alloc() or umem_zalloc(), with size > UMEM_MAXBUF,
101 *
102 * * from umem_cache_create(), and
103 *
104 * * from memalign(), with align > UMEM_ALIGN.
105 *
106 * The last three just check if umem is initialized, and call umem_init()
107 * if it is not. For performance reasons, the first case is more complicated.
108 *
109 * 2.2.1. umem_alloc()/umem_zalloc(), with 0 < size < UMEM_MAXBUF
110 * -----------------------------------------------------------------
111 * In this case, umem_cache_alloc(&umem_null_cache, ...) is called.
112 * There is special-case code which causes any allocation on
113 * &umem_null_cache to fail by returning (NULL), regardless of the
114 * flags argument.
115 *
116 * So umem_cache_alloc() returns NULL, and umem_alloc()/umem_zalloc() call
117 * umem_alloc_retry(). umem_alloc_retry() sees that the allocation
118 * was against &umem_null_cache, and calls umem_init().
119 *
120 * If initialization is successful, umem_alloc_retry() returns 1, which
121 * causes umem_alloc()/umem_zalloc() to start over, which causes it to load
122 * the (now valid) cache pointer from umem_alloc_table.
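 *
 * A minimal sketch of that retry loop (simplified: the real umem_alloc()
 * also handles size == 0 and size > UMEM_MAXBUF, and the argument lists
 * shown here are approximations):
 *
 *	void *
 *	umem_alloc(size_t size, int umflag)
 *	{
 *		umem_cache_t *cp;
 *		void *buf;
 *	retry:
 *		cp = umem_alloc_table[(size - 1) >> UMEM_ALIGN_SHIFT];
 *		buf = umem_cache_alloc(cp, umflag);
 *		if (buf == NULL && umem_alloc_retry(cp, umflag))
 *			goto retry;
 *		return (buf);
 *	}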
123 *
124 * 2.2.2. Dealing with race conditions
125 * -----------------------------------
126 * There are a couple race conditions resulting from the initialization
127 * code that we have to guard against:
128 *
129 * * In umem_cache_create(), there is a special UMC_INTERNAL cflag
130 * that is passed for caches created during initialization. It
131 * is illegal for a user to try to create a UMC_INTERNAL cache.
132 * This allows initialization to proceed, but any other
133 * umem_cache_create()s will block by calling umem_init().
134 *
135 * Since umem_null_cache has a 1-element cache_cpu, its cache_cpu_mask
136 * is always zero. umem_cache_alloc uses cp->cache_cpu_mask to
137 * mask the cpu number. This prevents a race between grabbing a
138 * cache pointer out of umem_alloc_table and growing the cpu array.
139 *
140 *
141 * 3. CPU handling
142 * ---------------
143 * kmem uses the CPU's sequence number to determine which "cpu cache" to
144 * use for an allocation. Currently, there is no way to get the sequence
145 * number in userspace.
146 *
147 * umem keeps track of cpu information in umem_cpus, an array of umem_max_ncpus
148 * umem_cpu_t structures. CURCPU() is a "hint" function, which we then mask
149 * with either umem_cpu_mask or cp->cache_cpu_mask to find the actual "cpu" id.
150 * The mechanics of this are all in the CPU(mask) macro.
151 *
152 * Currently, umem uses _lwp_self() as its hint.
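 *
 * For example, with the CPUHINT() and CPU(mask) definitions later in this
 * file, and assuming initialization has completed so that cp->cache_cpu_mask
 * == umem_cpu_mask == umem_max_ncpus - 1 (umem_max_ncpus being a power of
 * two):
 *
 *	CPU(cp->cache_cpu_mask)
 *	    == umem_cpus + (CPUHINT() & cp->cache_cpu_mask)
 *	    == umem_cpus + (thr_self() & (umem_max_ncpus - 1))
 *
 * so any hint value indexes a valid umem_cpu_t without a bounds check.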
153 *
154 *
155 * 4. The update thread
156 * --------------------
157 * kmem uses a task queue, kmem_taskq, to do periodic maintenance on
158 * every kmem cache. vmem has a periodic timeout for hash table resizing.
159 * The kmem_taskq also provides a separate context in which kmem_cache_reap()
160 * runs, so that reaping does not depend on the context of kmem_reap() callers.
161 *
162 * Instead, umem has the concept of "updates", which are asynchronous requests
163 * for work attached to single caches. All caches with pending work are
164 * on a doubly linked list rooted at the umem_null_cache. All update state
165 * is protected by the umem_update_lock mutex, and the umem_update_cv is used
166 * for notification between threads.
167 *
168 * 4.1. Cache states with regards to updates
169 * -----------------------------------------
170 * A given cache is in one of three states:
171 *
172 * Inactive cache_uflags is zero, cache_u{next,prev} are NULL
173 *
174 * Work Requested cache_uflags is non-zero (but UMU_ACTIVE is not set),
175 * cache_u{next,prev} link the cache onto the global
176 * update list
177 *
178 * Active cache_uflags has UMU_ACTIVE set, cache_u{next,prev}
179 * are NULL, and either umem_update_thr or
180 * umem_st_update_thr is actively doing work on the
181 * cache.
182 *
183 * An update can be added to any cache in any state -- if the cache is
184 * Inactive, it transitions to being Work Requested. If the cache is
185 * Active, the worker will notice the new update and act on it before
186 * transitioning the cache to the Inactive state.
187 *
188 * If a cache is in the Active state, UMU_NOTIFY can be set, which asks
189 * the worker to broadcast the umem_update_cv when it has finished.
190 *
191 * 4.2. Update interface
192 * ---------------------
193 * umem_add_update() adds an update to a particular cache.
194 * umem_updateall() adds an update to all caches.
195 * umem_remove_updates() returns a cache to the Inactive state.
196 *
197 * umem_process_updates() processes all caches in the Work Requested state.
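 *
 * For example, scheduling a reap on every cache (which is what umem_reap(),
 * described in section 4.3, does) amounts to
 *
 *	umem_updateall(UMU_REAP);
 *
 * while a single cache can be handed work directly with
 *
 *	umem_add_update(cp, UMU_REAP);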
198 *
199 * 4.3. Reaping
200 * ------------
201 * When umem_reap() is called (at the time of heap growth), it schedules
202 * UMU_REAP updates on every cache. It then checks to see if the update
203 * thread exists (umem_update_thr != 0). If it does, it broadcasts
204 * the umem_update_cv to wake the update thread up, and returns.
205 *
206 * If the update thread does not exist (umem_update_thr == 0), and the
207 * program currently has multiple threads, umem_reap() attempts to create
208 * a new update thread.
209 *
210 * If the process is not multithreaded, or the creation fails, umem_reap()
211 * calls umem_st_update() to do an inline update.
212 *
213 * 4.4. The update thread
214 * ----------------------
215 * The update thread spends most of its time in cond_timedwait() on the
216 * umem_update_cv. It wakes up under two conditions:
217 *
218 * * The timedwait times out, in which case it needs to run a global
219 * update, or
220 *
221 * * someone cond_broadcast(3THR)s the umem_update_cv, in which case
222 * it needs to check if there are any caches in the Work Requested
223 * state.
224 *
225 * When it is time for another global update, umem calls umem_cache_update()
226 * on every cache, then calls vmem_update(), which tunes the vmem structures.
227 * umem_cache_update() can request further work using umem_add_update().
228 *
229 * After any work from the global update completes, the update timer is
230 * reset to umem_reap_interval seconds in the future. This makes the
231 * updates self-throttling.
232 *
233 * Reaps are similarly self-throttling. After a UMU_REAP update has
234 * been scheduled on all caches, umem_reap() sets a flag and wakes up the
235 * update thread. The update thread notices the flag, and resets the
236 * reap state.
237 *
238 * 4.5. Inline updates
239 * -------------------
240 * If the update thread is not running, umem_st_update() is used instead. It
241 * immediately does a global update (as above), then calls
242 * umem_process_updates() to process both the reaps that umem_reap() added and
243 * any work generated by the global update. Afterwards, it resets the reap
244 * state.
245 *
246 * While umem_st_update() is running, umem_st_update_thr holds the thread
247 * id of the thread performing the update.
248 *
249 * 4.6. Updates and fork1()
250 * ------------------------
251 * umem has fork1() pre- and post-handlers which lock up (and release) every
252 * mutex in every cache. They also lock up the umem_update_lock. Since
253 * fork1() only copies over a single lwp, other threads (including the update
254 * thread) could have been actively using a cache in the parent. This
255 * can lead to inconsistencies in the child process.
256 *
257 * Because we locked all of the mutexes, the only possible inconsistencies are:
258 *
259 * * a umem_cache_alloc() could leak its buffer.
260 *
261 * * a caller of umem_depot_alloc() could leak a magazine, and all the
262 * buffers contained in it.
263 *
264 * * a cache could be in the Active update state. In the child, there
265 * would be no thread actually working on it.
266 *
267 * * a umem_hash_rescale() could leak the new hash table.
268 *
269 * * a umem_magazine_resize() could be in progress.
270 *
271 * * a umem_reap() could be in progress.
272 *
273 * The memory leaks we can't do anything about. umem_release_child() resets
274 * the update state and moves any caches in the Active state to the Work Requested
275 * state. This might cause some updates to be re-run, but UMU_REAP and
276 * UMU_HASH_RESCALE are effectively idempotent, and the worst that can
277 * happen from umem_magazine_resize() is resizing the magazine twice in close
278 * succession.
279 *
280 * Much of the cleanup in umem_release_child() is skipped if
281 * umem_st_update_thr == thr_self(). This is so that applications which call
282 * fork1() from a cache callback do not break. Needless to say, any such
283 * application is tremendously broken.
284 *
285 *
286 * 5. KM_SLEEP vs. UMEM_NOFAIL
287 * ---------------------------
288 * Allocations against kmem and vmem have two basic modes: SLEEP and
289 * NOSLEEP. A sleeping allocation will go to sleep (waiting for
290 * more memory) instead of failing (returning NULL).
291 *
292 * SLEEP allocations presume an extremely multithreaded model, with
293 * a lot of allocation and deallocation activity. umem cannot presume
294 * that its clients have any particular type of behavior. Instead,
295 * it provides two types of allocations:
296 *
297 * * UMEM_DEFAULT, equivalent to KM_NOSLEEP (i.e. return NULL on
298 * failure)
299 *
300 * * UMEM_NOFAIL, which, on failure, calls an optional callback
301 * (registered with umem_nofail_callback()).
302 *
303 * The callback is invoked with no locks held, and can do an arbitrary
304 * amount of work. It then has a choice between:
305 *
306 * * Returning UMEM_CALLBACK_RETRY, which will cause the allocation
307 * to be restarted.
308 *
309 * * Returning UMEM_CALLBACK_EXIT(status), which will cause exit(2)
310 * to be invoked with status. If multiple threads attempt to do
311 * this simultaneously, only one will call exit(2).
312 *
313 * * Doing some kind of non-local exit (thr_exit(3THR), longjmp(3C),
314 * etc.)
315 *
316 * The default callback returns UMEM_CALLBACK_EXIT(255).
317 *
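 * For example, an application might register a callback that releases some
 * of its own cached state and retries the allocation (a sketch; the
 * app_release_caches() helper is hypothetical):
 *
 *	#include <umem.h>
 *
 *	static int
 *	app_nofail_cb(void)
 *	{
 *		app_release_caches();
 *		return (UMEM_CALLBACK_RETRY);
 *	}
 *
 *	...
 *	umem_nofail_callback(app_nofail_cb);
 *	void *buf = umem_alloc(4096, UMEM_NOFAIL);
 *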
318 * To have these callbacks without risk of state corruption (in the case of
319 * a non-local exit), we have to ensure that the callbacks get invoked
320 * close to the original allocation, with no inconsistent state or held
321 * locks. The following steps are taken:
322 *
323 * * All invocations of vmem are VM_NOSLEEP.
324 *
325 * * All constructor callbacks (which can themselves do allocations)
326 * are passed UMEM_DEFAULT as their required allocation argument. This
327 * way, the constructor will fail, allowing the highest-level allocation
328 * to invoke the nofail callback.
329 *
330 * If a constructor callback _does_ do a UMEM_NOFAIL allocation, and
331 * the nofail callback does a non-local exit, we will leak the
332 * partially-constructed buffer.
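 *
 * A well-behaved constructor therefore passes its flags argument (which
 * will be UMEM_DEFAULT here) down to any allocation it performs and fails
 * gracefully. A sketch (node_t, its n_data field, and NODE_DATA_SIZE are
 * hypothetical):
 *
 *	static int
 *	node_ctor(void *buf, void *private, int flags)
 *	{
 *		node_t *node = buf;
 *
 *		node->n_data = umem_alloc(NODE_DATA_SIZE, flags);
 *		return (node->n_data == NULL ? -1 : 0);
 *	}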
333 *
334 *
335 * 6. Lock Ordering
336 * ----------------
337 * umem has a few more locks than kmem does, mostly in the update path. The
338 * overall lock ordering (earlier locks must be acquired first) is:
339 *
340 * umem_init_lock
341 *
342 * vmem_list_lock
343 * vmem_nosleep_lock.vmpl_mutex
344 * vmem_t's:
345 * vm_lock
346 * sbrk_lock
347 *
348 * umem_cache_lock
349 * umem_update_lock
350 * umem_flags_lock
351 * umem_cache_t's:
352 * cache_cpu[*].cc_lock
353 * cache_depot_lock
354 * cache_lock
355 * umem_log_header_t's:
356 * lh_cpu[*].clh_lock
357 * lh_lock
358 */
359
360 #include <umem_impl.h>
361 #include <sys/vmem_impl_user.h>
362 #include "umem_base.h"
363 #include "vmem_base.h"
364
365 #include <sys/processor.h>
366 #include <sys/sysmacros.h>
367
368 #include <alloca.h>
369 #include <errno.h>
370 #include <limits.h>
371 #include <stdio.h>
372 #include <stdlib.h>
373 #include <string.h>
374 #include <strings.h>
375 #include <signal.h>
376 #include <unistd.h>
377 #include <atomic.h>
378
379 #include "misc.h"
380
381 #define UMEM_VMFLAGS(umflag) (VM_NOSLEEP)
382
383 size_t pagesize;
384
385 /*
386 * The default set of caches to back umem_alloc().
387 * These sizes should be reevaluated periodically.
388 *
389 * We want allocations that are multiples of the coherency granularity
390 * (64 bytes) to be satisfied from a cache which is a multiple of 64
391 * bytes, so that it will be 64-byte aligned. For all multiples of 64,
392 * the next kmem_cache_size greater than or equal to it must be a
393 * multiple of 64.
394 *
395 * This table must be in sorted order, from smallest to highest. The
396 * highest slot must be UMEM_MAXBUF, and every slot afterwards must be
397 * zero.
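 *
 * For example, with this table a umem_alloc(100, ...) request is satisfied
 * by the 112-byte (7 * 16) cache, and a umem_alloc(200, ...) request by the
 * 224-byte (7 * 32) cache.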
398 */
399 static int umem_alloc_sizes[] = {
400 #ifdef _LP64
401 1 * 8,
402 1 * 16,
403 2 * 16,
404 3 * 16,
405 #else
406 1 * 8,
407 2 * 8,
408 3 * 8,
409 4 * 8, 5 * 8, 6 * 8, 7 * 8,
410 #endif
411 4 * 16, 5 * 16, 6 * 16, 7 * 16,
412 4 * 32, 5 * 32, 6 * 32, 7 * 32,
413 4 * 64, 5 * 64, 6 * 64, 7 * 64,
414 4 * 128, 5 * 128, 6 * 128, 7 * 128,
415 P2ALIGN(8192 / 7, 64),
416 P2ALIGN(8192 / 6, 64),
417 P2ALIGN(8192 / 5, 64),
418 P2ALIGN(8192 / 4, 64), 2304,
419 P2ALIGN(8192 / 3, 64),
420 P2ALIGN(8192 / 2, 64), 4544,
421 P2ALIGN(8192 / 1, 64), 9216,
422 4096 * 3,
423 UMEM_MAXBUF, /* = 8192 * 2 */
424 /* 24 slots for user expansion */
425 0, 0, 0, 0, 0, 0, 0, 0,
426 0, 0, 0, 0, 0, 0, 0, 0,
427 0, 0, 0, 0, 0, 0, 0, 0,
428 };
429 #define NUM_ALLOC_SIZES (sizeof (umem_alloc_sizes) / sizeof (*umem_alloc_sizes))
430
431 static umem_magtype_t umem_magtype[] = {
432 { 1, 8, 3200, 65536 },
433 { 3, 16, 256, 32768 },
434 { 7, 32, 64, 16384 },
435 { 15, 64, 0, 8192 },
436 { 31, 64, 0, 4096 },
437 { 47, 64, 0, 2048 },
438 { 63, 64, 0, 1024 },
439 { 95, 64, 0, 512 },
440 { 143, 64, 0, 0 },
441 };
442
443 /*
444 * umem tunables
445 */
446 uint32_t umem_max_ncpus; /* # of CPU caches. */
447
448 uint32_t umem_stack_depth = 15; /* # stack frames in a bufctl_audit */
449 uint32_t umem_reap_interval = 10; /* max reaping rate (seconds) */
450 uint_t umem_depot_contention = 2; /* max failed trylocks per real interval */
451 uint_t umem_abort = 1; /* whether to abort on error */
452 uint_t umem_output = 0; /* whether to write to standard error */
453 uint_t umem_logging = 0; /* umem_log_enter() override */
454 uint32_t umem_mtbf = 0; /* mean time between failures [default: off] */
455 size_t umem_transaction_log_size; /* size of transaction log */
456 size_t umem_content_log_size; /* size of content log */
457 size_t umem_failure_log_size; /* failure log [4 pages per CPU] */
458 size_t umem_slab_log_size; /* slab create log [4 pages per CPU] */
459 size_t umem_content_maxsave = 256; /* UMF_CONTENTS max bytes to log */
460 size_t umem_lite_minsize = 0; /* minimum buffer size for UMF_LITE */
461 size_t umem_lite_maxalign = 1024; /* maximum buffer alignment for UMF_LITE */
462 size_t umem_maxverify; /* maximum bytes to inspect in debug routines */
463 size_t umem_minfirewall; /* hardware-enforced redzone threshold */
464
465 uint_t umem_flags = 0;
466
467 mutex_t umem_init_lock; /* locks initialization */
468 cond_t umem_init_cv; /* initialization CV */
469 thread_t umem_init_thr; /* thread initializing */
470 int umem_init_env_ready; /* environ pre-initted */
471 int umem_ready = UMEM_READY_STARTUP;
472
473 static umem_nofail_callback_t *nofail_callback;
474 static mutex_t umem_nofail_exit_lock;
475 static thread_t umem_nofail_exit_thr;
476
477 static umem_cache_t *umem_slab_cache;
478 static umem_cache_t *umem_bufctl_cache;
479 static umem_cache_t *umem_bufctl_audit_cache;
480
481 mutex_t umem_flags_lock;
482
483 static vmem_t *heap_arena;
484 static vmem_alloc_t *heap_alloc;
485 static vmem_free_t *heap_free;
486
487 static vmem_t *umem_internal_arena;
488 static vmem_t *umem_cache_arena;
489 static vmem_t *umem_hash_arena;
490 static vmem_t *umem_log_arena;
491 static vmem_t *umem_oversize_arena;
492 static vmem_t *umem_va_arena;
493 static vmem_t *umem_default_arena;
494 static vmem_t *umem_firewall_va_arena;
495 static vmem_t *umem_firewall_arena;
496
497 vmem_t *umem_memalign_arena;
498
499 umem_log_header_t *umem_transaction_log;
500 umem_log_header_t *umem_content_log;
501 umem_log_header_t *umem_failure_log;
502 umem_log_header_t *umem_slab_log;
503
504 #define CPUHINT() (thr_self())
505 #define CPUHINT_MAX() INT_MAX
506
507 #define CPU(mask) (umem_cpus + (CPUHINT() & (mask)))
508 static umem_cpu_t umem_startup_cpu = { /* initial, single, cpu */
509 UMEM_CACHE_SIZE(0),
510 0
511 };
512
513 static uint32_t umem_cpu_mask = 0; /* global cpu mask */
514 static umem_cpu_t *umem_cpus = &umem_startup_cpu; /* cpu list */
515
516 volatile uint32_t umem_reaping;
517
518 thread_t umem_update_thr;
519 struct timeval umem_update_next; /* timeofday of next update */
520 volatile thread_t umem_st_update_thr; /* only used when single-thd */
521
522 #define IN_UPDATE() (thr_self() == umem_update_thr || \
523 thr_self() == umem_st_update_thr)
524 #define IN_REAP() IN_UPDATE()
525
526 mutex_t umem_update_lock; /* cache_u{next,prev,flags} */
527 cond_t umem_update_cv;
528
529 volatile hrtime_t umem_reap_next; /* min hrtime of next reap */
530
531 mutex_t umem_cache_lock; /* inter-cache linkage only */
532
533 #ifdef UMEM_STANDALONE
534 umem_cache_t umem_null_cache;
535 static const umem_cache_t umem_null_cache_template = {
536 #else
537 umem_cache_t umem_null_cache = {
538 #endif
539 0, 0, 0, 0, 0,
540 0, 0,
541 0, 0,
542 0, 0,
543 "invalid_cache",
544 0, 0,
545 NULL, NULL, NULL, NULL,
546 NULL,
547 0, 0, 0, 0,
548 &umem_null_cache, &umem_null_cache,
549 &umem_null_cache, &umem_null_cache,
550 0,
551 DEFAULTMUTEX, /* start of slab layer */
552 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
553 &umem_null_cache.cache_nullslab,
554 {
555 &umem_null_cache,
556 NULL,
557 &umem_null_cache.cache_nullslab,
558 &umem_null_cache.cache_nullslab,
559 NULL,
560 -1,
561 0
562 },
563 NULL,
564 NULL,
565 DEFAULTMUTEX, /* start of depot layer */
566 NULL, {
567 NULL, 0, 0, 0, 0
568 }, {
569 NULL, 0, 0, 0, 0
570 }, {
571 {
572 DEFAULTMUTEX, /* start of CPU cache */
573 0, 0, NULL, NULL, -1, -1, 0
574 }
575 }
576 };
577
578 #define ALLOC_TABLE_4 \
579 &umem_null_cache, &umem_null_cache, &umem_null_cache, &umem_null_cache
580
581 #define ALLOC_TABLE_64 \
582 ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, \
583 ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, \
584 ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, \
585 ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4
586
587 #define ALLOC_TABLE_1024 \
588 ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, \
589 ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, \
590 ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, \
591 ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64
592
593 static umem_cache_t *umem_alloc_table[UMEM_MAXBUF >> UMEM_ALIGN_SHIFT] = {
594 ALLOC_TABLE_1024,
595 ALLOC_TABLE_1024
596 };
597
598
599 /* Used to constrain audit-log stack traces */
600 caddr_t umem_min_stack;
601 caddr_t umem_max_stack;
602
603
604 #define UMERR_MODIFIED 0 /* buffer modified while on freelist */
605 #define UMERR_REDZONE 1 /* redzone violation (write past end of buf) */
606 #define UMERR_DUPFREE 2 /* freed a buffer twice */
607 #define UMERR_BADADDR 3 /* freed a bad (unallocated) address */
608 #define UMERR_BADBUFTAG 4 /* buftag corrupted */
609 #define UMERR_BADBUFCTL 5 /* bufctl corrupted */
610 #define UMERR_BADCACHE 6 /* freed a buffer to the wrong cache */
611 #define UMERR_BADSIZE 7 /* alloc size != free size */
612 #define UMERR_BADBASE 8 /* buffer base address wrong */
613
614 struct {
615 hrtime_t ump_timestamp; /* timestamp of error */
616 int ump_error; /* type of umem error (UMERR_*) */
617 void *ump_buffer; /* buffer that induced abort */
618 void *ump_realbuf; /* real start address for buffer */
619 umem_cache_t *ump_cache; /* buffer's cache according to client */
620 umem_cache_t *ump_realcache; /* actual cache containing buffer */
621 umem_slab_t *ump_slab; /* slab according to umem_findslab() */
622 umem_bufctl_t *ump_bufctl; /* bufctl */
623 } umem_abort_info;
624
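/*
 * Debug pattern helpers: copy_pattern() fills a buffer with a 64-bit
 * pattern, verify_pattern() returns the address of the first word that does
 * not match the pattern (or NULL if the buffer is intact), and
 * verify_and_copy_pattern() checks for `old' while overwriting with `new',
 * restoring the already-overwritten prefix to `old' on a mismatch.
 */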
625 static void
626 copy_pattern(uint64_t pattern, void *buf_arg, size_t size)
627 {
628 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
629 uint64_t *buf = buf_arg;
630
631 while (buf < bufend)
632 *buf++ = pattern;
633 }
634
635 static void *
636 verify_pattern(uint64_t pattern, void *buf_arg, size_t size)
637 {
638 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
639 uint64_t *buf;
640
641 for (buf = buf_arg; buf < bufend; buf++)
642 if (*buf != pattern)
643 return (buf);
644 return (NULL);
645 }
646
647 static void *
648 verify_and_copy_pattern(uint64_t old, uint64_t new, void *buf_arg, size_t size)
649 {
650 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
651 uint64_t *buf;
652
653 for (buf = buf_arg; buf < bufend; buf++) {
654 if (*buf != old) {
655 copy_pattern(old, buf_arg,
656 (char *)buf - (char *)buf_arg);
657 return (buf);
658 }
659 *buf = new;
660 }
661
662 return (NULL);
663 }
664
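/*
 * Apply `func' to every cache on the global cache list, under
 * umem_cache_lock.
 */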
665 void
666 umem_cache_applyall(void (*func)(umem_cache_t *))
667 {
668 umem_cache_t *cp;
669
670 (void) mutex_lock(&umem_cache_lock);
671 for (cp = umem_null_cache.cache_next; cp != &umem_null_cache;
672 cp = cp->cache_next)
673 func(cp);
674 (void) mutex_unlock(&umem_cache_lock);
675 }
676
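/*
 * Record an update request for cp, linking it onto the tail of the global
 * update list (rooted at umem_null_cache) unless it is already queued or
 * Active. The caller must hold umem_update_lock.
 */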
677 static void
678 umem_add_update_unlocked(umem_cache_t *cp, int flags)
679 {
680 umem_cache_t *cnext, *cprev;
681
682 flags &= ~UMU_ACTIVE;
683
684 if (!flags)
685 return;
686
687 if (cp->cache_uflags & UMU_ACTIVE) {
688 cp->cache_uflags |= flags;
689 } else {
690 if (cp->cache_unext != NULL) {
691 ASSERT(cp->cache_uflags != 0);
692 cp->cache_uflags |= flags;
693 } else {
694 ASSERT(cp->cache_uflags == 0);
695 cp->cache_uflags = flags;
696 cp->cache_unext = cnext = &umem_null_cache;
697 cp->cache_uprev = cprev = umem_null_cache.cache_uprev;
698 cnext->cache_uprev = cp;
699 cprev->cache_unext = cp;
700 }
701 }
702 }
703
704 static void
705 umem_add_update(umem_cache_t *cp, int flags)
706 {
707 (void) mutex_lock(&umem_update_lock);
708
709 umem_add_update_unlocked(cp, flags);
710
711 if (!IN_UPDATE())
712 (void) cond_broadcast(&umem_update_cv);
713
714 (void) mutex_unlock(&umem_update_lock);
715 }
716
717 /*
718 * Remove a cache from the update list, waiting for any in-progress work to
719 * complete first.
720 */
721 static void
722 umem_remove_updates(umem_cache_t *cp)
723 {
724 (void) mutex_lock(&umem_update_lock);
725
726 /*
727 * Get it out of the active state
728 */
729 while (cp->cache_uflags & UMU_ACTIVE) {
730 int cancel_state;
731
732 ASSERT(cp->cache_unext == NULL);
733
734 cp->cache_uflags |= UMU_NOTIFY;
735
736 /*
737 * Make sure the update state is sane, before we wait
738 */
739 ASSERT(umem_update_thr != 0 || umem_st_update_thr != 0);
740 ASSERT(umem_update_thr != thr_self() &&
741 umem_st_update_thr != thr_self());
742
743 (void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE,
744 &cancel_state);
745 (void) cond_wait(&umem_update_cv, &umem_update_lock);
746 (void) pthread_setcancelstate(cancel_state, NULL);
747 }
748 /*
749 * Get it out of the Work Requested state
750 */
751 if (cp->cache_unext != NULL) {
752 cp->cache_uprev->cache_unext = cp->cache_unext;
753 cp->cache_unext->cache_uprev = cp->cache_uprev;
754 cp->cache_uprev = cp->cache_unext = NULL;
755 cp->cache_uflags = 0;
756 }
757 /*
758 * Make sure it is in the Inactive state
759 */
760 ASSERT(cp->cache_unext == NULL && cp->cache_uflags == 0);
761 (void) mutex_unlock(&umem_update_lock);
762 }
763
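/*
 * Schedule `flags' updates on every cache, then wake the update thread.
 */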
764 static void
765 umem_updateall(int flags)
766 {
767 umem_cache_t *cp;
768
769 /*
770 * NOTE: To prevent deadlock, umem_cache_lock is always acquired first.
771 *
772 * (umem_add_update is called from things run via umem_cache_applyall)
773 */
774 (void) mutex_lock(&umem_cache_lock);
775 (void) mutex_lock(&umem_update_lock);
776
777 for (cp = umem_null_cache.cache_next; cp != &umem_null_cache;
778 cp = cp->cache_next)
779 umem_add_update_unlocked(cp, flags);
780
781 if (!IN_UPDATE())
782 (void) cond_broadcast(&umem_update_cv);
783
784 (void) mutex_unlock(&umem_update_lock);
785 (void) mutex_unlock(&umem_cache_lock);
786 }
787
788 /*
789 * Debugging support. Given a buffer address, find its slab.
790 */
791 static umem_slab_t *
792 umem_findslab(umem_cache_t *cp, void *buf)
793 {
794 umem_slab_t *sp;
795
796 (void) mutex_lock(&cp->cache_lock);
797 for (sp = cp->cache_nullslab.slab_next;
798 sp != &cp->cache_nullslab; sp = sp->slab_next) {
799 if (UMEM_SLAB_MEMBER(sp, buf)) {
800 (void) mutex_unlock(&cp->cache_lock);
801 return (sp);
802 }
803 }
804 (void) mutex_unlock(&cp->cache_lock);
805
806 return (NULL);
807 }
808
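/*
 * Report an allocator error: determine the buffer's actual cache, slab, and
 * bufctl, record the details in umem_abort_info, and print a description of
 * what went wrong.
 */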
809 static void
810 umem_error(int error, umem_cache_t *cparg, void *bufarg)
811 {
812 umem_buftag_t *btp = NULL;
813 umem_bufctl_t *bcp = NULL;
814 umem_cache_t *cp = cparg;
815 umem_slab_t *sp;
816 uint64_t *off;
817 void *buf = bufarg;
818
819 int old_logging = umem_logging;
820
821 umem_logging = 0; /* stop logging when a bad thing happens */
822
823 umem_abort_info.ump_timestamp = gethrtime();
824
825 sp = umem_findslab(cp, buf);
826 if (sp == NULL) {
827 for (cp = umem_null_cache.cache_prev; cp != &umem_null_cache;
828 cp = cp->cache_prev) {
829 if ((sp = umem_findslab(cp, buf)) != NULL)
830 break;
831 }
832 }
833
834 if (sp == NULL) {
835 cp = NULL;
836 error = UMERR_BADADDR;
837 } else {
838 if (cp != cparg)
839 error = UMERR_BADCACHE;
840 else
841 buf = (char *)bufarg - ((uintptr_t)bufarg -
842 (uintptr_t)sp->slab_base) % cp->cache_chunksize;
843 if (buf != bufarg)
844 error = UMERR_BADBASE;
845 if (cp->cache_flags & UMF_BUFTAG)
846 btp = UMEM_BUFTAG(cp, buf);
847 if (cp->cache_flags & UMF_HASH) {
848 (void) mutex_lock(&cp->cache_lock);
849 for (bcp = *UMEM_HASH(cp, buf); bcp; bcp = bcp->bc_next)
850 if (bcp->bc_addr == buf)
851 break;
852 (void) mutex_unlock(&cp->cache_lock);
853 if (bcp == NULL && btp != NULL)
854 bcp = btp->bt_bufctl;
855 if (umem_findslab(cp->cache_bufctl_cache, bcp) ==
856 NULL || P2PHASE((uintptr_t)bcp, UMEM_ALIGN) ||
857 bcp->bc_addr != buf) {
858 error = UMERR_BADBUFCTL;
859 bcp = NULL;
860 }
861 }
862 }
863
864 umem_abort_info.ump_error = error;
865 umem_abort_info.ump_buffer = bufarg;
866 umem_abort_info.ump_realbuf = buf;
867 umem_abort_info.ump_cache = cparg;
868 umem_abort_info.ump_realcache = cp;
869 umem_abort_info.ump_slab = sp;
870 umem_abort_info.ump_bufctl = bcp;
871
872 umem_printf("umem allocator: ");
873
874 switch (error) {
875
876 case UMERR_MODIFIED:
877 umem_printf("buffer modified after being freed\n");
878 off = verify_pattern(UMEM_FREE_PATTERN, buf, cp->cache_verify);
879 if (off == NULL) /* shouldn't happen */
880 off = buf;
881 umem_printf("modification occurred at offset 0x%lx "
882 "(0x%llx replaced by 0x%llx)\n",
883 (uintptr_t)off - (uintptr_t)buf,
884 (longlong_t)UMEM_FREE_PATTERN, (longlong_t)*off);
885 break;
886
887 case UMERR_REDZONE:
888 umem_printf("redzone violation: write past end of buffer\n");
889 break;
890
891 case UMERR_BADADDR:
892 umem_printf("invalid free: buffer not in cache\n");
893 break;
894
895 case UMERR_DUPFREE:
896 umem_printf("duplicate free: buffer freed twice\n");
897 break;
898
899 case UMERR_BADBUFTAG:
900 umem_printf("boundary tag corrupted\n");
901 umem_printf("bcp ^ bxstat = %lx, should be %lx\n",
902 (intptr_t)btp->bt_bufctl ^ btp->bt_bxstat,
903 UMEM_BUFTAG_FREE);
904 break;
905
906 case UMERR_BADBUFCTL:
907 umem_printf("bufctl corrupted\n");
908 break;
909
910 case UMERR_BADCACHE:
911 umem_printf("buffer freed to wrong cache\n");
912 umem_printf("buffer was allocated from %s,\n", cp->cache_name);
913 umem_printf("caller attempting free to %s.\n",
914 cparg->cache_name);
915 break;
916
917 case UMERR_BADSIZE:
918 umem_printf("bad free: free size (%u) != alloc size (%u)\n",
919 UMEM_SIZE_DECODE(((uint32_t *)btp)[0]),
920 UMEM_SIZE_DECODE(((uint32_t *)btp)[1]));
921 break;
922
923 case UMERR_BADBASE:
924 umem_printf("bad free: free address (%p) != alloc address "
925 "(%p)\n", bufarg, buf);
926 break;
927 }