Add kern.objcache opaque sysctl to dump depot and per-cpu objcache statistics.

Change the lock protecting the objcache list from a spinlock to a lockmgr lock, because we need to hold it while executing the sysctl handler and we may block while copying data to userland.

Add ocstat(1) to print human-readable objcache stats.

By default, ocstat displays (from left to right) the cache name, the magazine capacity (MAGC), the number of unallocated objects (UNOBJ), the number of empty (EMAG) and full (FMAG) magazines in the depot, and the number of objects (rounds) in the loaded (LRD) and previous (PRD) magazines. Per-cpu values are separated by commas:

    $ ocstat
    NAME                    MAGC  UNOBJ  EMAG  FMAG  LRD   PRD
    vnodes                  256   -1120  2     0     0,0   0,0
    nameibufs               64    -5     2     0     2,2   0,0
    cdev_t                  32    -351   2     0     0,0   0,0
    vmspace                 32    -24    2     0     16,0  0,0
    thread                  3     -23    2     0     0,1   3,0
    exec-args               2     30     6     0     1,1   0,0
    CCMS                    64    -1120  2     0     0,0   0,0
    mbuf pkt hdr + cluster  64    1024   6     0     0,0   0,0
    mbuf + cluster          64    1024   6     0     0,0   0,0
    cluster mbuf            64    1024   6     0     0,0   0,0
    mbuf pkt hdr            64    2044   10    0     4,0   0,0
    mbuf                    64    2042   10    0     5,0   0,0

ocstat also provides the number of successful gets (GET), puts (PUT) and failed gets (NUL) since boot:

    $ ocstat -c
    NAME                    GET        PUT        NUL
    vnodes                  972,147    0,0        0,0
    nameibufs               3806,1237  3806,1237  0,0
    cdev_t                  350,0      0,0        0,0
    vmspace                 235,438    247,419    0,0
    thread                  245,440    247,420    0,0
    exec-args               358,114    358,114    0,0
    CCMS                    972,147    0,0        0,0
    mbuf pkt hdr + cluster  0,0        0,0        0,0
    mbuf + cluster          0,0        0,0        0,0
    cluster mbuf            0,0        0,0        0,0
    mbuf pkt hdr            2,3        5,0        0,0
    mbuf                    4,3        6,0        0,0

There is also a -l option to display all available stats and a -o option to select which fields to display (a la ps(1)).

TODO: ocstat man page, update sysctl man page.
Index: src2/sys/kern/kern_objcache.c =================================================================== --- src2.orig/sys/kern/kern_objcache.c 2008-06-08 06:04:04.876593000 +0200 +++ src2/sys/kern/kern_objcache.c 2008-06-08 06:29:48.000000000 +0200 @@ -34,15 +34,19 @@ #include #include +#include #include #include #include +#include #include #include #include #include +#include #include #include +#include #include static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache"); @@ -139,7 +143,7 @@ struct objcache { struct percpu_objcache cache_percpu[]; /* per-cpu caches */ }; -static struct spinlock objcachelist_spin; +static struct lock objcachelist_lock; static LIST_HEAD(objcachelist, objcache) allobjcaches; static struct magazine * @@ -274,9 +278,9 @@ objcache_create(const char *name, int cl SLIST_INSERT_HEAD(&depot->emptymagazines, mag, nextmagazine); } - spin_lock_wr(&objcachelist_spin); + lockmgr(&objcachelist_lock, LK_EXCLUSIVE); LIST_INSERT_HEAD(&allobjcaches, oc, oc_next); - spin_unlock_wr(&objcachelist_spin); + lockmgr(&objcachelist_lock, LK_RELEASE); return (oc); } @@ -816,9 +820,9 @@ objcache_destroy(struct objcache *oc) int clusterid, cpuid; struct magazinelist tmplist; - spin_lock_wr(&objcachelist_spin); + lockmgr(&objcachelist_lock, LK_EXCLUSIVE); LIST_REMOVE(oc, oc_next); - spin_unlock_wr(&objcachelist_spin); + lockmgr(&objcachelist_lock, LK_RELEASE); SLIST_INIT(&tmplist); for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++) { @@ -941,10 +945,97 @@ objcache_timer(void *dummy) #endif +/* + * Return number of magazines in list. Caller must hold spinlock of + * depot owning list. + */ + +static int +listsize(const struct magazinelist *ml) +{ + struct magazine *m; + int size = 0; + + SLIST_FOREACH(m, ml, nextmagazine) + size++; + + return size; +} + +/* + * Pass stats for all caches to userland. 
+ */ +static int +sysctl_kern_objcache(SYSCTL_HANDLER_ARGS) +{ + struct kinfo_objcache *kcp; + struct objcache *oc; + struct magazinedepot *dp; + int error = 0; + int size; + int i; + + /* + * Alloc enough space to store stats for one cache and zero it out + * to avoid leaking sensitive data to userland. + */ + size = sizeof(struct kinfo_objcache) + + (ncpus-1) * sizeof(struct kinfo_objcache_cpu); + kcp = kmalloc(size, M_TEMP, M_WAITOK|M_ZERO); + + lockmgr(&objcachelist_lock, LK_SHARED); + + LIST_FOREACH(oc, &allobjcaches, oc_next) { + strlcpy(kcp->kc_name, oc->name, sizeof(kcp->kc_name)); + + /* + * Dump depot-level stats. + */ + CTASSERT(MAXCLUSTERS == 1); + dp = &oc->depot[0]; + + spin_lock_rd(&dp->spin); + kcp->kc_emptymagazines = listsize(&dp->emptymagazines); + kcp->kc_fullmagazines = listsize(&dp->fullmagazines); + spin_unlock_rd(&dp->spin); + + kcp->kc_unallocated_objects = dp->unallocated_objects; + kcp->kc_magcapacity = dp->magcapacity; + + /* + * Dump per-cpu stats. + */ + + for (i = 0; i < ncpus; i++) { + struct percpu_objcache *cp = &oc->cache_percpu[i]; + struct kinfo_objcache_cpu *kccp = &kcp->kc_cpu[i]; + + kccp->kcc_load_rounds = cp->loaded_magazine->rounds; + kccp->kcc_prev_rounds = cp->previous_magazine->rounds; + kccp->kcc_gets_cum = cp->gets_cumulative; + kccp->kcc_gets_null = cp->gets_null; + kccp->kcc_puts_cum = cp->puts_cumulative; + } + + error = SYSCTL_OUT(req, kcp, size); + if (error) + goto end; + } + +end: + if (kcp != NULL) + kfree(kcp, M_TEMP); + lockmgr(&objcachelist_lock, LK_RELEASE); + return (error); +} + +SYSCTL_PROC(_kern, KERN_OBJCACHE, objcache, CTLTYPE_OPAQUE|CTLFLAG_RD, \ + NULL, 0, sysctl_kern_objcache, "S,kinfo_objcache", "objcache stats"); + static void objcache_init(void) { - spin_init(&objcachelist_spin); + lockinit(&objcachelist_lock, "oclist", 0, 0); #if 0 callout_init(&objcache_callout); objcache_rebalance_period = 60 * hz; Index: src2/usr.bin/Makefile 
=================================================================== --- src2.orig/usr.bin/Makefile 2008-06-08 06:04:04.877285000 +0200 +++ src2/usr.bin/Makefile 2008-06-08 06:19:50.000000000 +0200 @@ -128,6 +128,7 @@ SUBDIR= alias \ nl \ nohup \ objformat \ + ocstat\ opieinfo \ opiekey \ opiepasswd \ Index: src2/usr.bin/ocstat/ocstat.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ src2/usr.bin/ocstat/ocstat.c 2008-06-08 06:19:50.000000000 +0200 @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Nicolas Thery + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly$ + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* + * formatting data for an objcache attribute + */ +struct ocattr { + const char *ca_name; /* header name */ + int (*ca_fn)(const void *, int); /* print function */ + int ca_per_cpu; /* per-cpu stat? */ + int ca_minlen; /* min length in output */ + size_t ca_offset; /* offset in kinfo_* struct */ +}; + +#define MINLEN(p) \ + ((p)->ca_per_cpu ? 
(p)->ca_minlen*ncpus + ncpus-1 : (p)->ca_minlen) + +/* + * internal functions + */ +static void get_objcache_stats(void **statsp, size_t *statsizep, int *nstatsp); +static void parse_fmt(const char *fmt, struct ocattr *selattrs[]); +static void print_attrs(void *stats, size_t statsize, int nstats, + struct ocattr *attrs[]); +static void print_attr(struct kinfo_objcache *kcp, struct ocattr *cap, + int last); +static void print_per_cpu_attr(struct kinfo_objcache_cpu *cp, + struct ocattr *cap, int last); +static struct ocattr *lookup_attr(const char *name); +static int pint(const void *p, int minlen); +static int pname(const void *p, int minlen); +static int get_ncpus(void); +static void usage(void); + +/* + * table listing all supported attributes + */ + +#define OFFKOC(member) offsetof(struct kinfo_objcache, member) +#define OFFKOCCPU(member) offsetof(struct kinfo_objcache_cpu, member) + +struct ocattr allattrs[] = { + { "NAME", pname, 0, 16, OFFKOC(kc_name) }, + { "MAGC", pint, 0, 5, OFFKOC(kc_magcapacity) }, + { "EMAG", pint, 0, 5, OFFKOC(kc_emptymagazines) }, + { "FMAG", pint, 0, 5, OFFKOC(kc_fullmagazines) }, + { "UNOBJ", pint, 0, 6, OFFKOC(kc_unallocated_objects) }, + { "LRD", pint, 1, 4, OFFKOCCPU(kcc_load_rounds) }, + { "PRD", pint, 1, 4, OFFKOCCPU(kcc_prev_rounds) }, + { "GET", pint, 1, 5, OFFKOCCPU(kcc_gets_cum) }, + { "NUL", pint, 1, 3, OFFKOCCPU(kcc_gets_null) }, + { "PUT", pint, 1, 5, OFFKOCCPU(kcc_puts_cum) }, + { NULL, NULL, 0, 0, 0 } +}; + +#define NATTRS (sizeof(allattrs)/sizeof(allattrs[0])) + +/* + * format lists for various options + */ +const char fmt_default[] = "name,magc,unobj,emag,fmag,lrd,prd"; +const char fmt_l[] = "name,magc,emag,fmag,unobj,lrd,prd,get,put,nul"; +const char fmt_c[] = "name,get,put,nul"; + +int ncpus; /* number of CPUs in system */ + + +int +main(int argc, char *argv[]) +{ + struct ocattr *selattrs[NATTRS]; + void *stats; + int nstats; + size_t statsize; + const char *fmt; + int ch; + + ncpus = get_ncpus(); + + /* + * Parse 
command line and build list of attributes to display.
+	 */
+
+	fmt = fmt_default;
+	while ((ch = getopt(argc, argv, "clo:")) != -1) {
+		switch (ch) {
+		case 'l':
+			fmt = fmt_l;
+			break;
+		case 'c':
+			fmt = fmt_c;
+			break;
+		case 'o':
+			fmt = optarg;
+			break;
+		default:
+			usage();
+			break;
+		}
+	}
+
+	if (optind != argc) {
+		fprintf(stderr, "%s: unexpected argument: %s\n",
+		    getprogname(), argv[optind]);
+		usage();
+	}
+
+	parse_fmt(fmt, selattrs);
+
+	/*
+	 * Get stats from kernel and display selected subset.
+	 */
+
+	get_objcache_stats(&stats, &statsize, &nstats);
+	print_attrs(stats, statsize, nstats, selattrs);
+
+	free(stats);
+
+	return (0);
+}
+
+/*
+ * Get object cache stats from kernel. Caller must free *statsp.
+ */
+static void
+get_objcache_stats(void **statsp, size_t *statsizep, int *nstatsp)
+{
+	static int mib[2] = { CTL_KERN, KERN_OBJCACHE };
+	size_t totsize;
+	size_t statsize;
+	void *stats;
+
+	/*
+	 * sysctl(3) returns -1 and sets errno on failure. ENOMEM on the
+	 * fetch means the data grew since it was sized: size it again.
+	 */
+	stats = NULL;
+	for (;;) {
+		totsize = 0;
+		if (sysctl(mib, 2, NULL, &totsize, NULL, 0) == -1)
+			err(1, "can not get kern.objcache size");
+		/* never request 0 bytes: realloc(p, 0) may return NULL */
+		stats = realloc(stats, totsize > 0 ? totsize : 1);
+		if (stats == NULL)
+			errx(1, "out of memory");
+		if (sysctl(mib, 2, stats, &totsize, NULL, 0) == 0)
+			break;
+		if (errno != ENOMEM)
+			err(1, "can not get kern.objcache content");
+	}
+
+	statsize = sizeof(struct kinfo_objcache) +
+	    (ncpus-1) * sizeof(struct kinfo_objcache_cpu);
+	if ((totsize % statsize) != 0)
+		errx(1, "invalid sysctl size");
+
+	*statsp = stats;
+	*statsizep = statsize;
+	*nstatsp = totsize / statsize;
+}
+
+/*
+ * Parse comma-separated list of attribute names into NULL-terminated list
+ * of attributes.
+ */
+static void
+parse_fmt(const char *fmt, struct ocattr *selattrs[NATTRS])
+{
+	char *copy;
+	const char *name;
+	struct ocattr **pp;
+	unsigned i;
+
+	copy = strdup(fmt);
+	if (copy == NULL)
+		errx(1, "out of memory");
+
+	/* keep the last slot of selattrs[NATTRS] for the NULL terminator */
+	pp = selattrs;
+	for (i = 0, name = strtok(copy, ","); i < NATTRS - 1 && name != NULL;
+	    i++, name = strtok(NULL, ","))
+		*pp++ = lookup_attr(name);
+	*pp = NULL;
+
+	if (name != NULL)
+		warnx("format list too long, excedent ignored");
+
+	free(copy);
+}
+
+/*
+ * Display selected attributes of specified objcaches.
+ */
+static void
+print_attrs(void *stats, size_t statsize, int nstats, struct ocattr *attrs[])
+{
+	struct ocattr **capp;
+	struct ocattr *cap;
+	uint8_t *bp;
+	struct kinfo_objcache *kcp;
+	int i;
+
+	/* display headers */
+
+	for (capp = attrs; *capp != NULL; capp++) {
+		cap = *capp;
+		if (*(capp+1) != NULL) {
+			pname(cap->ca_name, MINLEN(cap));
+			printf(" ");
+		} else {
+			pname(cap->ca_name, 0);
+		}
+	}
+	printf("\n");
+
+	/* display attributes */
+
+	for (i = 0, bp = (uint8_t *)stats; i < nstats; i++, bp += statsize) {
+		for (capp = attrs; *capp != NULL; capp++) {
+			cap = *capp;
+			kcp = (struct kinfo_objcache *)bp;
+			if (*(capp+1) != NULL) {
+				print_attr(kcp, cap, 0);
+				printf(" ");
+			} else {
+				print_attr(kcp, cap, 1);
+			}
+		}
+		printf("\n");
+	}
+}
+
+
+/*
+ * Print specified attribute.
+ */
+
+static void
+print_attr(struct kinfo_objcache *kcp, struct ocattr *cap, int last)
+{
+	if (cap->ca_per_cpu) {
+		print_per_cpu_attr(kcp->kc_cpu, cap, last);
+	} else {
+		cap->ca_fn((const uint8_t *)kcp + cap->ca_offset,
+		    last ? 0 : MINLEN(cap));
+	}
+}
+
+/*
+ * Print specified per-CPU attribute.
+ */
+static void
+print_per_cpu_attr(struct kinfo_objcache_cpu *cp, struct ocattr *cap, int last)
+{
+	const uint8_t *bp;
+	int i;
+	int curlen = 0;		/* characters printed so far */
+	int nblanks;
+
+	for (i = 0; i < ncpus; i++) {
+		if (i > 0) {
+			printf(",");
+			curlen++;
+		}
+		bp = (const uint8_t *)(cp + i) + cap->ca_offset;
+		curlen += (*cap->ca_fn)(bp, 0);
+	}
+
+	if (!last) {
+		/* pad the comma-separated list out to the column width */
+		nblanks = MINLEN(cap) - curlen;
+		for (i = 0; i < nblanks; i++)
+			printf(" ");
+	}
+}
+
+
+/*
+ * Return pointer to attribute named name, ignoring case; exit if not found.
+ */
+static struct ocattr *
+lookup_attr(const char *name)
+{
+	struct ocattr *cap;
+
+	for (cap = allattrs; cap->ca_name != NULL; cap++)
+		if (strcasecmp(cap->ca_name, name) == 0)
+			return (cap);
+
+	errx(1, "unknown attribute: %s", name);
+}
+
+/*
+ * Print string pointed to by p, left-justified and padded with spaces to at
+ * least minlen characters. Return printf(3)'s return value.
+ */
+static int
+pname(const void *p, int minlen)
+{
+	return printf("%-*s", minlen, (const char *)p);
+}
+
+/*
+ * Print int pointed to by p, left-justified and padded with spaces to at
+ * least minlen characters. Return printf(3)'s return value.
+ */
+static int
+pint(const void *p, int minlen)
+{
+	return printf("%-*d", minlen, * (const int *)p);
+}
+
+/* Return the number of CPUs reported by the hw.ncpu sysctl; exit on error. */
+static int
+get_ncpus(void)
+{
+	int n;
+	size_t size = sizeof(n);	/* sysctlbyname(3) takes a size_t * */
+	if (sysctlbyname("hw.ncpu", &n, &size, NULL, 0) == -1)
+		err(1, "can not get number of CPUs");
+	if (size != sizeof(n) || n < 1)
+		errx(1, "can not get number of CPUs");
+	return (n);
+}
+
+static void
+usage(void)
+{
+	fprintf(stderr, "usage: %s [-cl] [-o fmt]\n", getprogname());
+	exit(1);
+}
Index: src2/usr.bin/ocstat/Makefile
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ src2/usr.bin/ocstat/Makefile 2008-06-08 06:19:50.000000000 +0200
@@ -0,0 +1,7 @@
+#
+# $DragonFly$
+
+PROG= ocstat
+NOMAN=
+
+.include <bsd.prog.mk>
Index: src2/sys/sys/kinfo.h
===================================================================
--- src2.orig/sys/sys/kinfo.h 2008-06-08 06:04:04.876857000 +0200
+++ src2/sys/sys/kinfo.h 2008-06-08 06:19:50.000000000 +0200
@@ -207,6 +207,29 @@ struct kinfo_proc {
 	int kp_spare[4];
 };
 
+#define KC_NAME_MAX 32 /* must be multiple of 4 */
+
+/*
+ * KERN_OBJCACHE subtype returns array of object cache statistics.
+ */
+
+struct kinfo_objcache_cpu {
+	int kcc_load_rounds;
+	int kcc_prev_rounds;
+	int kcc_gets_cum;
+	int kcc_gets_null;
+	int kcc_puts_cum;
+};
+
+struct kinfo_objcache {
+	char kc_name[KC_NAME_MAX];
+	int kc_emptymagazines;
+	int kc_fullmagazines;
+	int kc_unallocated_objects;
+	int kc_magcapacity;
+	struct kinfo_objcache_cpu kc_cpu[1]; /* one slot per CPU */
+};
+
 struct proc;
 struct lwp;
 struct thread;
Index: src2/sys/sys/sysctl.h
===================================================================
--- src2.orig/sys/sys/sysctl.h 2008-06-10 07:20:10.000000000 +0200
+++ src2/sys/sys/sysctl.h 2008-06-08 06:19:50.000000000 +0200
@@ -370,7 +370,8 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_e
 #define KERN_LOGSIGEXIT 34 /* int: do we log sigexit procs? */
 #define KERN_IOV_MAX 35 /* int: value of UIO_MAXIOV */
 #define KERN_MAXPOSIXLOCKSPERUID 36 /* int: max POSIX locks per uid */
-#define KERN_MAXID 37 /* number of valid kern ids */
+#define KERN_OBJCACHE 37 /* struct: object cache stats */
+#define KERN_MAXID 38 /* number of valid kern ids */
 
 #define CTL_KERN_NAMES { \
 	{ 0, 0 }, \
@@ -410,6 +411,7 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_e
 	{ "logsigexit", CTLTYPE_INT }, \
 	{ "iov_max", CTLTYPE_INT }, \
 	{ "maxposixlocksperuid", CTLTYPE_INT }, \
+	{ "objcache", CTLTYPE_STRUCT }, \
 }
 
 /*