:Increasing MAXBUFSPACE looks like it would make the problem worse :because getdatablk() does a linear search. I've researched it a bit more and the default buffer cache is only 40KB... only 5 buffers or so (and the minimum is 5), so it seems unlikely that the buffer cache is responsible for the cpu use. I went ahead and hashed the buffer cache anyway, for DFly. The patch (for DFly) is included below if anyone wants to adapt it. It is NOT well tested yet so use at your own risk. I also enhanced the disk I/O / cache statistics reporting for ^T. I think the best way to track down the cpu usage is for Mark to actually ^\ (core dump) his fsck while it is eating all that cpu and then use gdb to see which routine is burning the cpu. Do that a couple of times. It may or may not be possible to fix the problem but I can't imagine why fsck would burn so much cpu with such a small buffer cache short of there being a bug in fsck. -Matt Matthew Dillon <dillon_at_backplane.com> [Patch relative to DragonFly, would have to be adapted to FreeBSD-5] Index: fsck.h =================================================================== RCS file: /cvs/src/sbin/fsck/fsck.h,v retrieving revision 1.4 diff -u -r1.4 fsck.h --- fsck.h 3 Nov 2003 19:51:04 -0000 1.4 +++ fsck.h 4 Sep 2004 18:26:31 -0000 _at__at_ -41,8 +41,8 _at__at_ #define MAXDUP 10 /* limit on dup blks (per inode) */ #define MAXBAD 10 /* limit on bad blks (per inode) */ -#define MAXBUFSPACE 40*1024 /* maximum space to allocate to buffers */ -#define INOBUFSIZE 56*1024 /* size of buffer to read inodes in pass1 */ +#define MAXBUFSPACE (4*1024*1024) /* maximum space to allocate to buffers */ +#define INOBUFSIZE (256*1024) /* size of buffer to read inodes in pass1 */ /* * Each inode on the filesystem is described by the following structure. _at__at_ -94,10 +94,6 _at__at_ char b_dirty; }; -#define B_INUSE 1 - -#define MINBUFS 5 /* minimum number of buffers required */ -struct bufarea bufhead; /* head of list of other blks in filesys */ struct bufarea sblk; /* file system superblock */ struct bufarea cgblk; /* cylinder group blocks */ struct bufarea *pdirbp; /* current directory contents */ Index: fsutil.h =================================================================== RCS file: /cvs/src/sbin/fsck/fsutil.h,v retrieving revision 1.3 diff -u -r1.3 fsutil.h --- fsutil.h 1 Nov 2003 17:15:58 -0000 1.3 +++ fsutil.h 4 Sep 2004 18:00:29 -0000 _at__at_ -54,6 +54,7 _at__at_ void *emalloc(size_t); void *erealloc(void *, size_t); char *estrdup(const char *); +void show_cacheinfo(void); #define CHECK_PREEN 0x0001 #define CHECK_VERBOSE 0x0002 Index: pass1.c =================================================================== RCS file: /cvs/src/sbin/fsck/pass1.c,v retrieving revision 1.5 diff -u -r1.5 pass1.c --- pass1.c 1 Nov 2003 17:15:58 -0000 1.5 +++ pass1.c 4 Sep 2004 18:00:02 -0000 _at__at_ -92,6 +92,7 _at__at_ printf("%s: phase 1: cyl group %d of %d (%d%%)\n", cdevname, c, sblock.fs_ncg, c * 100 / sblock.fs_ncg); + show_cacheinfo(); got_siginfo = 0; } /* Index: pass1b.c =================================================================== RCS file: /cvs/src/sbin/fsck/pass1b.c,v retrieving revision 1.5 diff -u -r1.5 pass1b.c --- pass1b.c 1 Nov 2003 17:15:58 -0000 1.5 +++ pass1b.c 4 Sep 2004 18:00:04 -0000 _at__at_ -65,6 +65,7 _at__at_ printf("%s: phase 1b: cyl group %d of %d (%d%%)\n", cdevname, c, sblock.fs_ncg, c * 100 / sblock.fs_ncg); + show_cacheinfo(); got_siginfo = 0; } for (i = 0; i < sblock.fs_ipg; i++, inumber++) { Index: pass2.c =================================================================== RCS file: /cvs/src/sbin/fsck/pass2.c,v retrieving revision 1.6 diff -u -r1.6 pass2.c --- pass2.c 4 Feb 2004 17:39:59 -0000 1.6 +++ pass2.c 4 Sep 2004 18:00:08 -0000 _at__at_ -134,6 +134,7 _at__at_ printf("%s: phase 2: dir %d of %ld (%d%%)\n", cdevname, inpp - inpsort, inplast, (int)((inpp - inpsort) * 100 / inplast)); + show_cacheinfo(); got_siginfo = 0; } inp = *inpp; Index: pass3.c =================================================================== RCS file: /cvs/src/sbin/fsck/pass3.c,v retrieving revision 1.5 diff -u -r1.5 pass3.c --- pass3.c 4 Feb 2004 17:39:59 -0000 1.5 +++ pass3.c 4 Sep 2004 18:00:11 -0000 _at__at_ -59,6 +59,7 _at__at_ printf("%s: phase 3: dir %ld of %ld (%d%%)\n", cdevname, inplast - inpindex - 1, inplast, (int)((inplast - inpindex - 1) * 100 / inplast)); + show_cacheinfo(); got_siginfo = 0; } inp = inpsort[inpindex]; Index: pass4.c =================================================================== RCS file: /cvs/src/sbin/fsck/pass4.c,v retrieving revision 1.4 diff -u -r1.4 pass4.c --- pass4.c 28 Sep 2003 14:39:17 -0000 1.4 +++ pass4.c 4 Sep 2004 18:00:13 -0000 _at__at_ -62,6 +62,7 _at__at_ printf("%s: phase 4: cyl group %d of %d (%d%%)\n", cdevname, cg, sblock.fs_ncg, cg * 100 / sblock.fs_ncg); + show_cacheinfo(); got_siginfo = 0; } inumber = cg * sblock.fs_ipg; Index: pass5.c =================================================================== RCS file: /cvs/src/sbin/fsck/pass5.c,v retrieving revision 1.4 diff -u -r1.4 pass5.c --- pass5.c 28 Sep 2003 14:39:17 -0000 1.4 +++ pass5.c 4 Sep 2004 18:00:17 -0000 _at__at_ -169,6 +169,7 _at__at_ printf("%s: phase 5: cyl group %d of %d (%d%%)\n", cdevname, c, sblock.fs_ncg, c * 100 / sblock.fs_ncg); + show_cacheinfo(); got_siginfo = 0; } getblk(&cgblk, cgtod(fs, c), fs->fs_cgsize); Index: utilities.c =================================================================== RCS file: /cvs/src/sbin/fsck/utilities.c,v retrieving revision 1.6 diff -u -r1.6 utilities.c --- utilities.c 4 Feb 2004 17:39:59 -0000 1.6 +++ utilities.c 4 Sep 2004 18:29:13 -0000 _at__at_ -46,10 +46,42 _at__at_ #include "fsck.h" -long diskreads, totalreads; /* Disk cache statistics */ +struct bufarea *bufhead; +struct bufarea buffree; +int bufhash_size; +int bufhash_reap; +int bufhash_mask; +int nbuffers; +long totaliterations; /* Disk cache statistics */ +long totalreads; +long diskreads; +long bdiskreads; + +#define BUFHASH(blkno) ((blkno + (blkno >> 8) + (blkno >> 15)) & bufhash_mask) +#define BUFSPERBUCKET 5 /* nominal hash table sizing factor */ +#define MINBUFS 16 /* minimum number of buffers required */ + +#define B_INUSE 0x0001 + static void rwerror(char *mesg, ufs_daddr_t blk); +void +show_cacheinfo(void) +{ + printf("bufcache %ld.%02d%% (%ld/%ld) %ld uncached_rd " + "%d/%d used %ld scans\n", + (totalreads - bdiskreads) * 100 / totalreads, + (totalreads - bdiskreads) * 10000 / totalreads % 100, + totalreads - bdiskreads, + totalreads, + diskreads - bdiskreads, + nbuffers - (int)buffree.b_size, + nbuffers, + totaliterations + ); +} + int ftypeok(struct dinode *dp) { _at__at_ -133,7 +165,7 _at__at_ void bufinit(void) { - register struct bufarea *bp; + struct bufarea *bp; long bufcnt, i; char *bufp; _at__at_ -143,10 +175,32 _at__at_ errx(EEXIT, "cannot allocate buffer pool"); cgblk.b_un.b_buf = bufp; initbarea(&cgblk); - bufhead.b_next = bufhead.b_prev = &bufhead; + + /* + * How many buffers should we have? + */ bufcnt = MAXBUFSPACE / sblock.fs_bsize; if (bufcnt < MINBUFS) bufcnt = MINBUFS; + + /* + * Create a hash table large enough to hold approximately 10 + * buffers per entry and initialize the list heads. + */ + bufhash_size = 1; + while (bufhash_size < bufcnt / BUFSPERBUCKET) + bufhash_size <<= 1; + bufhash_mask = bufhash_size - 1; + bufhead = calloc(sizeof(struct bufarea), bufhash_size); + for (i = 0; i < bufhash_size; ++i) + bufhead[i].b_next = bufhead[i].b_prev = &bufhead[i]; + buffree.b_next = &buffree; + buffree.b_prev = &buffree; + + /* + * Allocate memory for our buffers and place them on the free + * list. + */ for (i = 0; i < bufcnt; i++) { bp = (struct bufarea *)malloc(sizeof(struct bufarea)); bufp = malloc((unsigned int)sblock.fs_bsize); _at__at_ -156,13 +210,14 _at__at_ errx(EEXIT, "cannot allocate buffer pool"); } bp->b_un.b_buf = bufp; - bp->b_prev = &bufhead; - bp->b_next = bufhead.b_next; - bufhead.b_next->b_prev = bp; - bufhead.b_next = bp; + bp->b_prev = &buffree; + bp->b_next = buffree.b_next; + buffree.b_next->b_prev = bp; + buffree.b_next = bp; + ++buffree.b_size; initbarea(bp); } - bufhead.b_size = i; /* save number of buffers */ + nbuffers = bufcnt; } /* _at__at_ -171,26 +226,56 _at__at_ struct bufarea * getdatablk(ufs_daddr_t blkno, long size) { - register struct bufarea *bp; + struct bufarea *bp; + struct bufarea *bh_old; + struct bufarea *bh_new; + int i; - for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next) + bh_old = bh_new = &bufhead[BUFHASH(blkno)]; + + for (bp = bh_new->b_next; bp != bh_new; bp = bp->b_next) { if (bp->b_bno == fsbtodb(&sblock, blkno)) goto foundit; - for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev) - if ((bp->b_flags & B_INUSE) == 0) - break; - if (bp == &bufhead) - errx(EEXIT, "deadlocked buffer pool"); + ++totaliterations; + } + if (bh_new->b_size > BUFSPERBUCKET) { + for (bp = bh_new->b_prev; bp != bh_new; bp = bp->b_prev) { + if ((bp->b_flags & B_INUSE) == 0) + goto getit; + ++totaliterations; + } + } + if (buffree.b_next != &buffree) { + bh_old = &buffree; + bp = buffree.b_next; + goto getit; + } + for (i = bufhash_size; i; --i) { + bh_old = &bufhead[bufhash_reap]; + bufhash_reap = (bufhash_reap - 1) & bufhash_mask; + if (bh_old->b_size < BUFSPERBUCKET / 4) + continue; + for (bp = bh_old->b_prev; bp != bh_old; bp = bp->b_prev) { + if ((bp->b_flags & B_INUSE) == 0) + goto getit; + ++totaliterations; + } + } + errx(EEXIT, "deadlocked buffer pool"); +getit: + ++bdiskreads; getblk(bp, blkno, size); /* fall through */ foundit: - totalreads++; - bp->b_prev->b_next = bp->b_next; + totalreads++; /* accounting */ + ++bh_new->b_size; + --bh_old->b_size; + bp->b_prev->b_next = bp->b_next; /* unlink */ bp->b_next->b_prev = bp->b_prev; - bp->b_prev = &bufhead; - bp->b_next = bufhead.b_next; - bufhead.b_next->b_prev = bp; - bufhead.b_next = bp; + bp->b_prev = bh_new; /* relink */ + bp->b_next = bh_new->b_next; + bh_new->b_next->b_prev = bp; + bh_new->b_next = bp; bp->b_flags |= B_INUSE; return (bp); } _at__at_ -203,7 +288,6 _at__at_ dblk = fsbtodb(&sblock, blk); if (bp->b_bno != dblk) { flush(fswritefd, bp); - diskreads++; bp->b_errs = bread(fsreadfd, bp->b_un.b_buf, dblk, size); bp->b_bno = dblk; bp->b_size = size; _at__at_ -248,8 +332,9 _at__at_ void ckfini(int markclean) { - register struct bufarea *bp, *nbp; + struct bufarea *bh, *bp, *nbp; int ofsmodified, cnt = 0; + int i; if (fswritefd < 0) { (void)close(fsreadfd); _at__at_ -264,15 +349,24 _at__at_ } flush(fswritefd, &cgblk); free(cgblk.b_un.b_buf); - for (bp = bufhead.b_prev; bp && bp != &bufhead; bp = nbp) { - cnt++; - flush(fswritefd, bp); - nbp = bp->b_prev; - free(bp->b_un.b_buf); - free((char *)bp); + + for (i = 0; i < bufhash_size; ++i) { + bh = &bufhead[i]; + + for (bp = bh->b_prev; bp && bp != bh; bp = nbp) { + cnt++; + flush(fswritefd, bp); + nbp = bp->b_prev; + free(bp->b_un.b_buf); + free((char *)bp); + } + bh->b_next = bh; + bh->b_prev = bh; + } + if (nbuffers != cnt + buffree.b_size) { + errx(EEXIT, "panic: lost %d buffers", + nbuffers - cnt + buffree.b_size); } - if (bufhead.b_size != cnt) - errx(EEXIT, "panic: lost %d buffers", bufhead.b_size - cnt); pbp = pdirbp = (struct bufarea *)0; if (sblock.fs_clean != markclean) { sblock.fs_clean = markclean; _at__at_ -291,8 +385,7 _at__at_ rerun = 1; } if (debug) - printf("cache missed %ld of %ld (%d%%)\n", diskreads, - totalreads, (int)(diskreads * 100 / totalreads)); + show_cacheinfo(); (void)close(fsreadfd); (void)close(fswritefd); } _at__at_ -304,6 +397,7 _at__at_ int i, errs; off_t offset; + diskreads++; offset = blk; offset *= dev_bsize; if (lseek(fd, offset, 0) < 0)Received on Sat Sep 04 2004 - 16:38:30 UTC
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:38:10 UTC