On Thu, May 05, 2011 at 10:23:47AM -0700, Garrett Cooper wrote: > On May 4, 2011, at 2:07 AM, Kostik Belousov wrote: > > > On Tue, May 03, 2011 at 11:58:49PM -0700, Garrett Cooper wrote: > >> On Tue, May 3, 2011 at 11:42 PM, Garrett Cooper <yanegomi_at_gmail.com> wrote: > >>> On Tue, May 3, 2011 at 10:59 PM, Kirk McKusick <mckusick_at_mckusick.com> wrote: > >>>>> Date: Tue, 3 May 2011 22:40:26 -0700 > >>>>> Subject: Nasty non-recursive lockmgr panic on softdep only enabled UFS > >>>>> partition when filesystem full > >>>>> From: Garrett Cooper <yanegomi_at_gmail.com> > >>>>> To: Jeff Roberson <jeff_at_freebsd.org>, > >>>>> Marshall Kirk McKusick <mckusick_at_mckusick.com> > >>>>> Cc: FreeBSD Current <freebsd-current_at_freebsd.org> > >>>>> > >>>>> Hi Jeff and Dr. McKusick, > >>>>> Ran into this panic when /usr ran out of space doing a make > >>>>> universe on amd64/r221219 (it took ~15 minutes for the panic to occur > >>>>> after the filesystem ran out of space -- wasn't quite sure what it was > >>>>> doing at the time): > >>>>> > >>>>> ... > >>>>> > >>>>> Let me know what other commands you would like for me to run in kgdb. > >>>>> Thanks, > >>>>> -Garrett > >>>> > >>>> You did not indicate whether you are running an 8.X system or a 9-current > >>>> system. It would be helpful to know that. > >>> > >>> I've actually been running CURRENT for a few years now, but you're right -- > >>> I didn't mention that part. > >>> > >>>> Jeff thinks that there may be a potential race in the locking code for > >>>> softdep_request_cleanup. If so, this patch for 9-current should fix it: > >>>> > >>>> Index: ffs_softdep.c > >>>> =================================================================== > >>>> --- ffs_softdep.c (revision 221385) > >>>> +++ ffs_softdep.c (working copy) > >>>> _at__at_ -11380,7 +11380,8 _at__at_ > >>>> continue; > >>>> } > >>>> MNT_IUNLOCK(mp); > >>>> - if (vget(lvp, LK_EXCLUSIVE | LK_INTERLOCK, curthread)) { > >>>> + if (vget(lvp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, > >>>> + curthread)) { > >>>> MNT_ILOCK(mp); > >>>> continue; > >>>> } > >>>> > >>>> If you are running an 8.X system, hopefully you will be able to apply it. > >>> > >>> I've applied it, rebuilt and installed the kernel, and trying to > >>> repro the case again. Will let you know how things go! > >> > >> Happened again with the change. It's really easy to repro: > >> > >> 1. Get a filesystem with UFS+SU > >> 2. Execute something that does a large number of small writes to a partition. > >> 3. 'dd if=/dev/zero of=FOO bs=10m' on the same partition > >> > >> The kernel will panic with the issue I discussed above. > >> Thanks! > > > > Jeff' change is required to avoid LORs, but it is not sufficient to > > prevent recursion. We must skip the vnode supplied as a parameter to > > softdep_request_cleanup(). Theoretically, other vnodes might be also > > locked by curthread, thus I think the change below is needed. Try this. > > > > diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c > > index a6d4441..25fa5d6 100644 > > --- a/sys/ufs/ffs/ffs_softdep.c > > +++ b/sys/ufs/ffs/ffs_softdep.c > > _at__at_ -11380,7 +11380,9 _at__at_ retry: > > continue; > > } > > MNT_IUNLOCK(mp); > > - if (vget(lvp, LK_EXCLUSIVE | LK_INTERLOCK, curthread)) { > > + if (VOP_ISLOCKED(lvp) || > > + vget(lvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_NOWAIT, > > + curthread)) { > > MNT_ILOCK(mp); > > continue; > > } > > Ran into the same panic after I applied the patch above with the repro steps I described before. One thing that I noticed is that the issue isn't as easy to reproduce unless you add the dd in parallel with the make operation. Well, I misread your original report. Also, there is another issue that is easily reproducable in similar situation. The latest patch is below. diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 231e3d6..f064053 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h _at__at_ -366,6 +366,8 _at__at_ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp); #define MNT_LAZY 3 /* push data not written by filesystem syncer */ #define MNT_SUSPEND 4 /* Suspend file system after sync */ +#define MNT_WAIT_ADV 0x10000000 /* MNT_WAIT prevent deadlock */ + /* * Generic file handle */ diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index e60514d..87837cc 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c _at__at_ -420,13 +420,13 _at__at_ nospace: */ if (reclaimed == 0) { reclaimed = 1; - softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); - UFS_UNLOCK(ump); if (bp) { + UFS_UNLOCK(ump); brelse(bp); bp = NULL; + UFS_LOCK(ump); } - UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); goto retry; } UFS_UNLOCK(ump); diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index d819c8a..d12e1dc 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h _at__at_ -141,7 +141,7 _at__at_ void softdep_setup_inofree(struct mount *, struct buf *, ino_t, void softdep_setup_sbupdate(struct ufsmount *, struct fs *, struct buf *); void *softdep_setup_trunc(struct vnode *vp, off_t length, int flags); void softdep_fsync_mountdev(struct vnode *); -int softdep_sync_metadata(struct vnode *); +int softdep_sync_metadata(struct vnode *, int flags); int softdep_process_worklist(struct mount *, int); int softdep_fsync(struct vnode *); int softdep_waitidle(struct mount *); diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index a6d4441..0b66e68 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c _at__at_ -492,7 +492,7 _at__at_ softdep_flushworklist(oldmnt, countp, td) } int -softdep_sync_metadata(struct vnode *vp) +softdep_sync_metadata(struct vnode *vp, int flags) { return (0); _at__at_ -733,7 +733,7 _at__at_ static void unlinked_inodedep(struct mount *, struct inodedep *); static void clear_unlinked_inodedep(struct inodedep *); static struct inodedep *first_unlinked_inodedep(struct ufsmount *); static int flush_pagedep_deps(struct vnode *, struct mount *, - struct diraddhd *); + struct diraddhd *, int); static void free_pagedep(struct pagedep *); static int flush_newblk_dep(struct vnode *, struct mount *, ufs_lbn_t); static int flush_inodedep_deps(struct mount *, ino_t); _at__at_ -10662,7 +10662,7 _at__at_ restart: * associated with the file. If any I/O errors occur, they are returned. */ int -softdep_sync_metadata(struct vnode *vp) +softdep_sync_metadata(struct vnode *vp, int flags) { struct pagedep *pagedep; struct allocindir *aip; _at__at_ -10792,7 +10792,8 _at__at_ loop: continue; if ((error = flush_pagedep_deps(vp, wk->wk_mp, - &pagedep->pd_diraddhd[i]))) { + &pagedep->pd_diraddhd[i], + flags))) { FREE_LOCK(&lk); goto loop_end; } _at__at_ -11056,10 +11057,11 _at__at_ flush_newblk_dep(vp, mp, lbn) * Called with splbio blocked. */ static int -flush_pagedep_deps(pvp, mp, diraddhdp) +flush_pagedep_deps(pvp, mp, diraddhdp, flags) struct vnode *pvp; struct mount *mp; struct diraddhd *diraddhdp; + int flags; { struct inodedep *inodedep; struct inoref *inoref; _at__at_ -11069,8 +11071,13 _at__at_ flush_pagedep_deps(pvp, mp, diraddhdp) int error = 0; struct buf *bp; ino_t inum; + int lkflags; ump = VFSTOUFS(mp); + lkflags = LK_EXCLUSIVE; + if ((flags & MNT_WAIT_ADV) != 0) + lkflags |= LK_NOWAIT; + restart: while ((dap = LIST_FIRST(diraddhdp)) != NULL) { /* _at__at_ -11112,7 +11119,7 _at__at_ restart: } if (dap->da_state & MKDIR_BODY) { FREE_LOCK(&lk); - if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp, + if ((error = ffs_vgetf(mp, inum, lkflags, &vp, FFSV_FORCEINSMQ))) break; error = flush_newblk_dep(vp, mp, 0); _at__at_ -11176,7 +11183,7 _at__at_ retry: */ if (dap == LIST_FIRST(diraddhdp)) { FREE_LOCK(&lk); - if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp, + if ((error = ffs_vgetf(mp, inum, lkflags, &vp, FFSV_FORCEINSMQ))) break; error = ffs_update(vp, MNT_WAIT); _at__at_ -11379,17 +11386,17 _at__at_ retry: VI_UNLOCK(lvp); continue; } - MNT_IUNLOCK(mp); - if (vget(lvp, LK_EXCLUSIVE | LK_INTERLOCK, curthread)) { - MNT_ILOCK(mp); + if (vget(lvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_NOWAIT, + curthread)) { continue; } + MNT_IUNLOCK(mp); if (lvp->v_vflag & VV_NOSYNC) { /* unlinked */ vput(lvp); MNT_ILOCK(mp); continue; } - (void) ffs_syncvnode(lvp, MNT_WAIT); + (void) ffs_syncvnode(lvp, MNT_WAIT | MNT_WAIT_ADV); vput(lvp); MNT_ILOCK(mp); } diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index cf6a5a8..c73e2a5 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c _at__at_ -216,9 +216,11 _at__at_ ffs_syncvnode(struct vnode *vp, int waitfor) struct bufobj *bo; struct buf *bp; struct buf *nbp; - int s, error, wait, passes, skipmeta; + int s, error, wait, passes, skipmeta, wait_adv; ufs_lbn_t lbn; + wait_adv = waitfor & MNT_WAIT_ADV; + waitfor &= ~MNT_WAIT_ADV; wait = (waitfor == MNT_WAIT); lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1)); bo = &vp->v_bufobj; _at__at_ -328,7 +330,7 _at__at_ loop: * with the vnode has been written. */ splx(s); - if ((error = softdep_sync_metadata(vp)) != 0) + if ((error = softdep_sync_metadata(vp, wait_adv)) != 0) return (error); s = splbio();
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:13 UTC