On Mon, 18 Oct 2004, Robert Watson wrote: > If this is timing-related, it could be KTR disrupts the timing > sufficiently to prevent it. More likely, it went away because the work > load on the NFS server changed, or the directory layout changed, or some > other factor that caused the passage through the NFS code to change. Actually, I think I found it. Did the drive or partition layout in your NFS server change recently? If so, that would cause the fsid of the file system to change, resulting in ESTALE errors being returned to the client; I found one or two nits in the error handling for vfs_getvfs() that would result in the problem you saw. The attached patch should correct, as well as add additional assertions to catch related bugs; note that this patch appears to work fine in my environment, but that the additional assertions could detect bugs I'm not seeing in my environment for similar reasons to the ones that prevented me from seeing the one you just reported. FYI, when the last client referencing the old FSID was rebooted, the problem would have gone away as that set of ESTALE conditions would no longer be triggered. Robert N M Watson FreeBSD Core Team, TrustedBSD Projects robert_at_fledge.watson.org Principal Research Scientist, McAfee Research > > - aW > > > > > > > > 0n Sun, Oct 17, 2004 at 10:19:34AM -0400, Robert Watson wrote: > > > > On Fri, 15 Oct 2004, Wilkinson, Alex wrote: > > > > > Hi all, > > > > > > I currently get a panic with "nfs_server_enable=YES" in /etc/rc.conf. > > > > > > OS: FreeBSD 5.3-BETA4 #2: Tue Sep 14 13:55:30 UTC 2004 > > > > > > Backtrace > > > --------- > > > > > > panic: _mtx_lock_sleep: recursed on non-recursive mutex nfsd_mtx _at_ /usr /src/sys/nfsserver/nfs_serv.c:1947 > > > > Is the NFS server code compiled into your kernel, or is it getting loaded > > as a module? Do you have any other NFS-related entries in /etc/rc.conf? > > Could you show the output of "show locks" and "show witness" with witness > > compiled into the kernel? > > > > Thanks! > > > > Robert N M Watson FreeBSD Core Team, TrustedBSD Projects > > robert_at_fledge.watson.org Principal Research Scientist, McAfee Research > > > > > > > cpuid = 0 > > > KDB: enter: panic > > > [thread 100074] > > > Stopped at kdb_enter+0x32: leave > > > db> tr > > > kdb_enter(c068ba66,0,c068aed8,dd2ed90c,c1b6b340) at kdb_enter+0x32 > > > panic(c068aed8,c069885f,c0698617,79b,c0698617) at panic+0x1b0 > > > _mtx_lock_sleep(c071c940,c1b6b340,0,c0698617,79b) at _mtx_lock_sleep+0x16e > > > _mtx_lock_flags(c071c940,0,c0698617,79b,0) at _mtx_lock_flags+0xb0 > > > nfsrv_create(c1eb6800,c1b98800,c1b6b340,dd2edc8c,0) at nfsrv_create+0x8c4 > > > nfssvc(c1b6b340,dd2edd14,8,0,2) at nfssvc+0x6ea > > > syscall(2f,2f,2f,0,0) at syscall+0x13b > > > Xint0x80_syscall() at Xint0x80_syscall+0x1f > > > --- syscall (155, FreeBSD ELF32, nfssvc), eip = 0x280c60bf, esp = 0xbfbfeb1c, eb p = 0xbfbfeb38 --- > > > > > > > > > - aW > > > _______________________________________________ > > > freebsd-current_at_freebsd.org mailing list > > > http://lists.freebsd.org/mailman/listinfo/freebsd-current > > > To unsubscribe, send any mail to "freebsd-current-unsubscribe_at_freebsd.org" > > > > > > > _______________________________________________ > > freebsd-current_at_freebsd.org mailing list > > http://lists.freebsd.org/mailman/listinfo/freebsd-current > > To unsubscribe, send any mail to "freebsd-current-unsubscribe_at_freebsd.org" > > > > _______________________________________________ > freebsd-current_at_freebsd.org mailing list > http://lists.freebsd.org/mailman/listinfo/freebsd-current > To unsubscribe, send any mail to "freebsd-current-unsubscribe_at_freebsd.org" > Index: nfs_serv.c =================================================================== RCS file: /home/ncvs/src/sys/nfsserver/nfs_serv.c,v retrieving revision 1.148 diff -u -r1.148 nfs_serv.c --- nfs_serv.c 25 Aug 2004 16:52:59 -0000 1.148 +++ nfs_serv.c 18 Oct 2004 09:53:52 -0000 _at__at_ -225,6 +225,7 _at__at_ tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsmode); nfsmout: + NFSD_LOCK_ASSERT(); if (vp) { NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ _at__at_ -285,6 +286,7 @@ /* fall through */ nfsmout: + NFSD_LOCK_ASSERT(); if (vp) { NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ _at__at_ -412,6 +414,7 _at__at_ mtx_unlock(&Giant); /* VFS */ NFSD_LOCK(); } + NFSD_LOCK_ASSERT(); /* * If the size is being changed write acces is required, otherwise _at__at_ -439,6 +442,7 _at__at_ if (!error) error = postat_ret; out: + NFSD_LOCK_ASSERT(); if (vp != NULL) { NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ _at__at_ -460,6 +464,7 @@ /* fall through */ nfsmout: + NFSD_LOCK_ASSERT(); NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ if (vp) _at__at_ -653,6 +658,7 @@ } nfsmout: + NFSD_LOCK_ASSERT(); NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ if (dirp) _at__at_ -771,6 +777,7 _at__at_ mb->m_next = mp3; mp3 = NULL; nfsmout: + NFSD_LOCK_ASSERT(); if (mp3) m_freem(mp3); if (vp) { _at__at_ -1040,6 +1047,7 _at__at_ } *tl = txdr_unsigned(cnt); nfsmout: + NFSD_LOCK_ASSERT(); if (vp) { NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ _at__at_ -1242,6 +1250,7 @@ if (!error) error = aftat_ret; ereply: + NFSD_LOCK_ASSERT(); nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3)); if (v3) { _at__at_ -1275,6 +1284,7 @@ } error = 0; nfsmout: + NFSD_LOCK_ASSERT(); NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ if (vp) _at__at_ -1386,6 +1396,7 @@ } if (len > NFS_MAXDATA || len < 0 || i < len) { nfsmout: + NFSD_LOCK_ASSERT(); m_freem(mrep); error = EIO; nfsm_writereply(2 * NFSX_UNSIGNED); _at__at_ -1719,7 +1730,7 _at__at_ nfsm_srvmtofh(fhp); if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) { error = ESTALE; - goto ereply; + goto ereply_locked; } NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ _at__at_ -1943,8 +1954,11 @@ } } ereply: + NFSD_UNLOCK_ASSERT(); mtx_unlock(&Giant); /* VFS */ NFSD_LOCK(); +ereply_locked: + NFSD_LOCK_ASSERT(); nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { _at__at_ -1961,6 +1975,7 _at__at_ error = 0; nfsmout: + NFSD_LOCK_ASSERT(); NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ if (nd.ni_startdir) { _at__at_ -2116,6 +2131,7 @@ * send response, cleanup, return. */ out: + NFSD_UNLOCK_ASSERT(); if (nd.ni_startdir) { vrele(nd.ni_startdir); nd.ni_startdir = NULL; _at__at_ -2146,9 +2162,10 @@ diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); VOP_UNLOCK(dirp, 0, td); } -ereply: mtx_unlock(&Giant); /* VFS */ NFSD_LOCK(); +ereply: + NFSD_LOCK_ASSERT(); nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1)); if (v3) { if (!error) { _at__at_ -2164,6 +2181,7 _at__at_ NFSD_LOCK(); return (0); nfsmout: + NFSD_LOCK_ASSERT(); NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ if (dirp) _at__at_ -2249,6 +2267,7 @@ goto out; } out: + NFSD_UNLOCK_ASSERT(); if (!error) { error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); NDFREE(&nd, NDF_ONLY_PNBUF); _at__at_ -2280,12 +2299,14 _at__at_ mtx_unlock(&Giant); /* VFS */ NFSD_LOCK(); ereply: + NFSD_LOCK_ASSERT(); nfsm_reply(NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); error = 0; } nfsmout: + NFSD_LOCK_ASSERT(); NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ NDFREE(&nd, NDF_ONLY_PNBUF); _at__at_ -2397,8 +2418,11 @@ vrele(tdirp); tdirp = NULL; } - if (error) + if (error) { + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); goto out1; + } tdvp = tond.ni_dvp; tvp = tond.ni_vp; _at__at_ -2455,6 +2479,7 @@ fromnd.ni_cnd.cn_namelen)) error = -1; out: + NFSD_UNLOCK_ASSERT(); if (!error) { /* * The VOP_RENAME function releases all vnode references & _at__at_ -2477,9 +2502,10 @@ } /* fall through */ -out1: mtx_unlock(&Giant); /* VFS */ NFSD_LOCK(); +out1: + NFSD_LOCK_ASSERT(); nfsm_reply(2 * NFSX_WCCDATA(v3)); if (v3) { /* Release existing locks to prevent deadlock. */ _at__at_ -2518,6 +2544,7 _at__at_ /* * Clear out tond related fields */ + NFSD_LOCK_ASSERT(); NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ if (tdirp) _at__at_ -2680,6 +2707,7 @@ mtx_unlock(&Giant); /* VFS */ NFSD_LOCK(); ereply: + NFSD_LOCK_ASSERT(); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { nfsm_srvpostop_attr(getret, &at); _at__at_ -2689,6 +2717,7 _at_@ /* fall through */ nfsmout: + NFSD_LOCK_ASSERT(); NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ NDFREE(&nd, NDF_ONLY_PNBUF); _at__at_ -2744,6 +2773,8 _at__at_ fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) { + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ error = ESTALE; goto out; } _at__at_ -2841,6 +2872,7 @@ } } out: + NFSD_UNLOCK_ASSERT(); /* * These releases aren't strictly required, does even doing them * make any sense? XXX can nfsm_reply() block? _at__at_ -2872,6 +2904,7 _at__at_ /* fall through */ nfsmout: + NFSD_LOCK_ASSERT(); NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ NDFREE(&nd, NDF_ONLY_PNBUF); _at__at_ -2930,6 +2963,8 @@ fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) { + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ error = ESTALE; goto out; } _at__at_ -3004,6 +3039,7 _at_@ error = VOP_GETATTR(nd.ni_vp, vap, cred, td); } out: + NFSD_UNLOCK_ASSERT(); if (dirp) { if (dirp == nd.ni_dvp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); _at__at_ -3048,6 +3084,7 _at__at_ /* fall through */ nfsmout: + NFSD_LOCK_ASSERT(); NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ if (dirp) _at__at_ -3152,6 +3189,7 _at__at_ * Issue or abort op. Since SAVESTART is not set, path name * component is freed by the VOP after either. */ + NFSD_LOCK_ASSERT(); NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ if (!error) _at__at_ -3187,6 +3225,7 @@ /* fall through */ nfsmout: + NFSD_LOCK_ASSERT(); NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ NDFREE(&nd, NDF_ONLY_PNBUF); _at__at_ -3356,6 +3395,7 _at__at_ */ MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); again: + NFSD_UNLOCK_ASSERT(); iv.iov_base = rbuf; iv.iov_len = fullsiz; io.uio_iov = &iv; _at__at_ -3556,6 +3596,7 _at__at_ FREE((caddr_t)cookies, M_TEMP); nfsmout: + NFSD_LOCK_ASSERT(); if (vp) { NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ _at__at_ -3664,6 +3705,7 @@ VOP_UNLOCK(vp, 0, td); MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); again: + NFSD_UNLOCK_ASSERT(); iv.iov_base = rbuf; iv.iov_len = fullsiz; io.uio_iov = &iv; _at__at_ -3897,6 +3939,7 _at__at_ } } invalid: + NFSD_UNLOCK_ASSERT(); cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; _at__at_ -3923,6 +3966,7 _at__at_ FREE((caddr_t)cookies, M_TEMP); FREE((caddr_t)rbuf, M_TEMP); nfsmout: + NFSD_LOCK_ASSERT(); if (vp) { NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ _at__at_ -4081,6 +4125,7 @@ vp = NULL; NFSD_LOCK(); ereply: + NFSD_LOCK_ASSERT(); nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF); nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); if (!error) { _at__at_ -4093,6 +4138,7 _at__at_ error = 0; } nfsmout: + NFSD_LOCK_ASSERT(); NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ if (vp) _at__at_ -4194,6 +4240,7 _at__at_ sfp->sf_bavail = txdr_unsigned(sf->f_bavail); } nfsmout: + NFSD_LOCK_ASSERT(); if (vp) { NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ _at__at_ -4280,6 +4327,7 _at__at_ NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS | NFSV3FSINFO_CANSETTIME); nfsmout: + NFSD_LOCK_ASSERT(); if (vp) { NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ _at__at_ -4361,6 +4409,7 @@ pc->pc_caseinsensitive = nfsrv_nfs_false; pc->pc_casepreserving = nfsrv_nfs_true; nfsmout: + NFSD_LOCK_ASSERT(); if (vp) { NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ _at__at_ -4389,6 +4438,7 _at__at_ nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); nfsm_reply(0); nfsmout: + NFSD_LOCK_ASSERT(); return (error); } _at__at_ -4415,6 +4465,7 @@ nfsm_reply(0); error = 0; nfsmout: + NFSD_LOCK_ASSERT(); return (error); } _at__at_ -4440,7 +4491,6 _at__at_ int error; NFSD_LOCK_ASSERT(); - NFSD_UNLOCK(); nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); if (flags & VWRITE) { _at__at_ -4455,8 +4505,7 @@ case VREG: case VDIR: case VLNK: - error = EROFS; - goto out; + return (EROFS); default: break; } _at__at_ -4465,15 +4514,14 _at__at_ * If there's shared text associated with * the inode, we can't allow writing. */ - if (vp->v_vflag & VV_TEXT) { - NFSD_LOCK(); + if (vp->v_vflag & VV_TEXT) return (ETXTBSY); - } } + NFSD_UNLOCK(); mtx_lock(&Giant); /* VFS */ error = VOP_GETATTR(vp, &vattr, cred, td); if (error) - goto out2; + goto out; error = VOP_ACCESS(vp, flags, cred, td); /* * Allow certain operations for the owner (reads and writes _at__at_ -4481,9 +4529,9 _at__at_ */ if (override && error == EACCES && cred->cr_uid == vattr.va_uid) error = 0; -out2: - mtx_unlock(&Giant); /* VFS */ out: + NFSD_UNLOCK_ASSERT(); + mtx_unlock(&Giant); /* VFS */ NFSD_LOCK(); return error; }Received on Mon Oct 18 2004 - 09:01:50 UTC
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:38:18 UTC