Re: RELENG_5 panic [nic: _mtx_lock_sleep: recursed on non-recursivemutex nfsd_mtx]

From: Robert Watson <rwatson_at_freebsd.org>
Date: Mon, 18 Oct 2004 07:01:39 -0400 (EDT)
On Mon, 18 Oct 2004, Robert Watson wrote:

> If this is timing-related, it could be KTR disrupts the timing
> sufficiently to prevent it.  More likely, it went away because the work
> load on the NFS server changed, or the directory layout changed, or some
> other factor that caused the passage through the NFS code to change. 

Actually, I think I found it.  Did the drive or partition layout in your
NFS server change recently?  If so, that would cause the fsid of the file
system to change, resulting in ESTALE errors being returned to the client; 
I found one or two nits in the error handling for vfs_getvfs()  that would
result in the problem you saw.  The attached patch should correct, as well
as add additional assertions to catch related bugs; note that this patch
appears to work fine in my environment, but that the additional assertions
could detect bugs I'm not seeing in my environment for similar reasons to
the ones that prevented me from seeing the one you just reported. 

FYI, when the last client referencing the old FSID was rebooted, the
problem would have gone away as that set of ESTALE conditions would no
longer be triggered.

Robert N M Watson             FreeBSD Core Team, TrustedBSD Projects
robert_at_fledge.watson.org      Principal Research Scientist, McAfee Research

> >  - aW
> > 
> > 
> > 
> > 	0n Sun, Oct 17, 2004 at 10:19:34AM -0400, Robert Watson wrote: 
> > 	
> > 	On Fri, 15 Oct 2004, Wilkinson, Alex wrote:
> > 	
> > 	> Hi all,
> > 	> 
> > 	> I currently get a panic with "nfs_server_enable=YES" in /etc/rc.conf.
> > 	> 
> > 	> OS: FreeBSD 5.3-BETA4 #2: Tue Sep 14 13:55:30 UTC 2004
> > 	> 
> > 	> Backtrace
> > 	> ---------
> > 	> 
> > 	> panic: _mtx_lock_sleep: recursed on non-recursive mutex nfsd_mtx _at_ /usr /src/sys/nfsserver/nfs_serv.c:1947
> > 	
> > 	Is the NFS server code compiled into your kernel, or is it getting loaded
> > 	as a module?  Do you have any other NFS-related entries in /etc/rc.conf?
> > 	Could you show the output of "show locks" and "show witness" with witness
> > 	compiled into the kernel?
> > 	
> > 	Thanks!
> > 	
> > 	Robert N M Watson             FreeBSD Core Team, TrustedBSD Projects
> > 	robert_at_fledge.watson.org      Principal Research Scientist, McAfee Research
> > 	
> > 	
> > 	> cpuid = 0
> > 	> KDB: enter: panic
> > 	> [thread 100074]
> > 	> Stopped at      kdb_enter+0x32: leave
> > 	> db> tr
> > 	> kdb_enter(c068ba66,0,c068aed8,dd2ed90c,c1b6b340) at kdb_enter+0x32
> > 	> panic(c068aed8,c069885f,c0698617,79b,c0698617) at panic+0x1b0
> > 	> _mtx_lock_sleep(c071c940,c1b6b340,0,c0698617,79b) at _mtx_lock_sleep+0x16e
> > 	> _mtx_lock_flags(c071c940,0,c0698617,79b,0) at _mtx_lock_flags+0xb0
> > 	> nfsrv_create(c1eb6800,c1b98800,c1b6b340,dd2edc8c,0) at nfsrv_create+0x8c4
> > 	> nfssvc(c1b6b340,dd2edd14,8,0,2) at nfssvc+0x6ea
> > 	> syscall(2f,2f,2f,0,0) at syscall+0x13b
> > 	> Xint0x80_syscall() at Xint0x80_syscall+0x1f
> > 	> --- syscall (155, FreeBSD ELF32, nfssvc), eip = 0x280c60bf, esp = 0xbfbfeb1c, eb p = 0xbfbfeb38 ---
> > 	> 
> > 	> 
> > 	>  - aW
> > 	> _______________________________________________
> > 	> freebsd-current_at_freebsd.org mailing list
> > 	> http://lists.freebsd.org/mailman/listinfo/freebsd-current
> > 	> To unsubscribe, send any mail to "freebsd-current-unsubscribe_at_freebsd.org"
> > 	> 
> > 	
> > _______________________________________________
> > freebsd-current_at_freebsd.org mailing list
> > http://lists.freebsd.org/mailman/listinfo/freebsd-current
> > To unsubscribe, send any mail to "freebsd-current-unsubscribe_at_freebsd.org"
> > 
> 
> _______________________________________________
> freebsd-current_at_freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-current
> To unsubscribe, send any mail to "freebsd-current-unsubscribe_at_freebsd.org"
> 

Index: nfs_serv.c
===================================================================
RCS file: /home/ncvs/src/sys/nfsserver/nfs_serv.c,v
retrieving revision 1.148
diff -u -r1.148 nfs_serv.c
--- nfs_serv.c	25 Aug 2004 16:52:59 -0000	1.148
+++ nfs_serv.c	18 Oct 2004 09:53:52 -0000
_at__at_ -225,6 +225,7 _at__at_
 	tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(nfsmode);
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
_at__at_ -285,6 +286,7 @@
 	/* fall through */
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
_at__at_ -412,6 +414,7 _at__at_
 		mtx_unlock(&Giant);	/* VFS */
 		NFSD_LOCK();
 	}
+	NFSD_LOCK_ASSERT();
 
 	/*
 	 * If the size is being changed write acces is required, otherwise
_at__at_ -439,6 +442,7 _at__at_
 	if (!error)
 		error = postat_ret;
 out:
+	NFSD_LOCK_ASSERT();
 	if (vp != NULL) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
_at__at_ -460,6 +464,7 @@
 	/* fall through */
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (vp)
_at__at_ -653,6 +658,7 @@
 	}
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (dirp)
_at__at_ -771,6 +777,7 _at__at_
 	mb->m_next = mp3;
 	mp3 = NULL;
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (mp3)
 		m_freem(mp3);
 	if (vp) {
_at__at_ -1040,6 +1047,7 _at__at_
 	}
 	*tl = txdr_unsigned(cnt);
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
_at__at_ -1242,6 +1250,7 @@
 	if (!error)
 		error = aftat_ret;
 ereply:
+	NFSD_LOCK_ASSERT();
 	nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) +
 		2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3));
 	if (v3) {
_at__at_ -1275,6 +1284,7 @@
 	}
 	error = 0;
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (vp)
_at__at_ -1386,6 +1396,7 @@
 	    }
 	    if (len > NFS_MAXDATA || len < 0  || i < len) {
 nfsmout:
+		NFSD_LOCK_ASSERT();
 		m_freem(mrep);
 		error = EIO;
 		nfsm_writereply(2 * NFSX_UNSIGNED);
_at__at_ -1719,7 +1730,7 _at__at_
 	nfsm_srvmtofh(fhp);
 	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 		error = ESTALE;
-		goto ereply;
+		goto ereply_locked;
 	}
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
_at__at_ -1943,8 +1954,11 @@
 		}
 	}
 ereply:
+	NFSD_UNLOCK_ASSERT();
 	mtx_unlock(&Giant);	/* VFS */
 	NFSD_LOCK();
+ereply_locked:
+	NFSD_LOCK_ASSERT();
 	nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3));
 	if (v3) {
 		if (!error) {
_at__at_ -1961,6 +1975,7 _at__at_
 	error = 0;
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (nd.ni_startdir) {
_at__at_ -2116,6 +2131,7 @@
 	 * send response, cleanup, return.
 	 */
 out:
+	NFSD_UNLOCK_ASSERT();
 	if (nd.ni_startdir) {
 		vrele(nd.ni_startdir);
 		nd.ni_startdir = NULL;
_at__at_ -2146,9 +2162,10 @@
 		diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 		VOP_UNLOCK(dirp, 0, td);
 	}
-ereply:
 	mtx_unlock(&Giant);	/* VFS */
 	NFSD_LOCK();
+ereply:
+	NFSD_LOCK_ASSERT();
 	nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1));
 	if (v3) {
 		if (!error) {
_at__at_ -2164,6 +2181,7 _at__at_
 	NFSD_LOCK();
 	return (0);
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (dirp)
_at__at_ -2249,6 +2267,7 @@
 			goto out;
 		}
 out:
+		NFSD_UNLOCK_ASSERT();
 		if (!error) {
 			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 			NDFREE(&nd, NDF_ONLY_PNBUF);
_at__at_ -2280,12 +2299,14 _at__at_
 	mtx_unlock(&Giant);	/* VFS */
 	NFSD_LOCK();
 ereply:
+	NFSD_LOCK_ASSERT();
 	nfsm_reply(NFSX_WCCDATA(v3));
 	if (v3) {
 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 		error = 0;
 	}
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	NDFREE(&nd, NDF_ONLY_PNBUF);
_at__at_ -2397,8 +2418,11 @@
 		vrele(tdirp);
 		tdirp = NULL;
 	}
-	if (error)
+	if (error) {
+		mtx_unlock(&Giant);	/* VFS */
+		NFSD_LOCK();
 		goto out1;
+	}
 
 	tdvp = tond.ni_dvp;
 	tvp = tond.ni_vp;
_at__at_ -2455,6 +2479,7 @@
 	      fromnd.ni_cnd.cn_namelen))
 		error = -1;
 out:
+	NFSD_UNLOCK_ASSERT();
 	if (!error) {
 		/*
 		 * The VOP_RENAME function releases all vnode references &
_at__at_ -2477,9 +2502,10 @@
 	}
 	/* fall through */
 
-out1:
 	mtx_unlock(&Giant);	/* VFS */
 	NFSD_LOCK();
+out1:
+	NFSD_LOCK_ASSERT();
 	nfsm_reply(2 * NFSX_WCCDATA(v3));
 	if (v3) {
 		/* Release existing locks to prevent deadlock. */
_at__at_ -2518,6 +2544,7 _at__at_
 	/*
 	 * Clear out tond related fields
 	 */
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (tdirp)
_at__at_ -2680,6 +2707,7 @@
 	mtx_unlock(&Giant);	/* VFS */
 	NFSD_LOCK();
 ereply:
+	NFSD_LOCK_ASSERT();
 	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 	if (v3) {
 		nfsm_srvpostop_attr(getret, &at);
_at__at_ -2689,6 +2717,7 _at_@
 	/* fall through */
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	NDFREE(&nd, NDF_ONLY_PNBUF);
_at__at_ -2744,6 +2773,8 _at__at_
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
 	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
+		NFSD_UNLOCK();
+		mtx_lock(&Giant);	/* VFS */
 		error = ESTALE;
 		goto out;
 	}
_at__at_ -2841,6 +2872,7 @@
 	    }
 	}
 out:
+	NFSD_UNLOCK_ASSERT();
 	/*
 	 * These releases aren't strictly required, does even doing them
 	 * make any sense? XXX can nfsm_reply() block?
_at__at_ -2872,6 +2904,7 _at__at_
 	/* fall through */
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	NDFREE(&nd, NDF_ONLY_PNBUF);
_at__at_ -2930,6 +2963,8 @@
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
 	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
+		NFSD_UNLOCK();
+		mtx_lock(&Giant);	/* VFS */
 		error = ESTALE;
 		goto out;
 	}
_at__at_ -3004,6 +3039,7 _at_@
 			error = VOP_GETATTR(nd.ni_vp, vap, cred, td);
 	}
 out:
+	NFSD_UNLOCK_ASSERT();
 	if (dirp) {
 		if (dirp == nd.ni_dvp) {
 			diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
_at__at_ -3048,6 +3084,7 _at__at_
 	/* fall through */
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (dirp)
_at__at_ -3152,6 +3189,7 _at__at_
 	 * Issue or abort op.  Since SAVESTART is not set, path name
 	 * component is freed by the VOP after either.
 	 */
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (!error)
_at__at_ -3187,6 +3225,7 @@
 	/* fall through */
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	NDFREE(&nd, NDF_ONLY_PNBUF);
_at__at_ -3356,6 +3395,7 _at__at_
 	 */
 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
 again:
+	NFSD_UNLOCK_ASSERT();
 	iv.iov_base = rbuf;
 	iv.iov_len = fullsiz;
 	io.uio_iov = &iv;
_at__at_ -3556,6 +3596,7 _at__at_
 	FREE((caddr_t)cookies, M_TEMP);
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
_at__at_ -3664,6 +3705,7 @@
 	VOP_UNLOCK(vp, 0, td);
 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
 again:
+	NFSD_UNLOCK_ASSERT();
 	iv.iov_base = rbuf;
 	iv.iov_len = fullsiz;
 	io.uio_iov = &iv;
_at__at_ -3897,6 +3939,7 _at__at_
 			}
 		}
 invalid:
+		NFSD_UNLOCK_ASSERT();
 		cpos += dp->d_reclen;
 		dp = (struct dirent *)cpos;
 		cookiep++;
_at__at_ -3923,6 +3966,7 _at__at_
 	FREE((caddr_t)cookies, M_TEMP);
 	FREE((caddr_t)rbuf, M_TEMP);
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
_at__at_ -4081,6 +4125,7 @@
 	vp = NULL;
 	NFSD_LOCK();
 ereply:
+	NFSD_LOCK_ASSERT();
 	nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF);
 	nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
 	if (!error) {
_at__at_ -4093,6 +4138,7 _at__at_
 		error = 0;
 	}
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (vp)
_at__at_ -4194,6 +4240,7 _at__at_
 			sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
 	}
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
_at__at_ -4280,6 +4327,7 _at__at_
 		NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
 		NFSV3FSINFO_CANSETTIME);
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
_at__at_ -4361,6 +4409,7 @@
 	pc->pc_caseinsensitive = nfsrv_nfs_false;
 	pc->pc_casepreserving = nfsrv_nfs_true;
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
_at__at_ -4389,6 +4438,7 _at__at_
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	nfsm_reply(0);
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	return (error);
 }
 
_at__at_ -4415,6 +4465,7 @@
 	nfsm_reply(0);
 	error = 0;
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	return (error);
 }
 
_at__at_ -4440,7 +4491,6 _at__at_
 	int error;
 
 	NFSD_LOCK_ASSERT();
-	NFSD_UNLOCK();
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	if (flags & VWRITE) {
_at__at_ -4455,8 +4505,7 @@
 			case VREG:
 			case VDIR:
 			case VLNK:
-				error = EROFS;
-				goto out;
+				return (EROFS);
 			default:
 				break;
 			}
_at__at_ -4465,15 +4514,14 _at__at_
 		 * If there's shared text associated with
 		 * the inode, we can't allow writing.
 		 */
-		if (vp->v_vflag & VV_TEXT) {
-			NFSD_LOCK();
+		if (vp->v_vflag & VV_TEXT)
 			return (ETXTBSY);
-		}
 	}
+	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	error = VOP_GETATTR(vp, &vattr, cred, td);
 	if (error)
-		goto out2;
+		goto out;
 	error = VOP_ACCESS(vp, flags, cred, td);
 	/*
 	 * Allow certain operations for the owner (reads and writes
_at__at_ -4481,9 +4529,9 _at__at_
 	 */
 	if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
 		error = 0;
-out2:
-	mtx_unlock(&Giant);	/* VFS */
 out:
+	NFSD_UNLOCK_ASSERT();
+	mtx_unlock(&Giant);	/* VFS */
 	NFSD_LOCK();
 	return error;
 }
Received on Mon Oct 18 2004 - 09:01:50 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:38:18 UTC