Re: And probably the final crash for today's current :) (panic: filesystem goof: vop_panic[vop_print])

From: Jan Harkes <jaharkes_at_cs.cmu.edu>
Date: Fri, 24 Aug 2007 11:19:27 -0400
On Mon, Aug 20, 2007 at 11:40:29PM +0300, Nikolay Pavlov wrote:
> I am testing latest coda client code. 
> After simple ls -l /coda i am getting this panic:

Coda's kernel module performed various VOP_OPEN/CLOSE/READ operations on
the vnodes of the 'container files' (files containing the actual data
which are created on a normal (non-coda) filesystem and handed to the
Coda kernel module when a Coda file is opened).

The following patch adds locking around VOP_ operations. I ran some
tests and it seems like I got them all. For some reason I could only get
the warnings to go away when using exclusive locks, but since the
overhead in our case really is in userspace and we still hold the giant
lock pretty much everywhere there shouldn't be a measurable difference.

The one questionable change in this patch is in coda_namecache.c. We are
flushing some internal name->vnode lookup cache and while doing so we
check if VV_TEXT has been set in ->v_vflags. We really should take a
lock there, but I have no idea how to propagate a struct thread * down
there (since this flush is called from some places, like vc_nb_write
(write on the /dev/cfs0 device), where it isn't obvious to me whether it
has access to the thread context). So I commented out the assertion just to
shut up the warning. I don't really know what the preferred policy in
this case is; the flush is only triggered during unmount, and the assertion
doesn't drop the kernel into the debugger when it is compiled with the
default options.

Jan

-------------------------------------------------------------------

    We need to lock before we call VOP_ operations on the container files.

diff --git a/coda_namecache.c b/coda_namecache.c
index 916e605..0bf284d 100644
--- a/coda_namecache.c
+++ b/coda_namecache.c
@@ -607,7 +607,10 @@ coda_nc_flush(dcstat)
 			}
 			vrele(CTOV(cncp->dcp)); 
 
-			ASSERT_VOP_LOCKED(CTOV(cncp->cp), "coda_nc_flush");
+			/* We really should lock because we're looking at
+			 * ->v_vflag. On the other hand we're just looking and
+			 * it isn't clear how to propagate *td to here. */
+			/* ASSERT_VOP_LOCKED(CTOV(cncp->cp), "coda_nc_flush");*/
 			if (CTOV(cncp->cp)->v_vflag & VV_TEXT) {
 			    if (coda_vmflush(cncp->cp))
 				CODADEBUG(CODA_FLUSH, 
diff --git a/coda_vfsops.c b/coda_vfsops.c
index bfd0834..bc24aec 100644
--- a/coda_vfsops.c
+++ b/coda_vfsops.c
@@ -229,8 +229,11 @@ coda_unmount(vfsp, mntflags, td)
 	vrele(mi->mi_rootvp);
 	vrele(coda_ctlvp);
 	active = coda_kill(vfsp, NOT_DOWNCALL);
-	ASSERT_VOP_LOCKED(mi->mi_rootvp, "coda_unmount");
+
+	vn_lock(mi->mi_rootvp, LK_RETRY|LK_EXCLUSIVE, td);
 	mi->mi_rootvp->v_vflag &= ~VV_ROOT;
+	VOP_UNLOCK(mi->mi_rootvp, 0, td);
+
 	error = vflush(mi->mi_vfsp, 0, FORCECLOSE, td);
 #ifdef CODA_VERBOSE
 	printf("coda_unmount: active = %d, vflush active %d\n", active, error);
diff --git a/coda_vnops.c b/coda_vnops.c
index 11f1c39..968fe19 100644
--- a/coda_vnops.c
+++ b/coda_vnops.c
@@ -240,7 +240,10 @@ coda_open(struct vop_open_args *ap)
     }
 
     /* Open the cache file. */
+    vn_lock(vp, LK_RETRY|LK_EXCLUSIVE, td);
     error = VOP_OPEN(vp, flag, cred, td, NULL); 
+    VOP_UNLOCK(vp, 0, td);
+
     if (error) {
     	printf("coda_open: VOP_OPEN on container failed %d\n", error);
 	return (error);
@@ -274,7 +277,9 @@ coda_close(struct vop_close_args *ap)
     }
 
     if (cp->c_ovp) {
+	vn_lock(cp->c_ovp, LK_RETRY|LK_EXCLUSIVE, td);
 	VOP_CLOSE(cp->c_ovp, flag, cred, td); /* Do errors matter here? */
+	VOP_UNLOCK(cp->c_ovp, 0, td);
 	vrele(cp->c_ovp);
     }
 #ifdef CODA_VERBOSE
@@ -346,8 +351,10 @@ coda_rdwr(struct vnode *vp, struct uio *uiop, enum uio_rw rw, int ioflag,
     if (cfvp == NULL) {
 	opened_internally = 1;
 	MARK_INT_GEN(CODA_OPEN_STATS);
-	error = VOP_OPEN(vp, (rw == UIO_READ ? FREAD : FWRITE), cred, td, NULL);
 	printf("coda_rdwr: Internally Opening %p\n", vp);
+	vn_lock(vp, LK_RETRY|LK_EXCLUSIVE, td);
+	error = VOP_OPEN(vp, (rw == UIO_READ ? FREAD : FWRITE), cred, td, NULL);
+	VOP_UNLOCK(vp, 0, td);
 	if (error) {
 		printf("coda_rdwr: VOP_OPEN on container failed %d\n", error);
 		return (error);
@@ -358,6 +365,8 @@ coda_rdwr(struct vnode *vp, struct uio *uiop, enum uio_rw rw, int ioflag,
     /* Have UFS handle the call. */
     CODADEBUG(CODA_RDWR, myprintf(("indirect rdwr: fid = %s, refcnt = %d\n",
 			     coda_f2s(&cp->c_fid), CTOV(cp)->v_usecount)); )
+
+    vn_lock(cfvp, LK_RETRY|LK_EXCLUSIVE, td);
     if (rw == UIO_READ) {
 	error = VOP_READ(cfvp, uiop, ioflag, cred);
     } else {
@@ -371,6 +380,7 @@ coda_rdwr(struct vnode *vp, struct uio *uiop, enum uio_rw rw, int ioflag,
 	    }
 	}
     }
+    VOP_UNLOCK(cfvp, 0, td);
 
     if (error)
 	MARK_INT_FAIL(CODA_RDWR_STATS);
@@ -380,7 +390,9 @@ coda_rdwr(struct vnode *vp, struct uio *uiop, enum uio_rw rw, int ioflag,
     /* Do an internal close if necessary. */
     if (opened_internally) {
 	MARK_INT_GEN(CODA_CLOSE_STATS);
+	vn_lock(vp, LK_RETRY|LK_EXCLUSIVE, td);
 	(void)VOP_CLOSE(vp, (rw == UIO_READ ? FREAD : FWRITE), cred, td);
+	VOP_UNLOCK(vp, 0, td);
     }
 
     /* Invalidate cached attributes if writing. */
@@ -694,8 +706,11 @@ coda_fsync(struct vop_fsync_args *ap)
 	return(0);
     }
 
-    if (convp)
+    if (convp) {
+	vn_lock(convp, LK_RETRY|LK_EXCLUSIVE, td);
     	VOP_FSYNC(convp, MNT_WAIT, td);
+	VOP_UNLOCK(convp, 0, td);
+    }
 
     /*
      * We see fsyncs with usecount == 1 then usecount == 0.
@@ -1408,8 +1423,11 @@ coda_symlink(struct vop_symlink_args *ap)
     /* Invalidate the parent's attr cache, the modification time has changed */
     tdcp->c_flags &= ~C_VATTR;
 
-    if (error == 0)
+    if (error == 0) {
+	vn_lock(tdvp, LK_RETRY|LK_EXCLUSIVE, td);
 	error = VOP_LOOKUP(tdvp, vpp, cnp);
+	VOP_UNLOCK(tdvp, 0, td);
+    }
 
  exit:    
     CODADEBUG(CODA_SYMLINK, myprintf(("in symlink result %d\n",error)); )
@@ -1452,22 +1470,29 @@ coda_readdir(struct vop_readdir_args *ap)
     {
 	/* If directory is not already open do an "internal open" on it. */
 	int opened_internally = 0;
+
 	if (cp->c_ovp == NULL) {
 	    opened_internally = 1;
 	    MARK_INT_GEN(CODA_OPEN_STATS);
-	    error = VOP_OPEN(vp, FREAD, cred, td, NULL);
+
+	    vn_lock(vp, LK_RETRY|LK_EXCLUSIVE, td);
 	    printf("coda_readdir: Internally Opening %p\n", vp);
+	    error = VOP_OPEN(vp, FREAD, cred, td, NULL);
+	    VOP_UNLOCK(vp, 0, td);
+
 	    if (error) {
 		printf("coda_readdir: VOP_OPEN on container failed %d\n", error);
 		return (error);
 	    }
 	}
-	
+
+	vn_lock(cp->c_ovp, LK_RETRY|LK_EXCLUSIVE, td);
 	/* Have UFS handle the call. */
 	CODADEBUG(CODA_READDIR, myprintf(("indirect readdir: fid = %s, refcnt = %d\n", coda_f2s(&cp->c_fid), vp->v_usecount)); )
 	error = VOP_READDIR(cp->c_ovp, uiop, cred, eofflag, ncookies,
 			       cookies);
-	
+	VOP_UNLOCK(cp->c_ovp, 0, td);
+
 	if (error)
 	    MARK_INT_FAIL(CODA_READDIR_STATS);
 	else
@@ -1476,7 +1501,9 @@ coda_readdir(struct vop_readdir_args *ap)
 	/* Do an "internal close" if necessary. */ 
 	if (opened_internally) {
 	    MARK_INT_GEN(CODA_CLOSE_STATS);
+	    vn_lock(vp, LK_RETRY|LK_EXCLUSIVE, td);
 	    (void)VOP_CLOSE(vp, FREAD, cred, td);
+	    VOP_UNLOCK(vp, 0, td);
 	}
     }
 
@@ -1505,6 +1532,7 @@ coda_bmap(struct vop_bmap_args *ap)
 	cp = VTOC(vp);
 	if (cp->c_ovp) {
 		return EINVAL;
+/* looks like we never actually call VOP_BMAP, probably would need locking */
 		ret =  VOP_BMAP(cp->c_ovp, bn, bop, bnp, ap->a_runp, ap->a_runb);
 #if	0
 		printf("VOP_BMAP(cp->c_ovp %p, bn %p, bop %p, bnp %lld, ap->a_runp %p, ap->a_runb %p) = %d\n",
Received on Fri Aug 24 2007 - 13:19:29 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:39:16 UTC