On 7 May, To: current_at_freebsd.org wrote: > I managed to find some time to take a closer look at vnode locking in > the NFS server code and found that the situation was worse than I > initially thought. I've put together a patch that seems to fix all the > bugs that I found. With this patch, the code passes the simple tests > that I wrote as well as NFS mounting a local directory on /usr/obj and > running "make -j10 buildworld" (after I cranked up vfs.hirunningspace > and vfs.lorunningspace by 50x to avoid the wdrain bio deadlock I > mentioned yesterday), all with the DEBUG_VFS_LOCKS kernel option > enabled. > > The NFS server code was in bad shape from being hacked on too many times > before I touched it and it looks like it has accumulated some historical > baggage, and my changes certainly don't help. I attempted to match the > existing style and control flow since I wanted to minimize the changes > at the time to avoid introducing new bugs, but this meant that I had to > duplicate some code in a number of places. > > I saw two possible ways of getting the initial dirp attributes. One was > to set LOCKPARENT on the first lookup() call in nfs_namei() and cap > VOP_GETATTR() at that point. I chose the other possible implementation, > which was to temporarily lock the dirp and call VOP_GETATTR() before the > loop, because this change was simpler. > > The NFS server code badly needs a rewrite by someone who understands it > well. > > I'm hoping to get enough review and testing of this patch so that I can > get re approval to fix vnode locking in the NFS server code for > 5.1. It might help to actually include the patch ... Index: sys/nfsserver/nfs.h =================================================================== RCS file: /home/ncvs/src/sys/nfsserver/nfs.h,v retrieving revision 1.68 diff -u -r1.68 nfs.h --- sys/nfsserver/nfs.h 24 Jul 2002 22:27:35 -0000 1.68 +++ sys/nfsserver/nfs.h 5 May 2003 00:34:59 -0000 _at__at_ -332,7 +332,8 _at__at_ int netaddr_match(int, union nethostaddr *, struct sockaddr *); int nfs_namei(struct nameidata *, fhandle_t *, int, struct nfssvc_sock *, struct sockaddr *, struct mbuf **, - caddr_t *, struct vnode **, struct thread *, int); + caddr_t *, struct vnode **, int, struct vattr *, int *, + struct thread *, int); void nfsm_adj(struct mbuf *, int, int); int nfsm_mbuftouio(struct mbuf **, struct uio *, int, caddr_t *); void nfsrv_initcache(void); Index: sys/nfsserver/nfs_serv.c =================================================================== RCS file: /home/ncvs/src/sys/nfsserver/nfs_serv.c,v retrieving revision 1.133 diff -u -r1.133 nfs_serv.c --- sys/nfsserver/nfs_serv.c 24 Apr 2003 04:31:25 -0000 1.133 +++ sys/nfsserver/nfs_serv.c 7 May 2003 02:20:28 -0000 _at__at_ -467,7 +467,7 _at__at_ nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, td, pubflag); + &dirp, v3, &dirattr, &dirattr_ret, td, pubflag); /* * namei failure, only dirp to cleanup. Clear out garbarge from _at__at_ -476,9 +476,6 _at__at_ if (error) { if (dirp) { - if (v3) - dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred, - td); vrele(dirp); dirp = NULL; } _at__at_ -551,9 +548,6 _at__at_ } if (dirp) { - if (v3) - dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred, - td); vrele(dirp); dirp = NULL; } _at__at_ -1630,15 +1624,10 _at__at_ * prior to calling nfsm_reply ( which might goto nfsmout ). */ error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, td, FALSE); - if (dirp) { - if (v3) { - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - td); - } else { - vrele(dirp); - dirp = NULL; - } + &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); + if (dirp && !v3) { + vrele(dirp); + dirp = NULL; } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); _at__at_ -1809,9 +1798,27 _at__at_ if (exclusive_flag && !error && bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF)) error = EEXIST; - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); - vrele(dirp); - dirp = NULL; + if (dirp == nd.ni_dvp) + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + else { + /* Drop the other locks to avoid deadlock. */ + if (nd.ni_dvp) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) + vput(nd.ni_vp); + nd.ni_dvp = NULL; + nd.ni_vp = NULL; + + if (vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td) == 0) { + diraft_ret = + VOP_GETATTR(dirp, &diraft, cred, td); + VOP_UNLOCK(dirp, 0, td); + } + } } ereply: nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3)); _at__at_ -1906,9 +1913,7 _at__at_ */ error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, td, FALSE); - if (dirp) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, td); + &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); if (error) { nfsm_reply(NFSX_WCCDATA(1)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); _at__at_ -2006,10 +2011,10 _at__at_ vp = NULL; nd.ni_vp = NULL; } - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); if (dirp) { - vrele(dirp); - dirp = NULL; + if (vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td) == 0) + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + VOP_UNLOCK(dirp, 0, td); } ereply: nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1)); _at__at_ -2085,15 +2090,10 _at__at_ nd.ni_cnd.cn_nameiop = DELETE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, td, FALSE); - if (dirp) { - if (v3) { - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - td); - } else { - vrele(dirp); - dirp = NULL; - } + &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); + if (dirp && !v3) { + vrele(dirp); + dirp = NULL; } if (error == 0) { if (nd.ni_vp->v_type == VDIR) { _at__at_ -2114,9 +2114,27 _at__at_ } } if (dirp && v3) { - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); - vrele(dirp); - dirp = NULL; + if (dirp == nd.ni_dvp) + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + else { + /* Drop the other locks to avoid deadlock. */ + if (nd.ni_dvp) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) + vput(nd.ni_vp); + nd.ni_dvp = NULL; + nd.ni_vp = NULL; + + if (vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td) == 0) { + diraft_ret = + VOP_GETATTR(dirp, &diraft, cred, td); + VOP_UNLOCK(dirp, 0, td); + } + } } ereply: nfsm_reply(NFSX_WCCDATA(v3)); _at__at_ -2200,15 +2218,10 _at__at_ fromnd.ni_cnd.cn_nameiop = DELETE; fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART; error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md, - &dpos, &fdirp, td, FALSE); - if (fdirp) { - if (v3) { - fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor, cred, - td); - } else { - vrele(fdirp); - fdirp = NULL; - } + &dpos, &fdirp, v3, &fdirfor, &fdirfor_ret, td, FALSE); + if (fdirp && !v3) { + vrele(fdirp); + fdirp = NULL; } if (error) { nfsm_reply(2 * NFSX_WCCDATA(v3)); _at__at_ -2227,15 +2240,10 _at__at_ tond.ni_cnd.cn_nameiop = RENAME; tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART; error = nfs_namei(&tond, tfhp, len2, slp, nam, &md, - &dpos, &tdirp, td, FALSE); - if (tdirp) { - if (v3) { - tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor, cred, - td); - } else { - vrele(tdirp); - tdirp = NULL; - } + &dpos, &tdirp, v3, &tdirfor, &tdirfor_ret, td, FALSE); + if (tdirp && !v3) { + vrele(tdirp); + tdirp = NULL; } if (error) goto out1; _at__at_ -2318,12 +2326,30 _at__at_ /* fall through */ out1: - if (fdirp) - fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, td); - if (tdirp) - tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, td); nfsm_reply(2 * NFSX_WCCDATA(v3)); if (v3) { + /* Release existing locks to prevent deadlock. */ + if (tond.ni_dvp) { + if (tond.ni_dvp == tond.ni_vp) + vrele(tond.ni_dvp); + else + vput(tond.ni_dvp); + } + if (tond.ni_vp) + vput(tond.ni_vp); + tond.ni_dvp = NULL; + tond.ni_vp = NULL; + + if (fdirp && + vn_lock(fdirp, LK_EXCLUSIVE | LK_RETRY, td) == 0) { + fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, td); + VOP_UNLOCK(fdirp, 0, td); + } + if (tdirp && + vn_lock(tdirp, LK_EXCLUSIVE | LK_RETRY, td) == 0) { + tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, td); + VOP_UNLOCK(tdirp, 0, td); + } nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } _at__at_ -2407,7 +2433,7 _at__at_ nfsm_srvmtofh(dfhp); nfsm_srvnamesiz(len); - error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, slp, nam, &rdonly, TRUE); + error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, slp, nam, &rdonly, TRUE); if (error) { nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { _at__at_ -2418,47 +2444,77 _at__at_ error = 0; goto nfsmout; } + if (v3) + getret = VOP_GETATTR(vp, &at, cred, td); if (vp->v_type == VDIR) { error = EPERM; /* POSIX */ goto out1; } + VOP_UNLOCK(vp, 0, td); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT; error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos, - &dirp, td, FALSE); - if (dirp) { - if (v3) { - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - td); - } else { - vrele(dirp); - dirp = NULL; - } + &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); + if (dirp && !v3) { + vrele(dirp); + dirp = NULL; + } + if (error) { + vrele(vp); + vp = NULL; + goto out2; } - if (error) - goto out1; - xp = nd.ni_vp; if (xp != NULL) { error = EEXIST; - goto out; + vrele(vp); + vp = NULL; + goto out2; } xp = nd.ni_dvp; - if (vp->v_mount != xp->v_mount) + if (vp->v_mount != xp->v_mount) { error = EXDEV; -out: - if (!error) { - error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); - NDFREE(&nd, NDF_ONLY_PNBUF); + vrele(vp); + vp = NULL; + goto out2; } + if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td)) != 0) { + vrele(vp); + vp = NULL; + goto out2; + } + error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); + NDFREE(&nd, NDF_ONLY_PNBUF); /* fall through */ out1: if (v3) getret = VOP_GETATTR(vp, &at, cred, td); - if (dirp) - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); +out2: + if (dirp) { + if (dirp == nd.ni_dvp) + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + else { + /* Release existing locks to prevent deadlock. */ + if (nd.ni_dvp) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) + vrele(nd.ni_vp); + nd.ni_dvp = NULL; + nd.ni_vp = NULL; + + if (vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td) == 0) { + diraft_ret = + VOP_GETATTR(dirp, &diraft, cred, td); + VOP_UNLOCK(dirp, 0, td); + } + } + } ereply: nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { _at__at_ -2473,7 +2529,7 _at__at_ if (dirp) vrele(dirp); if (vp) - vrele(vp); + vput(vp); if (nd.ni_dvp) { if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); _at__at_ -2534,15 +2590,10 _at__at_ nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, td, FALSE); - if (dirp) { - if (v3) { - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - td); - } else { - vrele(dirp); - dirp = NULL; - } + &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); + if (dirp && !v3) { + vrele(dirp); + dirp = NULL; } if (error) goto out; _at__at_ -2629,10 +2680,9 _at__at_ FREE(pathcp, M_TEMP); pathcp = NULL; } - if (dirp) { + if (dirp && vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td) == 0) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); - vrele(dirp); - dirp = NULL; + VOP_UNLOCK(dirp, 0, td); } if (nd.ni_startdir) { vrele(nd.ni_startdir); _at__at_ -2719,15 +2769,10 _at__at_ nd.ni_cnd.cn_flags = LOCKPARENT; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, td, FALSE); - if (dirp) { - if (v3) { - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - td); - } else { - vrele(dirp); - dirp = NULL; - } + &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); + if (dirp && !v3) { + vrele(dirp); + dirp = NULL; } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); _at__at_ -2778,8 +2823,33 _at__at_ error = VOP_GETATTR(nd.ni_vp, vap, cred, td); } out: - if (dirp) - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + if (dirp) { + if (dirp == nd.ni_dvp) { + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + } else { + /* Release existing locks to prevent deadlock. */ + if (nd.ni_dvp) { + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == nd.ni_vp && vpexcl) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) { + if (vpexcl) + vput(nd.ni_vp); + else + vrele(nd.ni_vp); + } + nd.ni_dvp = NULL; + nd.ni_vp = NULL; + if (vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td) == 0) { + diraft_ret = + VOP_GETATTR(dirp, &diraft, cred, td); + VOP_UNLOCK(dirp, 0, td); + } + } + } nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { _at__at_ -2859,15 +2929,10 _at__at_ nd.ni_cnd.cn_nameiop = DELETE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, td, FALSE); - if (dirp) { - if (v3) { - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - td); - } else { - vrele(dirp); - dirp = NULL; - } + &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); + if (dirp && !v3) { + vrele(dirp); + dirp = NULL; } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); _at__at_ -2902,8 +2967,28 _at__at_ error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); NDFREE(&nd, NDF_ONLY_PNBUF); - if (dirp) - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + if (dirp) { + if (dirp == nd.ni_dvp) + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + else { + /* Release existing locks to prevent deadlock. */ + if (nd.ni_dvp) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) + vput(nd.ni_vp); + nd.ni_dvp = NULL; + nd.ni_vp = NULL; + if (vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td) == 0) { + diraft_ret = + VOP_GETATTR(dirp, &diraft, cred, td); + VOP_UNLOCK(dirp, 0, td); + } + } + } nfsm_reply(NFSX_WCCDATA(v3)); error = 0; if (v3) Index: sys/nfsserver/nfs_srvsubs.c =================================================================== RCS file: /home/ncvs/src/sys/nfsserver/nfs_srvsubs.c,v retrieving revision 1.120 diff -u -r1.120 nfs_srvsubs.c --- sys/nfsserver/nfs_srvsubs.c 19 Feb 2003 05:47:39 -0000 1.120 +++ sys/nfsserver/nfs_srvsubs.c 6 May 2003 02:56:15 -0000 _at__at_ -592,7 +592,8 _at__at_ int nfs_namei(struct nameidata *ndp, fhandle_t *fhp, int len, struct nfssvc_sock *slp, struct sockaddr *nam, struct mbuf **mdp, - caddr_t *dposp, struct vnode **retdirp, struct thread *td, int pubflag) + caddr_t *dposp, struct vnode **retdirp, int v3, struct vattr *retdirattrp, + int *retdirattr_retp, struct thread *td, int pubflag) { int i, rem; struct mbuf *md; _at__at_ -602,6 +603,7 _at__at_ struct vnode *dp; int error, rdonly, linklen; struct componentname *cnp = &ndp->ni_cnd; + int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0; *retdirp = NULL; cnp->cn_flags |= NOMACCHECK; _at__at_ -664,6 +666,12 _at__at_ * to the returned pointer */ *retdirp = dp; + if (v3) { + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, td); + *retdirattr_retp = VOP_GETATTR(dp, retdirattrp, + ndp->ni_cnd.cn_cred, td); + VOP_UNLOCK(dp, 0, td); + } if (pubflag) { /* _at__at_ -736,6 +744,8 _at__at_ VREF(dp); ndp->ni_startdir = dp; + if (!lockleaf) + cnp->cn_flags |= LOCKLEAF; for (;;) { cnp->cn_nameptr = cnp->cn_pnbuf; /* _at__at_ -761,6 +771,8 _at__at_ cnp->cn_flags |= HASBUF; else uma_zfree(namei_zone, cnp->cn_pnbuf); + if (ndp->ni_vp && !lockleaf) + VOP_UNLOCK(ndp->ni_vp, 0, td); break; } _at__at_ -840,6 +852,8 _at__at_ ndp->ni_startdir = ndp->ni_dvp; ndp->ni_dvp = NULL; } + if (!lockleaf) + cnp->cn_flags &= ~LOCKLEAF; /* * nfs_namei() guarentees that fields will not contain garbageReceived on Wed May 07 2003 - 10:38:52 UTC
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:37:06 UTC