filedesc_sx patch (20070401a)

From: Robert Watson <rwatson_at_FreeBSD.org>
Date: Sun, 1 Apr 2007 16:03:02 -0400 (EDT)
Dear all,

The attached patch moves file descriptor locks from a custom mutex/sleep
lock implemented using msleep() to an sx lock.  With the new sx lock
optimizations in place, this is now sensible: it both eliminates a custom
lock type and significantly improves performance.  Kris has reported a 2x-4x
improvement in transactions/sec with MySQL using this patch, as it greatly
reduces the cost of lock contention during file descriptor lookup for
threaded applications, and also moves to shared locking to avoid exclusive
acquisition for read-only operations (the vast majority in most workloads).
The patch is below, but you can also download it from:

   http://www.watson.org/~robert/freebsd/netperf/20070401a-filedesc-sx.diff

I'm currently waiting a few days for the sx lock changes to settle before
committing, so I plan to commit this around Wednesday/Thursday of this week
(unless serious problems arise).

Robert N M Watson
Computer Laboratory
University of Cambridge

--- //depot/vendor/freebsd/src/sys/compat/linux/linux_file.c	2007/03/29 02:17:34
+++ //depot/user/rwatson/filedesc/src/sys/compat/linux/linux_file.c	2007/04/01 15:10:26
_at__at_ -193,7 +193,7 _at__at_
  linux_at(struct thread *td, int dirfd, char *filename, char **newpath, char **freebuf)
  {
     	struct file *fp;
-	int error = 0;
+	int error = 0, vfslocked;
  	struct vnode *dvp;
  	struct filedesc *fdp = td->td_proc->p_fd;
  	char *fullpath = "unknown";
_at__at_ -207,9 +207,10 _at__at_

  	/* check for AT_FDWCD */
  	if (dirfd == LINUX_AT_FDCWD) {
-	   	FILEDESC_LOCK(fdp);
+	   	FILEDESC_SLOCK(fdp);
  		dvp = fdp->fd_cdir;
-	   	FILEDESC_UNLOCK(fdp);
+		vref(dvp);
+	   	FILEDESC_SUNLOCK(fdp);
  	} else {
  	   	error = fget(td, dirfd, &fp);
  		if (error)
_at__at_ -220,16 +221,28 _at__at_
  		   	fdrop(fp, td);
  			return (ENOTDIR);
  		}
+		vref(dvp);
  		fdrop(fp, td);
  	}

+	/*
+	 * XXXRW: This is bogus, as vn_fullpath() returns only an advisory
+	 * file path, and may fail in several common situations, including
+	 * for file systems that don't use the name cache, and if the entry
+	 * for the file falls out of the name cache.  We should implement
+	 * openat() in the FreeBSD native system call layer properly (using a
+	 * requested starting directory), and have Linux and other ABIs wrap
+	 * the native implementation.
+	 */
  	error = vn_fullpath(td, dvp, &fullpath, &freepath);
  	if (!error) {
  	   	*newpath = malloc(strlen(fullpath) + strlen(filename) + 2, M_TEMP, M_WAITOK | M_ZERO);
  		*freebuf = freepath;
  		sprintf(*newpath, "%s/%s", fullpath, filename);
  	}
-
+	vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
+	vrele(dvp);
+	VFS_UNLOCK_GIANT(vfslocked);
  	return (error);
  }

--- //depot/vendor/freebsd/src/sys/compat/svr4/svr4_filio.c	2005/01/05 22:36:13
+++ //depot/user/rwatson/filedesc/src/sys/compat/svr4/svr4_filio.c	2007/03/03 22:39:43
_at__at_ -211,15 +211,15 _at__at_

  	switch (cmd) {
  	case SVR4_FIOCLEX:
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XLOCK(fdp);
  		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		return 0;

  	case SVR4_FIONCLEX:
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XLOCK(fdp);
  		fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		return 0;

  	case SVR4_FIOGETOWN:
--- //depot/vendor/freebsd/src/sys/dev/streams/streams.c	2006/07/21 20:40:58
+++ //depot/user/rwatson/filedesc/src/sys/dev/streams/streams.c	2007/03/03 22:39:43
_at__at_ -253,12 +253,15 _at__at_
  	   return error;
  	}

-	FILEDESC_LOCK_FAST(fdp);
+	/*
+	 * XXXRW: Should be locking fp?
+	 */
+	FILEDESC_XLOCK(fdp);
  	fp->f_data = so;
  	fp->f_flag = FREAD|FWRITE;
  	fp->f_ops = &svr4_netops;
  	fp->f_type = DTYPE_SOCKET;
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);

  	/*
  	 * Allocate a stream structure and attach it to this socket.
--- //depot/vendor/freebsd/src/sys/fs/fdescfs/fdesc_vfsops.c	2006/05/15 19:46:09
+++ //depot/user/rwatson/filedesc/src/sys/fs/fdescfs/fdesc_vfsops.c	2007/03/03 22:39:43
_at__at_ -176,7 +176,7 _at__at_
  	lim = lim_cur(td->td_proc, RLIMIT_NOFILE);
  	PROC_UNLOCK(td->td_proc);
  	fdp = td->td_proc->p_fd;
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_SLOCK(fdp);
  	last = min(fdp->fd_nfiles, lim);
  	freefd = 0;
  	for (i = fdp->fd_freefile; i < last; i++)
_at__at_ -189,7 +189,7 _at__at_
  	 */
  	if (fdp->fd_nfiles < lim)
  		freefd += (lim - fdp->fd_nfiles);
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_SUNLOCK(fdp);

  	sbp->f_flags = 0;
  	sbp->f_bsize = DEV_BSIZE;
--- //depot/vendor/freebsd/src/sys/fs/fdescfs/fdesc_vnops.c	2007/03/13 01:54:24
+++ //depot/user/rwatson/filedesc/src/sys/fs/fdescfs/fdesc_vnops.c	2007/03/17 21:03:04
_at__at_ -457,7 +457,7 _at__at_

  	fcnt = i - 2;		/* The first two nodes are `.' and `..' */

-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_SLOCK(fdp);
  	while (i < fdp->fd_nfiles + 2 && uio->uio_resid >= UIO_MX) {
  		switch (i) {
  		case 0:	/* `.' */
_at__at_ -473,7 +473,7 _at__at_
  			break;
  		default:
  			if (fdp->fd_ofiles[fcnt] == NULL) {
-				FILEDESC_UNLOCK_FAST(fdp);
+				FILEDESC_SUNLOCK(fdp);
  				goto done;
  			}

_at__at_ -487,15 +487,15 _at__at_
  		/*
  		 * And ship to userland
  		 */
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		error = uiomove(dp, UIO_MX, uio);
  		if (error)
  			goto done;
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_SLOCK(fdp);
  		i++;
  		fcnt++;
  	}
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_SUNLOCK(fdp);

  done:
  	uio->uio_offset = i * UIO_MX;
--- //depot/vendor/freebsd/src/sys/fs/unionfs/union_subr.c	2007/03/13 01:54:24
+++ //depot/user/rwatson/filedesc/src/sys/fs/unionfs/union_subr.c	2007/03/17 21:03:04
_at__at_ -450,9 +450,9 _at__at_
  		}
  		break;
  	default:		/* UNIONFS_TRADITIONAL */
-		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
+		FILEDESC_SLOCK(td->td_proc->p_fd);
  		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
-		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
+		FILEDESC_SUNLOCK(td->td_proc->p_fd);
  		uva->va_uid = ump->um_uid;
  		uva->va_gid = ump->um_gid;
  		break;
--- //depot/vendor/freebsd/src/sys/kern/kern_descrip.c	2007/03/15 21:21:17
+++ //depot/user/rwatson/filedesc/src/sys/kern/kern_descrip.c	2007/04/01 17:49:49
_at__at_ -211,9 +211,11 _at__at_
  static void
  fdused(struct filedesc *fdp, int fd)
  {
-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+
+	FILEDESC_XLOCK_ASSERT(fdp);
  	KASSERT(!fdisused(fdp, fd),
  	    ("fd already used"));
+
  	fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
  	if (fd > fdp->fd_lastfile)
  		fdp->fd_lastfile = fd;
_at__at_ -227,11 +229,13 _at__at_
  static void
  fdunused(struct filedesc *fdp, int fd)
  {
-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+
+	FILEDESC_XLOCK_ASSERT(fdp);
  	KASSERT(fdisused(fdp, fd),
  	    ("fd is already unused"));
  	KASSERT(fdp->fd_ofiles[fd] == NULL,
  	    ("fd is still in use"));
+
  	fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
  	if (fd < fdp->fd_freefile)
  		fdp->fd_freefile = fd;
_at__at_ -371,10 +375,14 _at__at_
  	flg = F_POSIX;
  	p = td->td_proc;
  	fdp = p->p_fd;
-	FILEDESC_LOCK(fdp);
+
+	/*
+	 * XXXRW: It could be an exclusive lock is not [always] needed here.
+	 */
+	FILEDESC_XLOCK(fdp);
  	if ((unsigned)fd >= fdp->fd_nfiles ||
  	    (fp = fdp->fd_ofiles[fd]) == NULL) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		error = EBADF;
  		goto done2;
  	}
_at__at_ -383,7 +391,7 _at__at_
  	switch (cmd) {
  	case F_DUPFD:
  		/* mtx_assert(&Giant, MA_NOTOWNED); */
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		newmin = arg;
  		PROC_LOCK(p);
  		if (newmin >= lim_cur(p, RLIMIT_NOFILE) ||
_at__at_ -399,14 +407,14 _at__at_
  	case F_GETFD:
  		/* mtx_assert(&Giant, MA_NOTOWNED); */
  		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		break;

  	case F_SETFD:
  		/* mtx_assert(&Giant, MA_NOTOWNED); */
  		*pop = (*pop &~ UF_EXCLOSE) |
  		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		break;

  	case F_GETFL:
_at__at_ -414,7 +422,7 _at__at_
  		FILE_LOCK(fp);
  		td->td_retval[0] = OFLAGS(fp->f_flag);
  		FILE_UNLOCK(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		break;

  	case F_SETFL:
_at__at_ -424,7 +432,7 _at__at_
  		fp->f_flag &= ~FCNTLFLAGS;
  		fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
  		FILE_UNLOCK(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		tmp = fp->f_flag & FNONBLOCK;
  		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
  		if (error) {
_at__at_ -448,7 +456,7 _at__at_
  	case F_GETOWN:
  		mtx_assert(&Giant, MA_OWNED);
  		fhold(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
  		if (error == 0)
  			td->td_retval[0] = tmp;
_at__at_ -458,7 +466,7 _at__at_
  	case F_SETOWN:
  		mtx_assert(&Giant, MA_OWNED);
  		fhold(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		tmp = arg;
  		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
  		fdrop(fp, td);
_at__at_ -472,7 +480,7 _at__at_
  	case F_SETLK:
  		mtx_assert(&Giant, MA_OWNED);
  		if (fp->f_type != DTYPE_VNODE) {
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			error = EBADF;
  			break;
  		}
_at__at_ -482,7 +490,7 _at__at_
  			if (fp->f_offset < 0 ||
  			    (flp->l_start > 0 &&
  			     fp->f_offset > OFF_MAX - flp->l_start)) {
-				FILEDESC_UNLOCK(fdp);
+				FILEDESC_XUNLOCK(fdp);
  				error = EOVERFLOW;
  				break;
  			}
_at__at_ -493,7 +501,7 _at__at_
  		 * VOP_ADVLOCK() may block.
  		 */
  		fhold(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		vp = fp->f_vnode;

  		switch (flp->l_type) {
_at__at_ -528,10 +536,10 _at__at_
  			break;
  		}
  		/* Check for race with close */
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XLOCK(fdp);
  		if ((unsigned) fd >= fdp->fd_nfiles ||
  		    fp != fdp->fd_ofiles[fd]) {
-			FILEDESC_UNLOCK_FAST(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			flp->l_whence = SEEK_SET;
  			flp->l_start = 0;
  			flp->l_len = 0;
_at__at_ -539,21 +547,21 _at__at_
  			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
  					   F_UNLCK, flp, F_POSIX);
  		} else
-			FILEDESC_UNLOCK_FAST(fdp);
+			FILEDESC_XUNLOCK(fdp);
  		fdrop(fp, td);
  		break;

  	case F_GETLK:
  		mtx_assert(&Giant, MA_OWNED);
  		if (fp->f_type != DTYPE_VNODE) {
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			error = EBADF;
  			break;
  		}
  		flp = (struct flock *)arg;
  		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
  		    flp->l_type != F_UNLCK) {
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			error = EINVAL;
  			break;
  		}
_at__at_ -562,7 +570,7 _at__at_
  			    fp->f_offset > OFF_MAX - flp->l_start) ||
  			    (flp->l_start < 0 &&
  			     fp->f_offset < OFF_MIN - flp->l_start)) {
-				FILEDESC_UNLOCK(fdp);
+				FILEDESC_XUNLOCK(fdp);
  				error = EOVERFLOW;
  				break;
  			}
_at__at_ -572,14 +580,14 _at__at_
  		 * VOP_ADVLOCK() may block.
  		 */
  		fhold(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		vp = fp->f_vnode;
  		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
  		    F_POSIX);
  		fdrop(fp, td);
  		break;
  	default:
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		error = EINVAL;
  		break;
  	}
_at__at_ -593,7 +601,8 _at__at_
   * Common code for dup, dup2, and fcntl(F_DUPFD).
   */
  static int
-do_dup(struct thread *td, enum dup_type type, int old, int new, register_t *retval)
+do_dup(struct thread *td, enum dup_type type, int old, int new,
+    register_t *retval)
  {
  	struct filedesc *fdp;
  	struct proc *p;
_at__at_ -619,14 +628,14 _at__at_
  	if (new >= maxfd)
  		return (EMFILE);

-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);
  	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		return (EBADF);
  	}
  	if (type == DUP_FIXED && old == new) {
  		*retval = new;
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		return (0);
  	}
  	fp = fdp->fd_ofiles[old];
_at__at_ -646,7 +655,7 _at__at_
  			fdused(fdp, new);
  	} else {
  		if ((error = fdalloc(td, new, &new)) != 0) {
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			fdrop(fp, td);
  			return (error);
  		}
_at__at_ -661,7 +670,7 _at__at_
  		/* we've allocated a descriptor which we won't use */
  		if (fdp->fd_ofiles[new] == NULL)
  			fdunused(fdp, new);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		fdrop(fp, td);
  		return (EBADF);
  	}
_at__at_ -708,20 +717,20 _at__at_
  		knote_fdclose(td, new);
  		if (delfp->f_type == DTYPE_MQUEUE)
  			mq_fdclose(td, new, delfp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		(void) closef(delfp, td);
  		if (holdleaders) {
-			FILEDESC_LOCK_FAST(fdp);
+			FILEDESC_XLOCK(fdp);
  			fdp->fd_holdleaderscount--;
  			if (fdp->fd_holdleaderscount == 0 &&
  			    fdp->fd_holdleaderswakeup != 0) {
  				fdp->fd_holdleaderswakeup = 0;
  				wakeup(&fdp->fd_holdleaderscount);
  			}
-			FILEDESC_UNLOCK_FAST(fdp);
+			FILEDESC_XUNLOCK(fdp);
  		}
  	} else {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  	}
  	return (0);
  }
_at__at_ -979,10 +988,10 _at__at_

  	AUDIT_SYSCLOSE(td, fd);

-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);
  	if ((unsigned)fd >= fdp->fd_nfiles ||
  	    (fp = fdp->fd_ofiles[fd]) == NULL) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		return (EBADF);
  	}
  	fdp->fd_ofiles[fd] = NULL;
_at__at_ -998,27 +1007,26 _at__at_
  	}

  	/*
-	 * We now hold the fp reference that used to be owned by the descriptor
-	 * array.
-	 * We have to unlock the FILEDESC *AFTER* knote_fdclose to prevent a
-	 * race of the fd getting opened, a knote added, and deleteing a knote
-	 * for the new fd.
+	 * We now hold the fp reference that used to be owned by the
+	 * descriptor array.  We have to unlock the FILEDESC *AFTER*
+	 * knote_fdclose to prevent a race of the fd getting opened, a knote
+	 * added, and deleting a knote for the new fd.
  	 */
  	knote_fdclose(td, fd);
  	if (fp->f_type == DTYPE_MQUEUE)
  		mq_fdclose(td, fd, fp);
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_XUNLOCK(fdp);

  	error = closef(fp, td);
  	if (holdleaders) {
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XLOCK(fdp);
  		fdp->fd_holdleaderscount--;
  		if (fdp->fd_holdleaderscount == 0 &&
  		    fdp->fd_holdleaderswakeup != 0) {
  			fdp->fd_holdleaderswakeup = 0;
  			wakeup(&fdp->fd_holdleaderscount);
  		}
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(fdp);
  	}
  	return (error);
  }
_at__at_ -1176,7 +1184,7 _at__at_
  	int nnfiles, onfiles;
  	NDSLOTTYPE *nmap;

-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+	FILEDESC_XLOCK_ASSERT(fdp);

  	KASSERT(fdp->fd_nfiles > 0,
  	    ("zero-length file table"));
_at__at_ -1189,7 +1197,7 _at__at_
  		return;

  	/* allocate a new table and (if required) new bitmaps */
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_XUNLOCK(fdp);
  	MALLOC(ntable, struct file **, nnfiles * OFILESIZE,
  	    M_FILEDESC, M_ZERO | M_WAITOK);
  	nfileflags = (char *)&ntable[nnfiles];
_at__at_ -1198,7 +1206,7 _at__at_
  		    M_FILEDESC, M_ZERO | M_WAITOK);
  	else
  		nmap = NULL;
-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);

  	/*
  	 * We now have new tables ready to go.  Since we dropped the
_at__at_ -1237,7 +1245,7 _at__at_
  	struct filedesc *fdp = p->p_fd;
  	int fd = -1, maxfd;

-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+	FILEDESC_XLOCK_ASSERT(fdp);

  	if (fdp->fd_freefile > minfd)
  		minfd = fdp->fd_freefile; 
_at__at_ -1276,8 +1284,8 _at__at_
  }

  /*
- * Check to see whether n user file descriptors
- * are available to the process p.
+ * Check to see whether n user file descriptors are available to the process
+ * p.
   */
  int
  fdavail(struct thread *td, int n)
_at__at_ -1287,7 +1295,7 _at__at_
  	struct file **fpp;
  	int i, lim, last;

-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+	FILEDESC_LOCK_ASSERT(fdp);

  	PROC_LOCK(p);
  	lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
_at__at_ -1304,12 +1312,11 _at__at_
  }

  /*
- * Create a new open file structure and allocate
- * a file decriptor for the process that refers to it.
- * We add one reference to the file for the descriptor table
- * and one reference for resultfp. This is to prevent us being
- * preempted and the entry in the descriptor table closed after
- * we release the FILEDESC lock.
+ * Create a new open file structure and allocate a file descriptor for the
+ * process that refers to it.  We add one reference to the file for the
+ * descriptor table and one reference for resultfp. This is to prevent us
+ * being preempted and the entry in the descriptor table closed after we
+ * release the FILEDESC lock.
   */
  int
  falloc(struct thread *td, struct file **resultfp, int *resultfd)
_at__at_ -1350,7 +1357,7 _at__at_
  	fp->f_ops = &badfileops;
  	fp->f_data = NULL;
  	fp->f_vnode = NULL;
-	FILEDESC_LOCK(p->p_fd);
+	FILEDESC_XLOCK(p->p_fd);
  	if ((fq = p->p_fd->fd_ofiles[0])) {
  		LIST_INSERT_AFTER(fq, fp, f_list);
  	} else {
_at__at_ -1358,14 +1365,14 _at__at_
  	}
  	sx_xunlock(&filelist_lock);
  	if ((error = fdalloc(td, 0, &i))) {
-		FILEDESC_UNLOCK(p->p_fd);
+		FILEDESC_XUNLOCK(p->p_fd);
  		fdrop(fp, td);
  		if (resultfp)
  			fdrop(fp, td);
  		return (error);
  	}
  	p->p_fd->fd_ofiles[i] = fp;
-	FILEDESC_UNLOCK(p->p_fd);
+	FILEDESC_XUNLOCK(p->p_fd);
  	if (resultfp)
  		*resultfp = fp;
  	if (resultfd)
_at__at_ -1383,9 +1390,9 _at__at_
  	struct filedesc0 *newfdp;

  	newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO);
-	mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
+	FILEDESC_LOCK_INIT(&newfdp->fd_fd);
  	if (fdp != NULL) {
-		FILEDESC_LOCK(fdp);
+		FILEDESC_XLOCK(fdp);
  		newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
  		if (newfdp->fd_fd.fd_cdir)
  			VREF(newfdp->fd_fd.fd_cdir);
_at__at_ -1395,7 +1402,7 _at__at_
  		newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
  		if (newfdp->fd_fd.fd_jdir)
  			VREF(newfdp->fd_fd.fd_jdir);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  	}

  	/* Create the file descriptor table. */
_at__at_ -1434,7 +1441,7 _at__at_
  	if (i > 0)
  		return;

-	mtx_destroy(&fdp->fd_mtx);
+	FILEDESC_LOCK_DESTROY(fdp);
  	FREE(fdp, M_FILEDESC);
  }

_at__at_ -1444,9 +1451,10 _at__at_
  struct filedesc *
  fdshare(struct filedesc *fdp)
  {
-	FILEDESC_LOCK_FAST(fdp);
+
+	FILEDESC_XLOCK(fdp);
  	fdp->fd_refcnt++;
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);
  	return (fdp);
  }

_at__at_ -1457,22 +1465,21 _at__at_
  fdunshare(struct proc *p, struct thread *td)
  {

-	FILEDESC_LOCK_FAST(p->p_fd);
+	FILEDESC_XLOCK(p->p_fd);
  	if (p->p_fd->fd_refcnt > 1) {
  		struct filedesc *tmp;

-		FILEDESC_UNLOCK_FAST(p->p_fd);
+		FILEDESC_XUNLOCK(p->p_fd);
  		tmp = fdcopy(p->p_fd);
  		fdfree(td);
  		p->p_fd = tmp;
  	} else
-		FILEDESC_UNLOCK_FAST(p->p_fd);
+		FILEDESC_XUNLOCK(p->p_fd);
  }

  /*
- * Copy a filedesc structure.
- * A NULL pointer in returns a NULL reference, this is to ease callers,
- * not catch errors.
+ * Copy a filedesc structure.  A NULL pointer in returns a NULL reference,
+ * this is to ease callers, not catch errors.
   */
  struct filedesc *
  fdcopy(struct filedesc *fdp)
_at__at_ -1485,13 +1492,13 _at__at_
  		return (NULL);

  	newfdp = fdinit(fdp);
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_SLOCK(fdp);
  	while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
-		FILEDESC_UNLOCK_FAST(fdp);
-		FILEDESC_LOCK(newfdp);
+		FILEDESC_SUNLOCK(fdp);
+		FILEDESC_XLOCK(newfdp);
  		fdgrowtable(newfdp, fdp->fd_lastfile + 1);
-		FILEDESC_UNLOCK(newfdp);
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(newfdp);
+		FILEDESC_SLOCK(fdp);
  	}
  	/* copy everything except kqueue descriptors */
  	newfdp->fd_freefile = -1;
_at__at_ -1507,17 +1514,17 _at__at_
  				newfdp->fd_freefile = i;
  		}
  	}
-	FILEDESC_UNLOCK_FAST(fdp);
-	FILEDESC_LOCK(newfdp);
+	FILEDESC_SUNLOCK(fdp);
+	FILEDESC_XLOCK(newfdp);
  	for (i = 0; i <= newfdp->fd_lastfile; ++i)
  		if (newfdp->fd_ofiles[i] != NULL)
  			fdused(newfdp, i);
-	FILEDESC_UNLOCK(newfdp);
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(newfdp);
+	FILEDESC_SLOCK(fdp);
  	if (newfdp->fd_freefile == -1)
  		newfdp->fd_freefile = i;
  	newfdp->fd_cmask = fdp->fd_cmask;
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	return (newfdp);
  }

_at__at_ -1543,7 +1550,7 _at__at_
  	/* Check for special need to clear POSIX style locks */
  	fdtol = td->td_proc->p_fdtol;
  	if (fdtol != NULL) {
-		FILEDESC_LOCK(fdp);
+		FILEDESC_XLOCK(fdp);
  		KASSERT(fdtol->fdl_refcount > 0,
  			("filedesc_to_refcount botch: fdl_refcount=%d",
  			 fdtol->fdl_refcount));
_at__at_ -1557,7 +1564,7 _at__at_
  					continue;
  				fp = *fpp;
  				fhold(fp);
-				FILEDESC_UNLOCK(fdp);
+				FILEDESC_XUNLOCK(fdp);
  				lf.l_whence = SEEK_SET;
  				lf.l_start = 0;
  				lf.l_len = 0;
_at__at_ -1571,7 +1578,7 _at__at_
  						   &lf,
  						   F_POSIX);
  				VFS_UNLOCK_GIANT(locked);
-				FILEDESC_LOCK(fdp);
+				FILEDESC_XLOCK(fdp);
  				fdrop(fp, td);
  				fpp = fdp->fd_ofiles + i;
  			}
_at__at_ -1585,18 +1592,18 _at__at_
  				 * in a shared file descriptor table.
  				 */
  				fdp->fd_holdleaderswakeup = 1;
-				msleep(&fdp->fd_holdleaderscount, &fdp->fd_mtx,
-				       PLOCK, "fdlhold", 0);
+				sx_sleep(&fdp->fd_holdleaderscount,
+				    FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
  				goto retry;
  			}
  			if (fdtol->fdl_holdcount > 0) {
  				/*
-				 * Ensure that fdtol->fdl_leader
-				 * remains valid in closef().
+				 * Ensure that fdtol->fdl_leader remains
+				 * valid in closef().
  				 */
  				fdtol->fdl_wakeup = 1;
-				msleep(fdtol, &fdp->fd_mtx,
-				       PLOCK, "fdlhold", 0);
+				sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
+				    "fdlhold", 0);
  				goto retry;
  			}
  		}
_at__at_ -1608,13 +1615,13 _at__at_
  		} else
  			fdtol = NULL;
  		td->td_proc->p_fdtol = NULL;
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		if (fdtol != NULL)
  			FREE(fdtol, M_FILEDESC_TO_LEADER);
  	}
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_XLOCK(fdp);
  	i = --fdp->fd_refcnt;
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);
  	if (i > 0)
  		return;
  	/*
_at__at_ -1626,7 +1633,7 _at__at_
  		if (*fpp)
  			(void) closef(*fpp, td);
  	}
-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);

  	/* XXX This should happen earlier. */
  	mtx_lock(&fdesc_mtx);
_at__at_ -1646,7 +1653,7 _at__at_
  	fdp->fd_rdir = NULL;
  	jdir = fdp->fd_jdir;
  	fdp->fd_jdir = NULL;
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_XUNLOCK(fdp);

  	if (cdir) {
  		locked = VFS_LOCK_GIANT(cdir->v_mount);
_at__at_ -1706,7 +1713,7 _at__at_
  	 * Note: fdp->fd_ofiles may be reallocated out from under us while
  	 * we are blocked in a close.  Be careful!
  	 */
-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);
  	for (i = 0; i <= fdp->fd_lastfile; i++) {
  		if (i > 2)
  			break;
_at__at_ -1722,35 +1729,33 _at__at_
  			fdp->fd_ofiles[i] = NULL;
  			fdp->fd_ofileflags[i] = 0;
  			fdunused(fdp, i);
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			(void) closef(fp, td);
-			FILEDESC_LOCK(fdp);
+			FILEDESC_XLOCK(fdp);
  		}
  	}
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_XUNLOCK(fdp);
  }

  /*
- * If a specific file object occupies a specific file descriptor,
- * close the file descriptor entry and drop a reference on the file
- * object.  This is a convenience function to handle a subsequent
- * error in a function that calls falloc() that handles the race that
- * another thread might have closed the file descriptor out from under
- * the thread creating the file object.
+ * If a specific file object occupies a specific file descriptor, close the
+ * file descriptor entry and drop a reference on the file object.  This is a
+ * convenience function to handle a subsequent error in a function that calls
+ * falloc() that handles the race that another thread might have closed the
+ * file descriptor out from under the thread creating the file object.
   */
  void
  fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td)
  {

-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);
  	if (fdp->fd_ofiles[idx] == fp) {
  		fdp->fd_ofiles[idx] = NULL;
  		fdunused(fdp, idx);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		fdrop(fp, td);
-	} else {
-		FILEDESC_UNLOCK(fdp);
-	}
+	} else
+		FILEDESC_XUNLOCK(fdp);
  }

  /*
_at__at_ -1767,7 +1772,7 _at__at_
  	if (fdp == NULL)
  		return;

-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);

  	/*
  	 * We cannot cache fd_ofiles or fd_ofileflags since operations
_at__at_ -1790,12 +1795,12 _at__at_
  			fdunused(fdp, i);
  			if (fp->f_type == DTYPE_MQUEUE)
  				mq_fdclose(td, i, fp);
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			(void) closef(fp, td);
-			FILEDESC_LOCK(fdp);
+			FILEDESC_XLOCK(fdp);
  		}
  	}
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_XUNLOCK(fdp);
  }

  /*
_at__at_ -1838,14 +1843,15 _at__at_
  				/*
  				 * Someone may have closed the entry in the
  				 * file descriptor table, so check it hasn't
-				 * changed before dropping the reference count.
+				 * changed before dropping the reference
+				 * count.
  				 */
-				FILEDESC_LOCK(fdp);
+				FILEDESC_XLOCK(fdp);
  				KASSERT(fdp->fd_ofiles[fd] == fp,
  				    ("table not shared, how did it change?"));
  				fdp->fd_ofiles[fd] = NULL;
  				fdunused(fdp, fd);
-				FILEDESC_UNLOCK(fdp);
+				FILEDESC_XUNLOCK(fdp);
  				fdrop(fp, td);
  				fdrop(fp, td);
  				break;
_at__at_ -1873,8 +1879,7 _at__at_
  }

  /*
- * Internal form of close.
- * Decrement reference count on file structure.
+ * Internal form of close.  Decrement reference count on file structure.
   * Note: td may be NULL when closing a file that was being passed in a
   * message.
   *
_at__at_ -1917,11 +1922,11 _at__at_
  		fdtol = td->td_proc->p_fdtol;
  		if (fdtol != NULL) {
  			/*
-			 * Handle special case where file descriptor table
-			 * is shared between multiple process leaders.
+			 * Handle special case where file descriptor table is
+			 * shared between multiple process leaders.
  			 */
  			fdp = td->td_proc->p_fd;
-			FILEDESC_LOCK(fdp);
+			FILEDESC_XLOCK(fdp);
  			for (fdtol = fdtol->fdl_next;
  			     fdtol != td->td_proc->p_fdtol;
  			     fdtol = fdtol->fdl_next) {
_at__at_ -1929,7 +1934,7 _at__at_
  				     P_ADVLOCK) == 0)
  					continue;
  				fdtol->fdl_holdcount++;
-				FILEDESC_UNLOCK(fdp);
+				FILEDESC_XUNLOCK(fdp);
  				lf.l_whence = SEEK_SET;
  				lf.l_start = 0;
  				lf.l_len = 0;
_at__at_ -1938,7 +1943,7 _at__at_
  				(void) VOP_ADVLOCK(vp,
  						   (caddr_t)fdtol->fdl_leader,
  						   F_UNLCK, &lf, F_POSIX);
-				FILEDESC_LOCK(fdp);
+				FILEDESC_XLOCK(fdp);
  				fdtol->fdl_holdcount--;
  				if (fdtol->fdl_holdcount == 0 &&
  				    fdtol->fdl_wakeup != 0) {
_at__at_ -1946,7 +1951,7 _at__at_
  					wakeup(fdtol);
  				}
  			}
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  		}
  		VFS_UNLOCK_GIANT(vfslocked);
  	}
_at__at_ -1954,21 +1959,21 _at__at_
  }

  /*
- * Extract the file pointer associated with the specified descriptor for
- * the current user process.
+ * Extract the file pointer associated with the specified descriptor for the
+ * current user process.
   *
   * If the descriptor doesn't exist, EBADF is returned.
   *
- * If the descriptor exists but doesn't match 'flags' then
- * return EBADF for read attempts and EINVAL for write attempts.
+ * If the descriptor exists but doesn't match 'flags' then return EBADF for
+ * read attempts and EINVAL for write attempts.
   *
   * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
- * It should be dropped with fdrop().
- * If it is not set, then the refcount will not be bumped however the
- * thread's filedesc struct will be returned locked (for fgetsock).
+ * It should be dropped with fdrop().  If it is not set, then the refcount
+ * will not be bumped however the thread's filedesc struct will be returned
+ * locked (for fgetsock).
   *
- * If an error occured the non-zero error is returned and *fpp is set to NULL.
- * Otherwise *fpp is set and zero is returned.
+ * If an error occurred the non-zero error is returned and *fpp is set to
+ * NULL.  Otherwise *fpp is set and zero is returned.
   */
  static __inline int
  _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
_at__at_ -1979,9 +1984,9 _at__at_
  	*fpp = NULL;
  	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
  		return (EBADF);
-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		return (EBADF);
  	}

_at__at_ -1991,16 +1996,16 _at__at_
  	 * Only one flag, or 0, may be specified.
  	 */
  	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		return (EBADF);
  	}
  	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		return (EBADF);
  	}
  	if (hold) {
  		fhold(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_SUNLOCK(fdp);
  	}
  	*fpp = fp;
  	return (0);
_at__at_ -2028,9 +2033,9 _at__at_
  }

  /*
- * Like fget() but loads the underlying vnode, or returns an error if
- * the descriptor does not represent a vnode.  Note that pipes use vnodes
- * but never have VM objects.  The returned vnode will be vref()d.
+ * Like fget() but loads the underlying vnode, or returns an error if the
+ * descriptor does not represent a vnode.  Note that pipes use vnodes but
+ * never have VM objects.  The returned vnode will be vref()'d.
   *
   * XXX: what about the unused flags ?
   */
_at__at_ -2049,7 +2054,7 _at__at_
  		*vpp = fp->f_vnode;
  		vref(*vpp);
  	}
-	FILEDESC_UNLOCK(td->td_proc->p_fd);
+	FILEDESC_SUNLOCK(td->td_proc->p_fd);
  	return (error);
  }

_at__at_ -2077,15 +2082,15 _at__at_
  #endif

  /*
- * Like fget() but loads the underlying socket, or returns an error if
- * the descriptor does not represent a socket.
+ * Like fget() but loads the underlying socket, or returns an error if the
+ * descriptor does not represent a socket.
   *
- * We bump the ref count on the returned socket.  XXX Also obtain the SX
- * lock in the future.
+ * We bump the ref count on the returned socket.  XXX Also obtain the SX lock
+ * in the future.
   *
   * XXXRW: fgetsock() and fputsock() are deprecated, as consumers should rely
- * on their file descriptor reference to prevent the socket from being
- * freed during use.
+ * on their file descriptor reference to prevent the socket from being free'd
+ * during use.
   */
  int
  fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
_at__at_ -2110,7 +2115,7 _at__at_
  		soref(*spp);
  		SOCK_UNLOCK(*spp);
  	}
-	FILEDESC_UNLOCK(td->td_proc->p_fd);
+	FILEDESC_SUNLOCK(td->td_proc->p_fd);
  	return (error);
  }

_at__at_ -2257,22 +2262,20 _at__at_
  	 * of file descriptors, or the fd to be dup'd has already been
  	 * closed, then reject.
  	 */
-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);
  	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
  	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		return (EBADF);
  	}

  	/*
  	 * There are two cases of interest here.
  	 *
-	 * For ENODEV simply dup (dfd) to file descriptor
-	 * (indx) and return.
+	 * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
  	 *
-	 * For ENXIO steal away the file structure from (dfd) and
-	 * store it in (indx).  (dfd) is effectively closed by
-	 * this operation.
+	 * For ENXIO steal away the file structure from (dfd) and store it in
+	 * (indx).  (dfd) is effectively closed by this operation.
  	 *
  	 * Any other error code is just returned.
  	 */
_at__at_ -2285,7 +2288,7 _at__at_
  		FILE_LOCK(wfp);
  		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
  			FILE_UNLOCK(wfp);
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			return (EACCES);
  		}
  		fp = fdp->fd_ofiles[indx];
_at__at_ -2295,7 +2298,7 _at__at_
  			fdused(fdp, indx);
  		fhold_locked(wfp);
  		FILE_UNLOCK(wfp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		if (fp != NULL)
  			/*
  			 * We now own the reference to fp that the ofiles[]
_at__at_ -2316,7 +2319,7 _at__at_
  		fdunused(fdp, dfd);
  		if (fp == NULL)
  			fdused(fdp, indx);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);

  		/*
  		 * We now own the reference to fp that the ofiles[] array
_at__at_ -2327,16 +2330,15 _at__at_
  		return (0);

  	default:
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		return (error);
  	}
  	/* NOTREACHED */
  }

  /*
- * Scan all active processes to see if any of them have a current
- * or root directory of `olddp'. If so, replace them with the new
- * mount point.
+ * Scan all active processes to see if any of them have a current or root
+ * directory of `olddp'. If so, replace them with the new mount point.
   */
  void
  mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
_at__at_ -2353,7 +2355,7 _at__at_
  		if (fdp == NULL)
  			continue;
  		nrele = 0;
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XLOCK(fdp);
  		if (fdp->fd_cdir == olddp) {
  			vref(newdp);
  			fdp->fd_cdir = newdp;
_at__at_ -2364,7 +2366,7 _at__at_
  			fdp->fd_rdir = newdp;
  			nrele++;
  		}
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		fddrop(fdp);
  		while (nrele--)
  			vrele(olddp);
_at__at_ -2391,12 +2393,12 _at__at_
  	fdtol->fdl_wakeup = 0;
  	fdtol->fdl_leader = leader;
  	if (old != NULL) {
-		FILEDESC_LOCK(fdp);
+		FILEDESC_XLOCK(fdp);
  		fdtol->fdl_next = old->fdl_next;
  		fdtol->fdl_prev = old;
  		old->fdl_next = fdtol;
  		fdtol->fdl_next->fdl_prev = fdtol;
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  	} else {
  		fdtol->fdl_next = fdtol;
  		fdtol->fdl_prev = fdtol;
_at__at_ -2459,7 +2461,7 _at__at_
  		fdp = fdhold(p);
  		if (fdp == NULL)
  			continue;
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_SLOCK(fdp);
  		for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) {
  			if ((fp = fdp->fd_ofiles[n]) == NULL)
  				continue;
_at__at_ -2476,7 +2478,7 _at__at_
  			if (error)
  				break;
  		}
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		fddrop(fdp);
  		if (error)
  			break;
--- //depot/vendor/freebsd/src/sys/kern/kern_event.c	2007/03/04 22:41:05
+++ //depot/user/rwatson/filedesc/src/sys/kern/kern_event.c	2007/03/05 16:48:38
_at__at_ -527,9 +527,9 _at__at_
  	knlist_init(&kq->kq_sel.si_note, &kq->kq_lock, NULL, NULL, NULL);
  	TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);

-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_XLOCK(fdp);
  	SLIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);

  	FILE_LOCK(fp);
  	fp->f_flag = FREAD | FWRITE;
_at__at_ -1493,9 +1493,9 _at__at_

  	KQ_UNLOCK(kq);

-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_XLOCK(fdp);
  	SLIST_REMOVE(&fdp->fd_kqlist, kq, kqueue, kq_list);
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);

  	knlist_destroy(&kq->kq_sel.si_note);
  	mtx_destroy(&kq->kq_lock);
_at__at_ -1781,9 +1781,9 _at__at_
  }

  /*
- * remove all knotes referencing a specified fd
- * must be called with FILEDESC lock.  This prevents a race where a new fd
- * comes along and occupies the entry and we attach a knote to the fd.
+ * Remove all knotes referencing a specified fd.  Must be called with the
+ * FILEDESC lock held.  This prevents a race where a new fd comes along and
+ * occupies the entry and we attach a knote to the fd.
   */
  void
  knote_fdclose(struct thread *td, int fd)
_at__at_ -1793,7 +1793,7 _at__at_
  	struct knote *kn;
  	int influx;

-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+	FILEDESC_XLOCK_ASSERT(fdp);

  	/*
  	 * We shouldn't have to worry about new kevents appearing on fd
--- //depot/vendor/freebsd/src/sys/kern/kern_fork.c	2007/03/04 22:41:05
+++ //depot/user/rwatson/filedesc/src/sys/kern/kern_fork.c	2007/03/05 16:48:38
_at__at_ -458,9 +458,9 _at__at_
  			 * shared process leaders.
  			 */
  			fdtol = p1->p_fdtol;
-			FILEDESC_LOCK_FAST(p1->p_fd);
+			FILEDESC_XLOCK(p1->p_fd);
  			fdtol->fdl_refcount++;
-			FILEDESC_UNLOCK_FAST(p1->p_fd);
+			FILEDESC_XUNLOCK(p1->p_fd);
  		} else {
  			/*
  			 * Shared file descriptor table, and
--- //depot/vendor/freebsd/src/sys/kern/subr_witness.c	2007/04/01 15:52:48
+++ //depot/user/rwatson/filedesc/src/sys/kern/subr_witness.c	2007/04/01 18:01:27
_at__at_ -281,7 +281,6 _at__at_
  	 * Various mutexes
  	 */
  	{ "Giant", &lock_class_mtx_sleep },
-	{ "filedesc structure", &lock_class_mtx_sleep },
  	{ "pipe mutex", &lock_class_mtx_sleep },
  	{ "sigio lock", &lock_class_mtx_sleep },
  	{ "process group", &lock_class_mtx_sleep },
_at__at_ -294,7 +293,6 _at__at_
  	/*
  	 * Sockets
  	 */
-	{ "filedesc structure", &lock_class_mtx_sleep },
  	{ "accept", &lock_class_mtx_sleep },
  	{ "so_snd", &lock_class_mtx_sleep },
  	{ "so_rcv", &lock_class_mtx_sleep },
--- //depot/vendor/freebsd/src/sys/kern/sys_generic.c	2007/03/05 13:12:17
+++ //depot/user/rwatson/filedesc/src/sys/kern/sys_generic.c	2007/03/05 16:48:38
_at__at_ -568,14 +568,14 _at__at_
  	fdp = td->td_proc->p_fd;
  	switch (com) {
  	case FIONCLEX:
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XLOCK(fdp);
  		fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		goto out;
  	case FIOCLEX:
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XLOCK(fdp);
  		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		goto out;
  	case FIONBIO:
  		FILE_LOCK(fp);
_at__at_ -658,11 +658,10 _at__at_
  		return (EINVAL);
  	fdp = td->td_proc->p_fd;

-	FILEDESC_LOCK_FAST(fdp);
-
+	FILEDESC_SLOCK(fdp);
  	if (nd > td->td_proc->p_fd->fd_nfiles)
  		nd = td->td_proc->p_fd->fd_nfiles;   /* forgiving; slightly wrong */
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_SUNLOCK(fdp);

  	/*
  	 * Allocate just enough bits for the non-null fd_sets.  Use the
_at__at_ -809,7 +808,7 _at__at_
  	static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
  	struct filedesc *fdp = td->td_proc->p_fd;

-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	for (msk = 0; msk < 3; msk++) {
  		if (ibits[msk] == NULL)
  			continue;
_at__at_ -820,7 +819,7 _at__at_
  				if (!(bits & 1))
  					continue;
  				if ((fp = fget_locked(fdp, fd)) == NULL) {
-					FILEDESC_UNLOCK(fdp);
+					FILEDESC_SUNLOCK(fdp);
  					return (EBADF);
  				}
  				if (fo_poll(fp, flag[msk], td->td_ucred,
_at__at_ -832,7 +831,7 _at__at_
  			}
  		}
  	}
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	td->td_retval[0] = n;
  	return (0);
  }
_at__at_ -973,7 +972,7 _at__at_
  	struct file *fp;
  	int n = 0;

-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	for (i = 0; i < nfd; i++, fds++) {
  		if (fds->fd >= fdp->fd_nfiles) {
  			fds->revents = POLLNVAL;
_at__at_ -997,7 +996,7 _at__at_
  			}
  		}
  	}
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	td->td_retval[0] = n;
  	return (0);
  }
--- //depot/vendor/freebsd/src/sys/kern/uipc_mqueue.c	2007/03/13 01:54:24
+++ //depot/user/rwatson/filedesc/src/sys/kern/uipc_mqueue.c	2007/03/17 21:03:04
_at__at_ -2013,10 +2013,10 _at__at_
  	fp->f_data = pn;
  	FILE_UNLOCK(fp);

-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_XLOCK(fdp);
  	if (fdp->fd_ofiles[fd] == fp)
  		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);
  	td->td_retval[0] = fd;
  	fdrop(fp, td);
  	return (0);
_at__at_ -2197,14 +2197,14 _at__at_
  	if (error)
  		return (error);
  again:
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_SLOCK(fdp);
  	if (fget_locked(fdp, uap->mqd) != fp) {
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		error = EBADF;
  		goto out;
  	}
  	mtx_lock(&mq->mq_mutex);
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	if (uap->sigev != NULL) {
  		if (mq->mq_notifier != NULL) {
  			error = EBUSY;
_at__at_ -2267,7 +2267,8 _at__at_
  	struct mqueue *mq;

  	fdp = td->td_proc->p_fd;
-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+	FILEDESC_LOCK_ASSERT(fdp);
+
  	if (fp->f_ops == &mqueueops) {
  		mq = FPTOMQ(fp);
  		mtx_lock(&mq->mq_mutex);
_at__at_ -2295,7 +2296,7 _at__at_
  	int i;

  	fdp = p->p_fd;
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_SLOCK(fdp);
  	for (i = 0; i < fdp->fd_nfiles; ++i) {
  		fp = fget_locked(fdp, i);
  		if (fp != NULL && fp->f_ops == &mqueueops) {
_at__at_ -2305,7 +2306,7 _at__at_
  			mtx_unlock(&mq->mq_mutex);
  		}
  	}
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	KASSERT(LIST_EMPTY(&p->p_mqnotifier), ("mq notifiers left"));
  }

--- //depot/vendor/freebsd/src/sys/kern/uipc_syscalls.c	2007/03/05 13:12:17
+++ //depot/user/rwatson/filedesc/src/sys/kern/uipc_syscalls.c	2007/03/05 16:48:38
_at__at_ -124,7 +124,7 _at__at_
  	if (fdp == NULL)
  		error = EBADF;
  	else {
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_SLOCK(fdp);
  		fp = fget_locked(fdp, fd);
  		if (fp == NULL)
  			error = EBADF;
_at__at_ -137,7 +137,7 _at__at_
  				*fflagp = fp->f_flag;
  			error = 0;
  		}
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_SUNLOCK(fdp);
  	}
  	*fpp = fp;
  	return (error);
_at__at_ -182,12 +182,17 _at__at_
  	if (error) {
  		fdclose(fdp, fp, fd, td);
  	} else {
-		FILEDESC_LOCK_FAST(fdp);
+		/*
+		 * XXXRW: The logic here seems wrong -- shouldn't it be
+		 * locking the file, not the filedesc?  Other threads could
+		 * already have a reference to the socket by now.
+		 */
+		FILEDESC_XLOCK(fdp);
  		fp->f_data = so;	/* already has ref count */
  		fp->f_flag = FREAD|FWRITE;
  		fp->f_ops = &socketops;
  		fp->f_type = DTYPE_SOCKET;
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		td->td_retval[0] = fd;
  	}
  	fdrop(fp, td);
--- //depot/vendor/freebsd/src/sys/kern/uipc_usrreq.c	2007/03/12 14:57:57
+++ //depot/user/rwatson/filedesc/src/sys/kern/uipc_usrreq.c	2007/03/17 21:03:04
_at__at_ -1579,10 +1579,10 _at__at_
  				unp_freerights(rp, newfds);
  				goto next;
  			}
-			FILEDESC_LOCK(td->td_proc->p_fd);
+			FILEDESC_XLOCK(td->td_proc->p_fd);
  			/* if the new FD's will not fit free them.  */
  			if (!fdavail(td, newfds)) {
-				FILEDESC_UNLOCK(td->td_proc->p_fd);
+				FILEDESC_XUNLOCK(td->td_proc->p_fd);
  				error = EMSGSIZE;
  				unp_freerights(rp, newfds);
  				goto next;
_at__at_ -1597,7 +1597,7 _at__at_
  			*controlp = sbcreatecontrol(NULL, newlen,
  			    SCM_RIGHTS, SOL_SOCKET);
  			if (*controlp == NULL) {
-				FILEDESC_UNLOCK(td->td_proc->p_fd);
+				FILEDESC_XUNLOCK(td->td_proc->p_fd);
  				error = E2BIG;
  				unp_freerights(rp, newfds);
  				goto next;
_at__at_ -1616,7 +1616,7 _at__at_
  				unp_rights--;
  				*fdp++ = f;
  			}
-			FILEDESC_UNLOCK(td->td_proc->p_fd);
+			FILEDESC_XUNLOCK(td->td_proc->p_fd);
  		} else {
  			/* We can just copy anything else across. */
  			if (error || controlp == NULL)
_at__at_ -1738,23 +1738,24 _at__at_
  			 * files.  If not, reject the entire operation.
  			 */
  			fdp = data;
-			FILEDESC_LOCK(fdescp);
+			FILEDESC_SLOCK(fdescp);
  			for (i = 0; i < oldfds; i++) {
  				fd = *fdp++;
  				if ((unsigned)fd >= fdescp->fd_nfiles ||
  				    fdescp->fd_ofiles[fd] == NULL) {
-					FILEDESC_UNLOCK(fdescp);
+					FILEDESC_SUNLOCK(fdescp);
  					error = EBADF;
  					goto out;
  				}
  				fp = fdescp->fd_ofiles[fd];
  				if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
-					FILEDESC_UNLOCK(fdescp);
+					FILEDESC_SUNLOCK(fdescp);
  					error = EOPNOTSUPP;
  					goto out;
  				}

  			}
+
  			/*
  			 * Now replace the integer FDs with pointers to
  			 * the associated global file table entry..
_at__at_ -1763,7 +1764,7 _at__at_
  			*controlp = sbcreatecontrol(NULL, newlen,
  			    SCM_RIGHTS, SOL_SOCKET);
  			if (*controlp == NULL) {
-				FILEDESC_UNLOCK(fdescp);
+				FILEDESC_SUNLOCK(fdescp);
  				error = E2BIG;
  				goto out;
  			}
_at__at_ -1780,7 +1781,7 _at__at_
  				FILE_UNLOCK(fp);
  				unp_rights++;
  			}
-			FILEDESC_UNLOCK(fdescp);
+			FILEDESC_SUNLOCK(fdescp);
  			break;

  		case SCM_TIMESTAMP:
--- //depot/vendor/freebsd/src/sys/kern/vfs_cache.c	2007/03/05 13:12:17
+++ //depot/user/rwatson/filedesc/src/sys/kern/vfs_cache.c	2007/03/05 16:48:38
_at__at_ -717,10 +717,10 _at__at_
  	tmpbuf = malloc(buflen, M_TEMP, M_WAITOK);
  	fdp = td->td_proc->p_fd;
  	mtx_lock(&Giant);
-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	error = vn_fullpath1(td, fdp->fd_cdir, fdp->fd_rdir, tmpbuf,
  	    &bp, buflen);
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	mtx_unlock(&Giant);

  	if (!error) {
_at__at_ -771,9 +771,9 _at__at_

  	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
  	fdp = td->td_proc->p_fd;
-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	error = vn_fullpath1(td, vn, fdp->fd_rdir, buf, retbuf, MAXPATHLEN);
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);

  	if (!error)
  		*freebuf = buf;
--- //depot/vendor/freebsd/src/sys/kern/vfs_lookup.c	2007/03/31 16:11:57
+++ //depot/user/rwatson/filedesc/src/sys/kern/vfs_lookup.c	2007/04/01 13:10:13
_at__at_ -188,14 +188,14 _at__at_
  	/*
  	 * Get starting point for the translation.
  	 */
-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	ndp->ni_rootdir = fdp->fd_rdir;
  	ndp->ni_topdir = fdp->fd_jdir;

  	dp = fdp->fd_cdir;
  	vfslocked = VFS_LOCK_GIANT(dp->v_mount);
  	VREF(dp);
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	for (;;) {
  		/*
  		 * Check if root directory should replace current directory.
--- //depot/vendor/freebsd/src/sys/kern/vfs_mount.c	2007/04/01 13:12:37
+++ //depot/user/rwatson/filedesc/src/sys/kern/vfs_mount.c	2007/04/01 13:21:17
_at__at_ -1361,7 +1361,7 _at__at_
  		panic("Cannot find root vnode");

  	p = td->td_proc;
-	FILEDESC_LOCK(p->p_fd);
+	FILEDESC_SLOCK(p->p_fd);

  	if (p->p_fd->fd_cdir != NULL)
  		vrele(p->p_fd->fd_cdir);
_at__at_ -1373,7 +1373,7 _at__at_
  	p->p_fd->fd_rdir = rootvnode;
  	VREF(rootvnode);

-	FILEDESC_UNLOCK(p->p_fd);
+	FILEDESC_SUNLOCK(p->p_fd);

  	VOP_UNLOCK(rootvnode, 0, td);
  }
--- //depot/vendor/freebsd/src/sys/kern/vfs_syscalls.c	2007/03/21 19:36:52
+++ //depot/user/rwatson/filedesc/src/sys/kern/vfs_syscalls.c	2007/04/01 13:10:13
_at__at_ -715,10 +715,10 _at__at_
  	}
  	VOP_UNLOCK(vp, 0, td);
  	VFS_UNLOCK_GIANT(vfslocked);
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_XLOCK(fdp);
  	vpold = fdp->fd_cdir;
  	fdp->fd_cdir = vp;
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);
  	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
  	vrele(vpold);
  	VFS_UNLOCK_GIANT(vfslocked);
_at__at_ -767,10 +767,10 _at__at_
  	VOP_UNLOCK(nd.ni_vp, 0, td);
  	VFS_UNLOCK_GIANT(vfslocked);
  	NDFREE(&nd, NDF_ONLY_PNBUF);
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_XLOCK(fdp);
  	vp = fdp->fd_cdir;
  	fdp->fd_cdir = nd.ni_vp;
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);
  	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  	vrele(vp);
  	VFS_UNLOCK_GIANT(vfslocked);
_at__at_ -789,7 +789,8 _at__at_
  	struct file *fp;
  	int fd;

-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+	FILEDESC_LOCK_ASSERT(fdp);
+
  	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
  		fp = fget_locked(fdp, fd);
  		if (fp == NULL)
_at__at_ -905,12 +906,12 _at__at_

  	VFS_ASSERT_GIANT(vp->v_mount);
  	fdp = td->td_proc->p_fd;
-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);
  	if (chroot_allow_open_directories == 0 ||
  	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
  		error = chroot_refuse_vdir_fds(fdp);
  		if (error) {
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			return (error);
  		}
  	}
_at__at_ -921,7 +922,7 _at__at_
  		fdp->fd_jdir = vp;
  		VREF(fdp->fd_jdir);
  	}
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_XUNLOCK(fdp);
  	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
  	vrele(oldvp);
  	VFS_UNLOCK_GIANT(vfslocked);
_at__at_ -1030,18 +1031,18 _at__at_
  	 *
  	 * Handle the case where someone closed the file (via its file
  	 * descriptor) while we were blocked.  The end result should look
-	 * like opening the file succeeded but it was immediately closed.
-	 * We call vn_close() manually because we haven't yet hooked up
-	 * the various 'struct file' fields.
+	 * like opening the file succeeded but it was immediately closed.  We
+	 * call vn_close() manually because we haven't yet hooked up the
+	 * various 'struct file' fields.
  	 */
-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	FILE_LOCK(fp);
  	if (fp->f_count == 1) {
  		mp = vp->v_mount;
  		KASSERT(fdp->fd_ofiles[indx] != fp,
  		    ("Open file descriptor lost all refs"));
  		FILE_UNLOCK(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		VOP_UNLOCK(vp, 0, td);
  		vn_close(vp, flags & FMASK, fp->f_cred, td);
  		VFS_UNLOCK_GIANT(vfslocked);
_at__at_ -1058,7 +1059,7 _at__at_
  	fp->f_seqcount = 1;
  	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
  	FILE_UNLOCK(fp);
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);

  	VOP_UNLOCK(vp, 0, td);
  	if (flags & (O_EXLOCK | O_SHLOCK)) {
_at__at_ -1206,10 +1207,10 _at__at_
  		return (EEXIST);
  	} else {
  		VATTR_NULL(&vattr);
-		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
+		FILEDESC_SLOCK(td->td_proc->p_fd);
  		vattr.va_mode = (mode & ALLPERMS) &
  		    ~td->td_proc->p_fd->fd_cmask;
-		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
+		FILEDESC_SUNLOCK(td->td_proc->p_fd);
  		vattr.va_rdev = dev;
  		whiteout = 0;

_at__at_ -1319,9 +1320,9 _at__at_
  	}
  	VATTR_NULL(&vattr);
  	vattr.va_type = VFIFO;
-	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_SLOCK(td->td_proc->p_fd);
  	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
-	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_SUNLOCK(td->td_proc->p_fd);
  #ifdef MAC
  	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
  	    &vattr);
_at__at_ -1534,9 +1535,9 _at__at_
  		goto restart;
  	}
  	VATTR_NULL(&vattr);
-	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_SLOCK(td->td_proc->p_fd);
  	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
-	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_SUNLOCK(td->td_proc->p_fd);
  #ifdef MAC
  	vattr.va_type = VLNK;
  	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
_at__at_ -3418,9 +3419,9 _at__at_
  	}
  	VATTR_NULL(&vattr);
  	vattr.va_type = VDIR;
-	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_SLOCK(td->td_proc->p_fd);
  	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
-	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_SUNLOCK(td->td_proc->p_fd);
  #ifdef MAC
  	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
  	    &vattr);
_at__at_ -3807,11 +3808,11 _at__at_
  {
  	register struct filedesc *fdp;

-	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_XLOCK(td->td_proc->p_fd);
  	fdp = td->td_proc->p_fd;
  	td->td_retval[0] = fdp->fd_cmask;
  	fdp->fd_cmask = uap->newmask & ALLPERMS;
-	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_XUNLOCK(td->td_proc->p_fd);
  	return (0);
  }

_at__at_ -3887,7 +3888,7 _at__at_
  	if (fdp == NULL)
  		error = EBADF;
  	else {
-		FILEDESC_LOCK(fdp);
+		FILEDESC_SLOCK(fdp);
  		if ((u_int)fd >= fdp->fd_nfiles ||
  		    (fp = fdp->fd_ofiles[fd]) == NULL)
  			error = EBADF;
_at__at_ -3898,7 +3899,7 _at__at_
  			fhold(fp);
  			error = 0;
  		}
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_SUNLOCK(fdp);
  	}
  	*fpp = fp;
  	return (error);
--- //depot/vendor/freebsd/src/sys/netsmb/smb_dev.c	2007/02/09 17:22:48
+++ //depot/user/rwatson/filedesc/src/sys/netsmb/smb_dev.c	2007/03/03 22:39:43
_at__at_ -368,15 +368,15 _at__at_
  {
  	struct file* fp;

-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	if (((u_int)fd) >= fdp->fd_nfiles ||
  	    (fp = fdp->fd_ofiles[fd]) == NULL ||
  	    (fp->f_flag & flag) == 0) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		return (NULL);
  	}
  	fhold(fp);
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	return (fp);
  }

--- //depot/vendor/freebsd/src/sys/security/audit/audit_bsm_klib.c	2006/12/29 12:22:04
+++ //depot/user/rwatson/filedesc/src/sys/security/audit/audit_bsm_klib.c	2007/03/03 22:39:43
_at__at_ -494,7 +494,7 _at__at_
  	fdp = td->td_proc->p_fd;
  	bufp = path;
  	cisr = 0;
-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	if (*(path) == '/') {
  		while (*(bufp) == '/')
  			bufp++;			/* Skip leading '/'s. */
_at__at_ -516,7 +516,7 _at__at_
  		vref(vnp);
  		bufp = path;
  	}
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	if (vnp != NULL) {
  		/*
  		 * XXX: vn_fullpath() on FreeBSD is "less reliable" than
--- //depot/vendor/freebsd/src/sys/sys/filedesc.h	2006/04/07 05:20:46
+++ //depot/user/rwatson/filedesc/src/sys/sys/filedesc.h	2007/04/01 19:46:08
_at__at_ -35,9 +35,9 _at__at_

  #include <sys/queue.h>
  #include <sys/event.h>
+#include <sys/lock.h>
  #include <sys/priority.h>
-#include <sys/_lock.h>
-#include <sys/_mutex.h>
+#include <sys/sx.h>

  #include <machine/_limits.h>

_at__at_ -60,10 +60,7 _at__at_
  	u_short	fd_cmask;		/* mask for file creation */
  	u_short	fd_refcnt;		/* thread reference count */
  	u_short	fd_holdcnt;		/* hold count on structure + mutex */
-
-	struct	mtx fd_mtx;		/* protects members of this struct */
-	int	fd_locked;		/* long lock flag */
-	int	fd_wanted;		/* "" */
+	struct	sx fd_sx;		/* protects members of this struct */
  	struct	kqlist fd_kqlist;	/* list of kqueues on this filedesc */
  	int	fd_holdleaderscount;	/* block fdfree() for shared close() */
  	int	fd_holdleaderswakeup;	/* fdfree() needs wakeup */
_at__at_ -96,61 +93,18 _at__at_
  #ifdef _KERNEL

  /* Lock a file descriptor table. */
-#define	FILEDESC_LOCK(fd)								\
-	do {										\
-		mtx_lock(&(fd)->fd_mtx);						\
-		(fd)->fd_wanted++;							\
-		while ((fd)->fd_locked)							\
-			msleep(&(fd)->fd_locked, &(fd)->fd_mtx, PLOCK, "fdesc", 0);	\
-		(fd)->fd_locked = 2;							\
-		(fd)->fd_wanted--;							\
-		mtx_unlock(&(fd)->fd_mtx);						\
-	} while (0)
+#define	FILEDESC_LOCK_INIT(fdp)	sx_init(&(fdp)->fd_sx, "filedesc structure")
+#define	FILEDESC_LOCK_DESTROY(fdp)	sx_destroy(&(fdp)->fd_sx)
+#define	FILEDESC_LOCK(fdp)	(&(fdp)->fd_sx)
+#define	FILEDESC_XLOCK(fdp)	sx_xlock(&(fdp)->fd_sx)
+#define	FILEDESC_XUNLOCK(fdp)	sx_xunlock(&(fdp)->fd_sx)
+#define	FILEDESC_SLOCK(fdp)	sx_slock(&(fdp)->fd_sx)
+#define	FILEDESC_SUNLOCK(fdp)	sx_sunlock(&(fdp)->fd_sx)

-#define	FILEDESC_UNLOCK(fd)								\
-	do {										\
-		mtx_lock(&(fd)->fd_mtx);						\
-		KASSERT((fd)->fd_locked == 2,						\
-		    ("fdesc locking mistake %d should be %d", (fd)->fd_locked, 2));	\
-		(fd)->fd_locked = 0;							\
-		if ((fd)->fd_wanted)							\
-			wakeup(&(fd)->fd_locked);					\
-		mtx_unlock(&(fd)->fd_mtx);						\
-	} while (0)
-
-#define	FILEDESC_LOCK_FAST(fd)								\
-	do {										\
-		mtx_lock(&(fd)->fd_mtx);						\
-		(fd)->fd_wanted++;							\
-		while ((fd)->fd_locked)							\
-			msleep(&(fd)->fd_locked, &(fd)->fd_mtx, PLOCK, "fdesc", 0);	\
-		(fd)->fd_locked = 1;							\
-		(fd)->fd_wanted--;							\
-	} while (0)
-
-#define	FILEDESC_UNLOCK_FAST(fd)							\
-	do {										\
-		KASSERT((fd)->fd_locked == 1,						\
-		    ("fdesc locking mistake %d should be %d", (fd)->fd_locked, 1));	\
-		(fd)->fd_locked = 0;							\
-		if ((fd)->fd_wanted)							\
-			wakeup(&(fd)->fd_locked);					\
-		mtx_unlock(&(fd)->fd_mtx);						\
-	} while (0)
-
-#ifdef INVARIANT_SUPPORT
-#define	FILEDESC_LOCK_ASSERT(fd, arg)							\
-	do {										\
-		if ((arg) == MA_OWNED)							\
-			KASSERT((fd)->fd_locked != 0, ("fdesc locking mistake"));	\
-		else									\
-			KASSERT((fd)->fd_locked == 0, ("fdesc locking mistake"));	\
-	} while (0)
-#else
-#define	FILEDESC_LOCK_ASSERT(fd, arg)
-#endif
-
-#define	FILEDESC_LOCK_DESC	"filedesc structure"
+#define	FILEDESC_LOCK_ASSERT(fdp)	sx_assert(&(fdp)->fd_sx, SX_LOCKED | \
+					    SX_NOTRECURSED)
+#define	FILEDESC_XLOCK_ASSERT(fdp)	sx_assert(&(fdp)->fd_sx, SX_XLOCKED | \
+					    SX_NOTRECURSED)

  struct thread;
Received on Sun Apr 01 2007 - 18:03:04 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:39:07 UTC