Dear all, The attached patch moves file descriptor locks from being a custom mutex/sleep lock implemented using msleep() to an sx lock. With the new sx lock optimizations in place, this is now sensible: it both avoids a custom lock type and significantly improves performance. Kris has reported a 2x-4x improvement in transactions/sec with MySQL using this patch, as it greatly reduces the cost of lock contention during file descriptor lookup for threaded applications, and also moves to shared locking to avoid exclusive acquisition for read-only operations (the vast majority in most workloads). The patch is below, but you can also download it from: http://www.watson.org/~robert/freebsd/netperf/20070401a-filedesc-sx.diff I'm currently waiting for the sx lock changes to settle for a few days before committing, so I plan to commit this around Wednesday/Thursday of this week (unless serious problems arise). Robert N M Watson Computer Laboratory University of Cambridge --- //depot/vendor/freebsd/src/sys/compat/linux/linux_file.c 2007/03/29 02:17:34 +++ //depot/user/rwatson/filedesc/src/sys/compat/linux/linux_file.c 2007/04/01 15:10:26 _at__at_ -193,7 +193,7 _at__at_ linux_at(struct thread *td, int dirfd, char *filename, char **newpath, char **freebuf) { struct file *fp; - int error = 0; + int error = 0, vfslocked; struct vnode *dvp; struct filedesc *fdp = td->td_proc->p_fd; char *fullpath = "unknown"; _at__at_ -207,9 +207,10 _at__at_ /* check for AT_FDWCD */ if (dirfd == LINUX_AT_FDCWD) { - FILEDESC_LOCK(fdp); + FILEDESC_SLOCK(fdp); dvp = fdp->fd_cdir; - FILEDESC_UNLOCK(fdp); + vref(dvp); + FILEDESC_SUNLOCK(fdp); } else { error = fget(td, dirfd, &fp); if (error) _at__at_ -220,16 +221,28 _at__at_ fdrop(fp, td); return (ENOTDIR); } + vref(dvp); fdrop(fp, td); } + /* + * XXXRW: This is bogus, as vn_fullpath() returns only an advisory + * file path, and may fail in several common situations, including + * for file systmes that don't use the name cache, and if the entry + * for the
file falls out of the name cache. We should implement + * openat() in the FreeBSD native system call layer properly (using a + * requested starting directory), and have Linux and other ABIs wrap + * the native implementation. + */ error = vn_fullpath(td, dvp, &fullpath, &freepath); if (!error) { *newpath = malloc(strlen(fullpath) + strlen(filename) + 2, M_TEMP, M_WAITOK | M_ZERO); *freebuf = freepath; sprintf(*newpath, "%s/%s", fullpath, filename); } - + vfslocked = VFS_LOCK_GIANT(dvp->v_mount); + vrele(dvp); + VFS_UNLOCK_GIANT(vfslocked); return (error); } --- //depot/vendor/freebsd/src/sys/compat/svr4/svr4_filio.c 2005/01/05 22:36:13 +++ //depot/user/rwatson/filedesc/src/sys/compat/svr4/svr4_filio.c 2007/03/03 22:39:43 _at__at_ -211,15 +211,15 _at__at_ switch (cmd) { case SVR4_FIOCLEX: - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XLOCK(fdp); fdp->fd_ofileflags[fd] |= UF_EXCLOSE; - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); return 0; case SVR4_FIONCLEX: - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XLOCK(fdp); fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); return 0; case SVR4_FIOGETOWN: --- //depot/vendor/freebsd/src/sys/dev/streams/streams.c 2006/07/21 20:40:58 +++ //depot/user/rwatson/filedesc/src/sys/dev/streams/streams.c 2007/03/03 22:39:43 _at__at_ -253,12 +253,15 _at__at_ return error; } - FILEDESC_LOCK_FAST(fdp); + /* + * XXXRW: Should be locking fp? + */ + FILEDESC_XLOCK(fdp); fp->f_data = so; fp->f_flag = FREAD|FWRITE; fp->f_ops = &svr4_netops; fp->f_type = DTYPE_SOCKET; - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); /* * Allocate a stream structure and attach it to this socket. 
--- //depot/vendor/freebsd/src/sys/fs/fdescfs/fdesc_vfsops.c 2006/05/15 19:46:09 +++ //depot/user/rwatson/filedesc/src/sys/fs/fdescfs/fdesc_vfsops.c 2007/03/03 22:39:43 _at__at_ -176,7 +176,7 _at__at_ lim = lim_cur(td->td_proc, RLIMIT_NOFILE); PROC_UNLOCK(td->td_proc); fdp = td->td_proc->p_fd; - FILEDESC_LOCK_FAST(fdp); + FILEDESC_SLOCK(fdp); last = min(fdp->fd_nfiles, lim); freefd = 0; for (i = fdp->fd_freefile; i < last; i++) _at__at_ -189,7 +189,7 _at__at_ */ if (fdp->fd_nfiles < lim) freefd += (lim - fdp->fd_nfiles); - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_SUNLOCK(fdp); sbp->f_flags = 0; sbp->f_bsize = DEV_BSIZE; --- //depot/vendor/freebsd/src/sys/fs/fdescfs/fdesc_vnops.c 2007/03/13 01:54:24 +++ //depot/user/rwatson/filedesc/src/sys/fs/fdescfs/fdesc_vnops.c 2007/03/17 21:03:04 _at__at_ -457,7 +457,7 _at__at_ fcnt = i - 2; /* The first two nodes are `.' and `..' */ - FILEDESC_LOCK_FAST(fdp); + FILEDESC_SLOCK(fdp); while (i < fdp->fd_nfiles + 2 && uio->uio_resid >= UIO_MX) { switch (i) { case 0: /* `.' 
*/ _at__at_ -473,7 +473,7 _at__at_ break; default: if (fdp->fd_ofiles[fcnt] == NULL) { - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_SUNLOCK(fdp); goto done; } _at__at_ -487,15 +487,15 _at__at_ /* * And ship to userland */ - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_SUNLOCK(fdp); error = uiomove(dp, UIO_MX, uio); if (error) goto done; - FILEDESC_LOCK_FAST(fdp); + FILEDESC_SLOCK(fdp); i++; fcnt++; } - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_SUNLOCK(fdp); done: uio->uio_offset = i * UIO_MX; --- //depot/vendor/freebsd/src/sys/fs/unionfs/union_subr.c 2007/03/13 01:54:24 +++ //depot/user/rwatson/filedesc/src/sys/fs/unionfs/union_subr.c 2007/03/17 21:03:04 _at__at_ -450,9 +450,9 _at__at_ } break; default: /* UNIONFS_TRADITIONAL */ - FILEDESC_LOCK_FAST(td->td_proc->p_fd); + FILEDESC_SLOCK(td->td_proc->p_fd); uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask; - FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); + FILEDESC_SUNLOCK(td->td_proc->p_fd); uva->va_uid = ump->um_uid; uva->va_gid = ump->um_gid; break; --- //depot/vendor/freebsd/src/sys/kern/kern_descrip.c 2007/03/15 21:21:17 +++ //depot/user/rwatson/filedesc/src/sys/kern/kern_descrip.c 2007/04/01 17:49:49 _at__at_ -211,9 +211,11 _at__at_ static void fdused(struct filedesc *fdp, int fd) { - FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + + FILEDESC_XLOCK_ASSERT(fdp); KASSERT(!fdisused(fdp, fd), ("fd already used")); + fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd); if (fd > fdp->fd_lastfile) fdp->fd_lastfile = fd; _at__at_ -227,11 +229,13 _at__at_ static void fdunused(struct filedesc *fdp, int fd) { - FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + + FILEDESC_XLOCK_ASSERT(fdp); KASSERT(fdisused(fdp, fd), ("fd is already unused")); KASSERT(fdp->fd_ofiles[fd] == NULL, ("fd is still in use")); + fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); if (fd < fdp->fd_freefile) fdp->fd_freefile = fd; _at__at_ -371,10 +375,14 _at__at_ flg = F_POSIX; p = td->td_proc; fdp = p->p_fd; - FILEDESC_LOCK(fdp); + + /* + * XXXRW: It could be an exclusive lock is not [always] needed here. 
+ */ + FILEDESC_XLOCK(fdp); if ((unsigned)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) { - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); error = EBADF; goto done2; } _at__at_ -383,7 +391,7 _at__at_ switch (cmd) { case F_DUPFD: /* mtx_assert(&Giant, MA_NOTOWNED); */ - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); newmin = arg; PROC_LOCK(p); if (newmin >= lim_cur(p, RLIMIT_NOFILE) || _at__at_ -399,14 +407,14 _at__at_ case F_GETFD: /* mtx_assert(&Giant, MA_NOTOWNED); */ td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0; - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); break; case F_SETFD: /* mtx_assert(&Giant, MA_NOTOWNED); */ *pop = (*pop &~ UF_EXCLOSE) | (arg & FD_CLOEXEC ? UF_EXCLOSE : 0); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); break; case F_GETFL: _at__at_ -414,7 +422,7 _at__at_ FILE_LOCK(fp); td->td_retval[0] = OFLAGS(fp->f_flag); FILE_UNLOCK(fp); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); break; case F_SETFL: _at__at_ -424,7 +432,7 _at__at_ fp->f_flag &= ~FCNTLFLAGS; fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS; FILE_UNLOCK(fp); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); if (error) { _at__at_ -448,7 +456,7 _at__at_ case F_GETOWN: mtx_assert(&Giant, MA_OWNED); fhold(fp); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); if (error == 0) td->td_retval[0] = tmp; _at__at_ -458,7 +466,7 _at__at_ case F_SETOWN: mtx_assert(&Giant, MA_OWNED); fhold(fp); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); tmp = arg; error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); fdrop(fp, td); _at__at_ -472,7 +480,7 _at__at_ case F_SETLK: mtx_assert(&Giant, MA_OWNED); if (fp->f_type != DTYPE_VNODE) { - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); error = EBADF; break; } _at__at_ -482,7 +490,7 _at__at_ if (fp->f_offset < 0 || (flp->l_start > 0 && fp->f_offset > OFF_MAX - flp->l_start)) { - 
FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); error = EOVERFLOW; break; } _at__at_ -493,7 +501,7 _at__at_ * VOP_ADVLOCK() may block. */ fhold(fp); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); vp = fp->f_vnode; switch (flp->l_type) { _at__at_ -528,10 +536,10 _at__at_ break; } /* Check for race with close */ - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XLOCK(fdp); if ((unsigned) fd >= fdp->fd_nfiles || fp != fdp->fd_ofiles[fd]) { - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); flp->l_whence = SEEK_SET; flp->l_start = 0; flp->l_len = 0; _at__at_ -539,21 +547,21 _at__at_ (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, flp, F_POSIX); } else - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); fdrop(fp, td); break; case F_GETLK: mtx_assert(&Giant, MA_OWNED); if (fp->f_type != DTYPE_VNODE) { - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); error = EBADF; break; } flp = (struct flock *)arg; if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && flp->l_type != F_UNLCK) { - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); error = EINVAL; break; } _at__at_ -562,7 +570,7 _at__at_ fp->f_offset > OFF_MAX - flp->l_start) || (flp->l_start < 0 && fp->f_offset < OFF_MIN - flp->l_start)) { - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); error = EOVERFLOW; break; } _at__at_ -572,14 +580,14 _at__at_ * VOP_ADVLOCK() may block. */ fhold(fp); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); vp = fp->f_vnode; error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, F_POSIX); fdrop(fp, td); break; default: - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); error = EINVAL; break; } _at__at_ -593,7 +601,8 _at__at_ * Common code for dup, dup2, and fcntl(F_DUPFD). 
*/ static int -do_dup(struct thread *td, enum dup_type type, int old, int new, register_t *retval) +do_dup(struct thread *td, enum dup_type type, int old, int new, + register_t *retval) { struct filedesc *fdp; struct proc *p; _at__at_ -619,14 +628,14 _at__at_ if (new >= maxfd) return (EMFILE); - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) { - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); return (EBADF); } if (type == DUP_FIXED && old == new) { *retval = new; - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); return (0); } fp = fdp->fd_ofiles[old]; _at__at_ -646,7 +655,7 _at__at_ fdused(fdp, new); } else { if ((error = fdalloc(td, new, &new)) != 0) { - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); fdrop(fp, td); return (error); } _at__at_ -661,7 +670,7 _at__at_ /* we've allocated a descriptor which we won't use */ if (fdp->fd_ofiles[new] == NULL) fdunused(fdp, new); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); fdrop(fp, td); return (EBADF); } _at__at_ -708,20 +717,20 _at__at_ knote_fdclose(td, new); if (delfp->f_type == DTYPE_MQUEUE) mq_fdclose(td, new, delfp); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); (void) closef(delfp, td); if (holdleaders) { - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XLOCK(fdp); fdp->fd_holdleaderscount--; if (fdp->fd_holdleaderscount == 0 && fdp->fd_holdleaderswakeup != 0) { fdp->fd_holdleaderswakeup = 0; wakeup(&fdp->fd_holdleaderscount); } - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); } } else { - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); } return (0); } _at__at_ -979,10 +988,10 _at__at_ AUDIT_SYSCLOSE(td, fd); - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); if ((unsigned)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) { - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); return (EBADF); } fdp->fd_ofiles[fd] = NULL; _at__at_ -998,27 +1007,26 _at__at_ } /* - * We now hold the fp reference that used to be owned by the descriptor - * array. 
- * We have to unlock the FILEDESC *AFTER* knote_fdclose to prevent a - * race of the fd getting opened, a knote added, and deleteing a knote - * for the new fd. + * We now hold the fp reference that used to be owned by the + * descriptor array. We have to unlock the FILEDESC *AFTER* + * knote_fdclose to prevent a race of the fd getting opened, a knote + * added, and deleteing a knote for the new fd. */ knote_fdclose(td, fd); if (fp->f_type == DTYPE_MQUEUE) mq_fdclose(td, fd, fp); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); error = closef(fp, td); if (holdleaders) { - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XLOCK(fdp); fdp->fd_holdleaderscount--; if (fdp->fd_holdleaderscount == 0 && fdp->fd_holdleaderswakeup != 0) { fdp->fd_holdleaderswakeup = 0; wakeup(&fdp->fd_holdleaderscount); } - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); } return (error); } _at__at_ -1176,7 +1184,7 _at__at_ int nnfiles, onfiles; NDSLOTTYPE *nmap; - FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + FILEDESC_XLOCK_ASSERT(fdp); KASSERT(fdp->fd_nfiles > 0, ("zero-length file table")); _at__at_ -1189,7 +1197,7 _at__at_ return; /* allocate a new table and (if required) new bitmaps */ - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); MALLOC(ntable, struct file **, nnfiles * OFILESIZE, M_FILEDESC, M_ZERO | M_WAITOK); nfileflags = (char *)&ntable[nnfiles]; _at__at_ -1198,7 +1206,7 _at__at_ M_FILEDESC, M_ZERO | M_WAITOK); else nmap = NULL; - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); /* * We now have new tables ready to go. Since we dropped the _at__at_ -1237,7 +1245,7 _at__at_ struct filedesc *fdp = p->p_fd; int fd = -1, maxfd; - FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + FILEDESC_XLOCK_ASSERT(fdp); if (fdp->fd_freefile > minfd) minfd = fdp->fd_freefile; _at__at_ -1276,8 +1284,8 _at__at_ } /* - * Check to see whether n user file descriptors - * are available to the process p. + * Check to see whether n user file descriptors are available to the process + * p. 
*/ int fdavail(struct thread *td, int n) _at__at_ -1287,7 +1295,7 _at__at_ struct file **fpp; int i, lim, last; - FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + FILEDESC_LOCK_ASSERT(fdp); PROC_LOCK(p); lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); _at__at_ -1304,12 +1312,11 _at__at_ } /* - * Create a new open file structure and allocate - * a file decriptor for the process that refers to it. - * We add one reference to the file for the descriptor table - * and one reference for resultfp. This is to prevent us being - * preempted and the entry in the descriptor table closed after - * we release the FILEDESC lock. + * Create a new open file structure and allocate a file decriptor for the + * process that refers to it. We add one reference to the file for the + * descriptor table and one reference for resultfp. This is to prevent us + * being preempted and the entry in the descriptor table closed after we + * release the FILEDESC lock. */ int falloc(struct thread *td, struct file **resultfp, int *resultfd) _at__at_ -1350,7 +1357,7 _at__at_ fp->f_ops = &badfileops; fp->f_data = NULL; fp->f_vnode = NULL; - FILEDESC_LOCK(p->p_fd); + FILEDESC_XLOCK(p->p_fd); if ((fq = p->p_fd->fd_ofiles[0])) { LIST_INSERT_AFTER(fq, fp, f_list); } else { _at__at_ -1358,14 +1365,14 _at__at_ } sx_xunlock(&filelist_lock); if ((error = fdalloc(td, 0, &i))) { - FILEDESC_UNLOCK(p->p_fd); + FILEDESC_XUNLOCK(p->p_fd); fdrop(fp, td); if (resultfp) fdrop(fp, td); return (error); } p->p_fd->fd_ofiles[i] = fp; - FILEDESC_UNLOCK(p->p_fd); + FILEDESC_XUNLOCK(p->p_fd); if (resultfp) *resultfp = fp; if (resultfd) _at__at_ -1383,9 +1390,9 _at__at_ struct filedesc0 *newfdp; newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO); - mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF); + FILEDESC_LOCK_INIT(&newfdp->fd_fd); if (fdp != NULL) { - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); newfdp->fd_fd.fd_cdir = fdp->fd_cdir; if (newfdp->fd_fd.fd_cdir) VREF(newfdp->fd_fd.fd_cdir); 
_at__at_ -1395,7 +1402,7 _at__at_ newfdp->fd_fd.fd_jdir = fdp->fd_jdir; if (newfdp->fd_fd.fd_jdir) VREF(newfdp->fd_fd.fd_jdir); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); } /* Create the file descriptor table. */ _at__at_ -1434,7 +1441,7 _at__at_ if (i > 0) return; - mtx_destroy(&fdp->fd_mtx); + FILEDESC_LOCK_DESTROY(fdp); FREE(fdp, M_FILEDESC); } _at__at_ -1444,9 +1451,10 _at__at_ struct filedesc * fdshare(struct filedesc *fdp) { - FILEDESC_LOCK_FAST(fdp); + + FILEDESC_XLOCK(fdp); fdp->fd_refcnt++; - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); return (fdp); } _at__at_ -1457,22 +1465,21 _at__at_ fdunshare(struct proc *p, struct thread *td) { - FILEDESC_LOCK_FAST(p->p_fd); + FILEDESC_XLOCK(p->p_fd); if (p->p_fd->fd_refcnt > 1) { struct filedesc *tmp; - FILEDESC_UNLOCK_FAST(p->p_fd); + FILEDESC_XUNLOCK(p->p_fd); tmp = fdcopy(p->p_fd); fdfree(td); p->p_fd = tmp; } else - FILEDESC_UNLOCK_FAST(p->p_fd); + FILEDESC_XUNLOCK(p->p_fd); } /* - * Copy a filedesc structure. - * A NULL pointer in returns a NULL reference, this is to ease callers, - * not catch errors. + * Copy a filedesc structure. A NULL pointer in returns a NULL reference, + * this is to ease callers, not catch errors. 
*/ struct filedesc * fdcopy(struct filedesc *fdp) _at__at_ -1485,13 +1492,13 _at__at_ return (NULL); newfdp = fdinit(fdp); - FILEDESC_LOCK_FAST(fdp); + FILEDESC_SLOCK(fdp); while (fdp->fd_lastfile >= newfdp->fd_nfiles) { - FILEDESC_UNLOCK_FAST(fdp); - FILEDESC_LOCK(newfdp); + FILEDESC_SUNLOCK(fdp); + FILEDESC_XLOCK(newfdp); fdgrowtable(newfdp, fdp->fd_lastfile + 1); - FILEDESC_UNLOCK(newfdp); - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XUNLOCK(newfdp); + FILEDESC_SLOCK(fdp); } /* copy everything except kqueue descriptors */ newfdp->fd_freefile = -1; _at__at_ -1507,17 +1514,17 _at__at_ newfdp->fd_freefile = i; } } - FILEDESC_UNLOCK_FAST(fdp); - FILEDESC_LOCK(newfdp); + FILEDESC_SUNLOCK(fdp); + FILEDESC_XLOCK(newfdp); for (i = 0; i <= newfdp->fd_lastfile; ++i) if (newfdp->fd_ofiles[i] != NULL) fdused(newfdp, i); - FILEDESC_UNLOCK(newfdp); - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XUNLOCK(newfdp); + FILEDESC_SLOCK(fdp); if (newfdp->fd_freefile == -1) newfdp->fd_freefile = i; newfdp->fd_cmask = fdp->fd_cmask; - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_SUNLOCK(fdp); return (newfdp); } _at__at_ -1543,7 +1550,7 _at__at_ /* Check for special need to clear POSIX style locks */ fdtol = td->td_proc->p_fdtol; if (fdtol != NULL) { - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); KASSERT(fdtol->fdl_refcount > 0, ("filedesc_to_refcount botch: fdl_refcount=%d", fdtol->fdl_refcount)); _at__at_ -1557,7 +1564,7 _at__at_ continue; fp = *fpp; fhold(fp); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; _at__at_ -1571,7 +1578,7 _at__at_ &lf, F_POSIX); VFS_UNLOCK_GIANT(locked); - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); fdrop(fp, td); fpp = fdp->fd_ofiles + i; } _at__at_ -1585,18 +1592,18 _at__at_ * in a shared file descriptor table. 
*/ fdp->fd_holdleaderswakeup = 1; - msleep(&fdp->fd_holdleaderscount, &fdp->fd_mtx, - PLOCK, "fdlhold", 0); + sx_sleep(&fdp->fd_holdleaderscount, + FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0); goto retry; } if (fdtol->fdl_holdcount > 0) { /* - * Ensure that fdtol->fdl_leader - * remains valid in closef(). + * Ensure that fdtol->fdl_leader remains + * valid in closef(). */ fdtol->fdl_wakeup = 1; - msleep(fdtol, &fdp->fd_mtx, - PLOCK, "fdlhold", 0); + sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK, + "fdlhold", 0); goto retry; } } _at__at_ -1608,13 +1615,13 _at__at_ } else fdtol = NULL; td->td_proc->p_fdtol = NULL; - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); if (fdtol != NULL) FREE(fdtol, M_FILEDESC_TO_LEADER); } - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XLOCK(fdp); i = --fdp->fd_refcnt; - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); if (i > 0) return; /* _at__at_ -1626,7 +1633,7 _at__at_ if (*fpp) (void) closef(*fpp, td); } - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); /* XXX This should happen earlier. */ mtx_lock(&fdesc_mtx); _at__at_ -1646,7 +1653,7 _at__at_ fdp->fd_rdir = NULL; jdir = fdp->fd_jdir; fdp->fd_jdir = NULL; - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); if (cdir) { locked = VFS_LOCK_GIANT(cdir->v_mount); _at__at_ -1706,7 +1713,7 _at__at_ * Note: fdp->fd_ofiles may be reallocated out from under us while * we are blocked in a close. Be careful! */ - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); for (i = 0; i <= fdp->fd_lastfile; i++) { if (i > 2) break; _at__at_ -1722,35 +1729,33 _at__at_ fdp->fd_ofiles[i] = NULL; fdp->fd_ofileflags[i] = 0; fdunused(fdp, i); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); (void) closef(fp, td); - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); } } - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); } /* - * If a specific file object occupies a specific file descriptor, - * close the file descriptor entry and drop a reference on the file - * object. 
This is a convenience function to handle a subsequent - * error in a function that calls falloc() that handles the race that - * another thread might have closed the file descriptor out from under - * the thread creating the file object. + * If a specific file object occupies a specific file descriptor, close the + * file descriptor entry and drop a reference on the file object. This is a + * convenience function to handle a subsequent error in a function that calls + * falloc() that handles the race that another thread might have closed the + * file descriptor out from under the thread creating the file object. */ void fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td) { - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); if (fdp->fd_ofiles[idx] == fp) { fdp->fd_ofiles[idx] = NULL; fdunused(fdp, idx); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); fdrop(fp, td); - } else { - FILEDESC_UNLOCK(fdp); - } + } else + FILEDESC_XUNLOCK(fdp); } /* _at__at_ -1767,7 +1772,7 _at__at_ if (fdp == NULL) return; - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); /* * We cannot cache fd_ofiles or fd_ofileflags since operations _at__at_ -1790,12 +1795,12 _at__at_ fdunused(fdp, i); if (fp->f_type == DTYPE_MQUEUE) mq_fdclose(td, i, fp); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); (void) closef(fp, td); - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); } } - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); } /* _at__at_ -1838,14 +1843,15 _at__at_ /* * Someone may have closed the entry in the * file descriptor table, so check it hasn't - * changed before dropping the reference count. + * changed before dropping the reference + * count. */ - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); KASSERT(fdp->fd_ofiles[fd] == fp, ("table not shared, how did it change?")); fdp->fd_ofiles[fd] = NULL; fdunused(fdp, fd); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); fdrop(fp, td); fdrop(fp, td); break; _at__at_ -1873,8 +1879,7 _at__at_ } /* - * Internal form of close. 
- * Decrement reference count on file structure. + * Internal form of close. Decrement reference count on file structure. * Note: td may be NULL when closing a file that was being passed in a * message. * _at__at_ -1917,11 +1922,11 _at__at_ fdtol = td->td_proc->p_fdtol; if (fdtol != NULL) { /* - * Handle special case where file descriptor table - * is shared between multiple process leaders. + * Handle special case where file descriptor table is + * shared between multiple process leaders. */ fdp = td->td_proc->p_fd; - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); for (fdtol = fdtol->fdl_next; fdtol != td->td_proc->p_fdtol; fdtol = fdtol->fdl_next) { _at__at_ -1929,7 +1934,7 _at__at_ P_ADVLOCK) == 0) continue; fdtol->fdl_holdcount++; - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; _at__at_ -1938,7 +1943,7 _at__at_ (void) VOP_ADVLOCK(vp, (caddr_t)fdtol->fdl_leader, F_UNLCK, &lf, F_POSIX); - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); fdtol->fdl_holdcount--; if (fdtol->fdl_holdcount == 0 && fdtol->fdl_wakeup != 0) { _at__at_ -1946,7 +1951,7 _at__at_ wakeup(fdtol); } } - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); } VFS_UNLOCK_GIANT(vfslocked); } _at__at_ -1954,21 +1959,21 _at__at_ } /* - * Extract the file pointer associated with the specified descriptor for - * the current user process. + * Extract the file pointer associated with the specified descriptor for the + * current user process. * * If the descriptor doesn't exist, EBADF is returned. * - * If the descriptor exists but doesn't match 'flags' then - * return EBADF for read attempts and EINVAL for write attempts. + * If the descriptor exists but doesn't match 'flags' then return EBADF for + * read attempts and EINVAL for write attempts. * * If 'hold' is set (non-zero) the file's refcount will be bumped on return. - * It should be dropped with fdrop(). 
- * If it is not set, then the refcount will not be bumped however the - * thread's filedesc struct will be returned locked (for fgetsock). + * It should be dropped with fdrop(). If it is not set, then the refcount + * will not be bumped however the thread's filedesc struct will be returned + * locked (for fgetsock). * - * If an error occured the non-zero error is returned and *fpp is set to NULL. - * Otherwise *fpp is set and zero is returned. + * If an error occured the non-zero error is returned and *fpp is set to + * NULL. Otherwise *fpp is set and zero is returned. */ static __inline int _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold) _at__at_ -1979,9 +1984,9 _at__at_ *fpp = NULL; if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) return (EBADF); - FILEDESC_LOCK(fdp); + FILEDESC_SLOCK(fdp); if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) { - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); return (EBADF); } _at__at_ -1991,16 +1996,16 _at__at_ * Only one flag, or 0, may be specified. */ if (flags == FREAD && (fp->f_flag & FREAD) == 0) { - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); return (EBADF); } if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) { - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); return (EBADF); } if (hold) { fhold(fp); - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); } *fpp = fp; return (0); _at__at_ -2028,9 +2033,9 _at__at_ } /* - * Like fget() but loads the underlying vnode, or returns an error if - * the descriptor does not represent a vnode. Note that pipes use vnodes - * but never have VM objects. The returned vnode will be vref()d. + * Like fget() but loads the underlying vnode, or returns an error if the + * descriptor does not represent a vnode. Note that pipes use vnodes but + * never have VM objects. The returned vnode will be vref()'d. * * XXX: what about the unused flags ? 
*/ _at__at_ -2049,7 +2054,7 _at__at_ *vpp = fp->f_vnode; vref(*vpp); } - FILEDESC_UNLOCK(td->td_proc->p_fd); + FILEDESC_SUNLOCK(td->td_proc->p_fd); return (error); } _at__at_ -2077,15 +2082,15 _at__at_ #endif /* - * Like fget() but loads the underlying socket, or returns an error if - * the descriptor does not represent a socket. + * Like fget() but loads the underlying socket, or returns an error if the + * descriptor does not represent a socket. * - * We bump the ref count on the returned socket. XXX Also obtain the SX - * lock in the future. + * We bump the ref count on the returned socket. XXX Also obtain the SX lock + * in the future. * * XXXRW: fgetsock() and fputsock() are deprecated, as consumers should rely - * on their file descriptor reference to prevent the socket from being - * freed during use. + * on their file descriptor reference to prevent the socket from being free'd + * during use. */ int fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp) _at__at_ -2110,7 +2115,7 _at__at_ soref(*spp); SOCK_UNLOCK(*spp); } - FILEDESC_UNLOCK(td->td_proc->p_fd); + FILEDESC_SUNLOCK(td->td_proc->p_fd); return (error); } _at__at_ -2257,22 +2262,20 _at__at_ * of file descriptors, or the fd to be dup'd has already been * closed, then reject. */ - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); if (dfd < 0 || dfd >= fdp->fd_nfiles || (wfp = fdp->fd_ofiles[dfd]) == NULL) { - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); return (EBADF); } /* * There are two cases of interest here. * - * For ENODEV simply dup (dfd) to file descriptor - * (indx) and return. + * For ENODEV simply dup (dfd) to file descriptor (indx) and return. * - * For ENXIO steal away the file structure from (dfd) and - * store it in (indx). (dfd) is effectively closed by - * this operation. + * For ENXIO steal away the file structure from (dfd) and store it in + * (indx). (dfd) is effectively closed by this operation. * * Any other error code is just returned. 
*/ _at__at_ -2285,7 +2288,7 _at__at_ FILE_LOCK(wfp); if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { FILE_UNLOCK(wfp); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); return (EACCES); } fp = fdp->fd_ofiles[indx]; _at__at_ -2295,7 +2298,7 _at__at_ fdused(fdp, indx); fhold_locked(wfp); FILE_UNLOCK(wfp); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); if (fp != NULL) /* * We now own the reference to fp that the ofiles[] _at__at_ -2316,7 +2319,7 _at__at_ fdunused(fdp, dfd); if (fp == NULL) fdused(fdp, indx); - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); /* * We now own the reference to fp that the ofiles[] array _at__at_ -2327,16 +2330,15 _at__at_ return (0); default: - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); return (error); } /* NOTREACHED */ } /* - * Scan all active processes to see if any of them have a current - * or root directory of `olddp'. If so, replace them with the new - * mount point. + * Scan all active processes to see if any of them have a current or root + * directory of `olddp'. If so, replace them with the new mount point. 
*/ void mountcheckdirs(struct vnode *olddp, struct vnode *newdp) _at__at_ -2353,7 +2355,7 _at__at_ if (fdp == NULL) continue; nrele = 0; - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XLOCK(fdp); if (fdp->fd_cdir == olddp) { vref(newdp); fdp->fd_cdir = newdp; _at__at_ -2364,7 +2366,7 _at__at_ fdp->fd_rdir = newdp; nrele++; } - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); fddrop(fdp); while (nrele--) vrele(olddp); _at__at_ -2391,12 +2393,12 _at__at_ fdtol->fdl_wakeup = 0; fdtol->fdl_leader = leader; if (old != NULL) { - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); fdtol->fdl_next = old->fdl_next; fdtol->fdl_prev = old; old->fdl_next = fdtol; fdtol->fdl_next->fdl_prev = fdtol; - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); } else { fdtol->fdl_next = fdtol; fdtol->fdl_prev = fdtol; _at__at_ -2459,7 +2461,7 _at__at_ fdp = fdhold(p); if (fdp == NULL) continue; - FILEDESC_LOCK_FAST(fdp); + FILEDESC_SLOCK(fdp); for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) { if ((fp = fdp->fd_ofiles[n]) == NULL) continue; _at__at_ -2476,7 +2478,7 _at__at_ if (error) break; } - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_SUNLOCK(fdp); fddrop(fdp); if (error) break; --- //depot/vendor/freebsd/src/sys/kern/kern_event.c 2007/03/04 22:41:05 +++ //depot/user/rwatson/filedesc/src/sys/kern/kern_event.c 2007/03/05 16:48:38 _at__at_ -527,9 +527,9 _at__at_ knlist_init(&kq->kq_sel.si_note, &kq->kq_lock, NULL, NULL, NULL); TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XLOCK(fdp); SLIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); FILE_LOCK(fp); fp->f_flag = FREAD | FWRITE; _at__at_ -1493,9 +1493,9 _at__at_ KQ_UNLOCK(kq); - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XLOCK(fdp); SLIST_REMOVE(&fdp->fd_kqlist, kq, kqueue, kq_list); - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); knlist_destroy(&kq->kq_sel.si_note); mtx_destroy(&kq->kq_lock); _at__at_ -1781,9 +1781,9 _at__at_ } /* - * remove all knotes referencing 
a specified fd - * must be called with FILEDESC lock. This prevents a race where a new fd - * comes along and occupies the entry and we attach a knote to the fd. + * Remove all knotes referencing a specified fd must be called with FILEDESC + * lock. This prevents a race where a new fd comes along and occupies the + * entry and we attach a knote to the fd. */ void knote_fdclose(struct thread *td, int fd) _at__at_ -1793,7 +1793,7 _at__at_ struct knote *kn; int influx; - FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + FILEDESC_XLOCK_ASSERT(fdp); /* * We shouldn't have to worry about new kevents appearing on fd --- //depot/vendor/freebsd/src/sys/kern/kern_fork.c 2007/03/04 22:41:05 +++ //depot/user/rwatson/filedesc/src/sys/kern/kern_fork.c 2007/03/05 16:48:38 _at__at_ -458,9 +458,9 _at__at_ * shared process leaders. */ fdtol = p1->p_fdtol; - FILEDESC_LOCK_FAST(p1->p_fd); + FILEDESC_XLOCK(p1->p_fd); fdtol->fdl_refcount++; - FILEDESC_UNLOCK_FAST(p1->p_fd); + FILEDESC_XUNLOCK(p1->p_fd); } else { /* * Shared file descriptor table, and --- //depot/vendor/freebsd/src/sys/kern/subr_witness.c 2007/04/01 15:52:48 +++ //depot/user/rwatson/filedesc/src/sys/kern/subr_witness.c 2007/04/01 18:01:27 _at__at_ -281,7 +281,6 _at__at_ * Various mutexes */ { "Giant", &lock_class_mtx_sleep }, - { "filedesc structure", &lock_class_mtx_sleep }, { "pipe mutex", &lock_class_mtx_sleep }, { "sigio lock", &lock_class_mtx_sleep }, { "process group", &lock_class_mtx_sleep }, _at__at_ -294,7 +293,6 _at__at_ /* * Sockets */ - { "filedesc structure", &lock_class_mtx_sleep }, { "accept", &lock_class_mtx_sleep }, { "so_snd", &lock_class_mtx_sleep }, { "so_rcv", &lock_class_mtx_sleep }, --- //depot/vendor/freebsd/src/sys/kern/sys_generic.c 2007/03/05 13:12:17 +++ //depot/user/rwatson/filedesc/src/sys/kern/sys_generic.c 2007/03/05 16:48:38 _at__at_ -568,14 +568,14 _at__at_ fdp = td->td_proc->p_fd; switch (com) { case FIONCLEX: - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XLOCK(fdp); fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; 
- FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); goto out; case FIOCLEX: - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XLOCK(fdp); fdp->fd_ofileflags[fd] |= UF_EXCLOSE; - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); goto out; case FIONBIO: FILE_LOCK(fp); _at__at_ -658,11 +658,10 _at__at_ return (EINVAL); fdp = td->td_proc->p_fd; - FILEDESC_LOCK_FAST(fdp); - + FILEDESC_SLOCK(fdp); if (nd > td->td_proc->p_fd->fd_nfiles) nd = td->td_proc->p_fd->fd_nfiles; /* forgiving; slightly wrong */ - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_SUNLOCK(fdp); /* * Allocate just enough bits for the non-null fd_sets. Use the _at__at_ -809,7 +808,7 _at__at_ static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; struct filedesc *fdp = td->td_proc->p_fd; - FILEDESC_LOCK(fdp); + FILEDESC_SLOCK(fdp); for (msk = 0; msk < 3; msk++) { if (ibits[msk] == NULL) continue; _at__at_ -820,7 +819,7 _at__at_ if (!(bits & 1)) continue; if ((fp = fget_locked(fdp, fd)) == NULL) { - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); return (EBADF); } if (fo_poll(fp, flag[msk], td->td_ucred, _at__at_ -832,7 +831,7 _at__at_ } } } - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); td->td_retval[0] = n; return (0); } _at__at_ -973,7 +972,7 _at__at_ struct file *fp; int n = 0; - FILEDESC_LOCK(fdp); + FILEDESC_SLOCK(fdp); for (i = 0; i < nfd; i++, fds++) { if (fds->fd >= fdp->fd_nfiles) { fds->revents = POLLNVAL; _at__at_ -997,7 +996,7 _at__at_ } } } - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); td->td_retval[0] = n; return (0); } --- //depot/vendor/freebsd/src/sys/kern/uipc_mqueue.c 2007/03/13 01:54:24 +++ //depot/user/rwatson/filedesc/src/sys/kern/uipc_mqueue.c 2007/03/17 21:03:04 _at__at_ -2013,10 +2013,10 _at__at_ fp->f_data = pn; FILE_UNLOCK(fp); - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XLOCK(fdp); if (fdp->fd_ofiles[fd] == fp) fdp->fd_ofileflags[fd] |= UF_EXCLOSE; - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); td->td_retval[0] = fd; fdrop(fp, td); return (0); _at__at_ -2197,14 +2197,14 _at__at_ if 
(error) return (error); again: - FILEDESC_LOCK_FAST(fdp); + FILEDESC_SLOCK(fdp); if (fget_locked(fdp, uap->mqd) != fp) { - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_SUNLOCK(fdp); error = EBADF; goto out; } mtx_lock(&mq->mq_mutex); - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_SUNLOCK(fdp); if (uap->sigev != NULL) { if (mq->mq_notifier != NULL) { error = EBUSY; _at__at_ -2267,7 +2267,8 _at__at_ struct mqueue *mq; fdp = td->td_proc->p_fd; - FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + FILEDESC_LOCK_ASSERT(fdp); + if (fp->f_ops == &mqueueops) { mq = FPTOMQ(fp); mtx_lock(&mq->mq_mutex); _at__at_ -2295,7 +2296,7 _at__at_ int i; fdp = p->p_fd; - FILEDESC_LOCK_FAST(fdp); + FILEDESC_SLOCK(fdp); for (i = 0; i < fdp->fd_nfiles; ++i) { fp = fget_locked(fdp, i); if (fp != NULL && fp->f_ops == &mqueueops) { _at__at_ -2305,7 +2306,7 _at__at_ mtx_unlock(&mq->mq_mutex); } } - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_SUNLOCK(fdp); KASSERT(LIST_EMPTY(&p->p_mqnotifier), ("mq notifiers left")); } --- //depot/vendor/freebsd/src/sys/kern/uipc_syscalls.c 2007/03/05 13:12:17 +++ //depot/user/rwatson/filedesc/src/sys/kern/uipc_syscalls.c 2007/03/05 16:48:38 _at__at_ -124,7 +124,7 _at__at_ if (fdp == NULL) error = EBADF; else { - FILEDESC_LOCK_FAST(fdp); + FILEDESC_SLOCK(fdp); fp = fget_locked(fdp, fd); if (fp == NULL) error = EBADF; _at__at_ -137,7 +137,7 _at__at_ *fflagp = fp->f_flag; error = 0; } - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_SUNLOCK(fdp); } *fpp = fp; return (error); _at__at_ -182,12 +182,17 _at__at_ if (error) { fdclose(fdp, fp, fd, td); } else { - FILEDESC_LOCK_FAST(fdp); + /* + * XXXRW: The logic here seems wrong -- shouldn't it be + * locking the file, not the filedesc? Other threads could + * already have a reference to the socket by now. 
+ */ + FILEDESC_XLOCK(fdp); fp->f_data = so; /* already has ref count */ fp->f_flag = FREAD|FWRITE; fp->f_ops = &socketops; fp->f_type = DTYPE_SOCKET; - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); td->td_retval[0] = fd; } fdrop(fp, td); --- //depot/vendor/freebsd/src/sys/kern/uipc_usrreq.c 2007/03/12 14:57:57 +++ //depot/user/rwatson/filedesc/src/sys/kern/uipc_usrreq.c 2007/03/17 21:03:04 _at__at_ -1579,10 +1579,10 _at__at_ unp_freerights(rp, newfds); goto next; } - FILEDESC_LOCK(td->td_proc->p_fd); + FILEDESC_XLOCK(td->td_proc->p_fd); /* if the new FD's will not fit free them. */ if (!fdavail(td, newfds)) { - FILEDESC_UNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(td->td_proc->p_fd); error = EMSGSIZE; unp_freerights(rp, newfds); goto next; _at__at_ -1597,7 +1597,7 _at__at_ *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { - FILEDESC_UNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(td->td_proc->p_fd); error = E2BIG; unp_freerights(rp, newfds); goto next; _at__at_ -1616,7 +1616,7 _at__at_ unp_rights--; *fdp++ = f; } - FILEDESC_UNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(td->td_proc->p_fd); } else { /* We can just copy anything else across. */ if (error || controlp == NULL) _at__at_ -1738,23 +1738,24 _at__at_ * files. If not, reject the entire operation. */ fdp = data; - FILEDESC_LOCK(fdescp); + FILEDESC_SLOCK(fdescp); for (i = 0; i < oldfds; i++) { fd = *fdp++; if ((unsigned)fd >= fdescp->fd_nfiles || fdescp->fd_ofiles[fd] == NULL) { - FILEDESC_UNLOCK(fdescp); + FILEDESC_SUNLOCK(fdescp); error = EBADF; goto out; } fp = fdescp->fd_ofiles[fd]; if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { - FILEDESC_UNLOCK(fdescp); + FILEDESC_SUNLOCK(fdescp); error = EOPNOTSUPP; goto out; } } + /* * Now replace the integer FDs with pointers to * the associated global file table entry.. 
_at__at_ -1763,7 +1764,7 _at__at_ *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { - FILEDESC_UNLOCK(fdescp); + FILEDESC_SUNLOCK(fdescp); error = E2BIG; goto out; } _at__at_ -1780,7 +1781,7 _at__at_ FILE_UNLOCK(fp); unp_rights++; } - FILEDESC_UNLOCK(fdescp); + FILEDESC_SUNLOCK(fdescp); break; case SCM_TIMESTAMP: --- //depot/vendor/freebsd/src/sys/kern/vfs_cache.c 2007/03/05 13:12:17 +++ //depot/user/rwatson/filedesc/src/sys/kern/vfs_cache.c 2007/03/05 16:48:38 _at__at_ -717,10 +717,10 _at__at_ tmpbuf = malloc(buflen, M_TEMP, M_WAITOK); fdp = td->td_proc->p_fd; mtx_lock(&Giant); - FILEDESC_LOCK(fdp); + FILEDESC_SLOCK(fdp); error = vn_fullpath1(td, fdp->fd_cdir, fdp->fd_rdir, tmpbuf, &bp, buflen); - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); mtx_unlock(&Giant); if (!error) { _at__at_ -771,9 +771,9 _at__at_ buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); fdp = td->td_proc->p_fd; - FILEDESC_LOCK(fdp); + FILEDESC_SLOCK(fdp); error = vn_fullpath1(td, vn, fdp->fd_rdir, buf, retbuf, MAXPATHLEN); - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); if (!error) *freebuf = buf; --- //depot/vendor/freebsd/src/sys/kern/vfs_lookup.c 2007/03/31 16:11:57 +++ //depot/user/rwatson/filedesc/src/sys/kern/vfs_lookup.c 2007/04/01 13:10:13 _at__at_ -188,14 +188,14 _at__at_ /* * Get starting point for the translation. */ - FILEDESC_LOCK(fdp); + FILEDESC_SLOCK(fdp); ndp->ni_rootdir = fdp->fd_rdir; ndp->ni_topdir = fdp->fd_jdir; dp = fdp->fd_cdir; vfslocked = VFS_LOCK_GIANT(dp->v_mount); VREF(dp); - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); for (;;) { /* * Check if root directory should replace current directory. 
--- //depot/vendor/freebsd/src/sys/kern/vfs_mount.c 2007/04/01 13:12:37 +++ //depot/user/rwatson/filedesc/src/sys/kern/vfs_mount.c 2007/04/01 13:21:17 _at__at_ -1361,7 +1361,7 _at__at_ panic("Cannot find root vnode"); p = td->td_proc; - FILEDESC_LOCK(p->p_fd); + FILEDESC_SLOCK(p->p_fd); if (p->p_fd->fd_cdir != NULL) vrele(p->p_fd->fd_cdir); _at__at_ -1373,7 +1373,7 _at__at_ p->p_fd->fd_rdir = rootvnode; VREF(rootvnode); - FILEDESC_UNLOCK(p->p_fd); + FILEDESC_SUNLOCK(p->p_fd); VOP_UNLOCK(rootvnode, 0, td); } --- //depot/vendor/freebsd/src/sys/kern/vfs_syscalls.c 2007/03/21 19:36:52 +++ //depot/user/rwatson/filedesc/src/sys/kern/vfs_syscalls.c 2007/04/01 13:10:13 _at__at_ -715,10 +715,10 _at__at_ } VOP_UNLOCK(vp, 0, td); VFS_UNLOCK_GIANT(vfslocked); - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XLOCK(fdp); vpold = fdp->fd_cdir; fdp->fd_cdir = vp; - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); vfslocked = VFS_LOCK_GIANT(vpold->v_mount); vrele(vpold); VFS_UNLOCK_GIANT(vfslocked); _at__at_ -767,10 +767,10 _at__at_ VOP_UNLOCK(nd.ni_vp, 0, td); VFS_UNLOCK_GIANT(vfslocked); NDFREE(&nd, NDF_ONLY_PNBUF); - FILEDESC_LOCK_FAST(fdp); + FILEDESC_XLOCK(fdp); vp = fdp->fd_cdir; fdp->fd_cdir = nd.ni_vp; - FILEDESC_UNLOCK_FAST(fdp); + FILEDESC_XUNLOCK(fdp); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vrele(vp); VFS_UNLOCK_GIANT(vfslocked); _at__at_ -789,7 +789,8 _at__at_ struct file *fp; int fd; - FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + FILEDESC_LOCK_ASSERT(fdp); + for (fd = 0; fd < fdp->fd_nfiles ; fd++) { fp = fget_locked(fdp, fd); if (fp == NULL) _at__at_ -905,12 +906,12 _at__at_ VFS_ASSERT_GIANT(vp->v_mount); fdp = td->td_proc->p_fd; - FILEDESC_LOCK(fdp); + FILEDESC_XLOCK(fdp); if (chroot_allow_open_directories == 0 || (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { error = chroot_refuse_vdir_fds(fdp); if (error) { - FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); return (error); } } _at__at_ -921,7 +922,7 _at__at_ fdp->fd_jdir = vp; VREF(fdp->fd_jdir); } - 
FILEDESC_UNLOCK(fdp); + FILEDESC_XUNLOCK(fdp); vfslocked = VFS_LOCK_GIANT(oldvp->v_mount); vrele(oldvp); VFS_UNLOCK_GIANT(vfslocked); _at__at_ -1030,18 +1031,18 _at__at_ * * Handle the case where someone closed the file (via its file * descriptor) while we were blocked. The end result should look - * like opening the file succeeded but it was immediately closed. - * We call vn_close() manually because we haven't yet hooked up - * the various 'struct file' fields. + * like opening the file succeeded but it was immediately closed. We + * call vn_close() manually because we haven't yet hooked up the + * various 'struct file' fields. */ - FILEDESC_LOCK(fdp); + FILEDESC_SLOCK(fdp); FILE_LOCK(fp); if (fp->f_count == 1) { mp = vp->v_mount; KASSERT(fdp->fd_ofiles[indx] != fp, ("Open file descriptor lost all refs")); FILE_UNLOCK(fp); - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); VOP_UNLOCK(vp, 0, td); vn_close(vp, flags & FMASK, fp->f_cred, td); VFS_UNLOCK_GIANT(vfslocked); _at__at_ -1058,7 +1059,7 _at__at_ fp->f_seqcount = 1; fp->f_type = (vp->v_type == VFIFO ? 
DTYPE_FIFO : DTYPE_VNODE); FILE_UNLOCK(fp); - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); VOP_UNLOCK(vp, 0, td); if (flags & (O_EXLOCK | O_SHLOCK)) { _at__at_ -1206,10 +1207,10 _at__at_ return (EEXIST); } else { VATTR_NULL(&vattr); - FILEDESC_LOCK_FAST(td->td_proc->p_fd); + FILEDESC_SLOCK(td->td_proc->p_fd); vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; - FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); + FILEDESC_SUNLOCK(td->td_proc->p_fd); vattr.va_rdev = dev; whiteout = 0; _at__at_ -1319,9 +1320,9 _at__at_ } VATTR_NULL(&vattr); vattr.va_type = VFIFO; - FILEDESC_LOCK_FAST(td->td_proc->p_fd); + FILEDESC_SLOCK(td->td_proc->p_fd); vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; - FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); + FILEDESC_SUNLOCK(td->td_proc->p_fd); #ifdef MAC error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); _at__at_ -1534,9 +1535,9 _at__at_ goto restart; } VATTR_NULL(&vattr); - FILEDESC_LOCK_FAST(td->td_proc->p_fd); + FILEDESC_SLOCK(td->td_proc->p_fd); vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; - FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); + FILEDESC_SUNLOCK(td->td_proc->p_fd); #ifdef MAC vattr.va_type = VLNK; error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, _at__at_ -3418,9 +3419,9 _at__at_ } VATTR_NULL(&vattr); vattr.va_type = VDIR; - FILEDESC_LOCK_FAST(td->td_proc->p_fd); + FILEDESC_SLOCK(td->td_proc->p_fd); vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; - FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); + FILEDESC_SUNLOCK(td->td_proc->p_fd); #ifdef MAC error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); _at__at_ -3807,11 +3808,11 _at__at_ { register struct filedesc *fdp; - FILEDESC_LOCK_FAST(td->td_proc->p_fd); + FILEDESC_XLOCK(td->td_proc->p_fd); fdp = td->td_proc->p_fd; td->td_retval[0] = fdp->fd_cmask; fdp->fd_cmask = uap->newmask & ALLPERMS; - FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); + 
FILEDESC_XUNLOCK(td->td_proc->p_fd); return (0); } _at__at_ -3887,7 +3888,7 _at__at_ if (fdp == NULL) error = EBADF; else { - FILEDESC_LOCK(fdp); + FILEDESC_SLOCK(fdp); if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) error = EBADF; _at__at_ -3898,7 +3899,7 _at__at_ fhold(fp); error = 0; } - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); } *fpp = fp; return (error); --- //depot/vendor/freebsd/src/sys/netsmb/smb_dev.c 2007/02/09 17:22:48 +++ //depot/user/rwatson/filedesc/src/sys/netsmb/smb_dev.c 2007/03/03 22:39:43 _at__at_ -368,15 +368,15 _at__at_ { struct file* fp; - FILEDESC_LOCK(fdp); + FILEDESC_SLOCK(fdp); if (((u_int)fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL || (fp->f_flag & flag) == 0) { - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); return (NULL); } fhold(fp); - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); return (fp); } --- //depot/vendor/freebsd/src/sys/security/audit/audit_bsm_klib.c 2006/12/29 12:22:04 +++ //depot/user/rwatson/filedesc/src/sys/security/audit/audit_bsm_klib.c 2007/03/03 22:39:43 _at__at_ -494,7 +494,7 _at__at_ fdp = td->td_proc->p_fd; bufp = path; cisr = 0; - FILEDESC_LOCK(fdp); + FILEDESC_SLOCK(fdp); if (*(path) == '/') { while (*(bufp) == '/') bufp++; /* Skip leading '/'s. 
*/ _at__at_ -516,7 +516,7 _at__at_ vref(vnp); bufp = path; } - FILEDESC_UNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); if (vnp != NULL) { /* * XXX: vn_fullpath() on FreeBSD is "less reliable" than --- //depot/vendor/freebsd/src/sys/sys/filedesc.h 2006/04/07 05:20:46 +++ //depot/user/rwatson/filedesc/src/sys/sys/filedesc.h 2007/04/01 19:46:08 _at__at_ -35,9 +35,9 _at__at_ #include <sys/queue.h> #include <sys/event.h> +#include <sys/lock.h> #include <sys/priority.h> -#include <sys/_lock.h> -#include <sys/_mutex.h> +#include <sys/sx.h> #include <machine/_limits.h> _at__at_ -60,10 +60,7 _at__at_ u_short fd_cmask; /* mask for file creation */ u_short fd_refcnt; /* thread reference count */ u_short fd_holdcnt; /* hold count on structure + mutex */ - - struct mtx fd_mtx; /* protects members of this struct */ - int fd_locked; /* long lock flag */ - int fd_wanted; /* "" */ + struct sx fd_sx; /* protects members of this struct */ struct kqlist fd_kqlist; /* list of kqueues on this filedesc */ int fd_holdleaderscount; /* block fdfree() for shared close() */ int fd_holdleaderswakeup; /* fdfree() needs wakeup */ _at__at_ -96,61 +93,18 _at__at_ #ifdef _KERNEL /* Lock a file descriptor table. 
*/ -#define FILEDESC_LOCK(fd) \ - do { \ - mtx_lock(&(fd)->fd_mtx); \ - (fd)->fd_wanted++; \ - while ((fd)->fd_locked) \ - msleep(&(fd)->fd_locked, &(fd)->fd_mtx, PLOCK, "fdesc", 0); \ - (fd)->fd_locked = 2; \ - (fd)->fd_wanted--; \ - mtx_unlock(&(fd)->fd_mtx); \ - } while (0) +#define FILEDESC_LOCK_INIT(fdp) sx_init(&(fdp)->fd_sx, "filedesc structure") +#define FILEDESC_LOCK_DESTROY(fdp) sx_destroy(&(fdp)->fd_sx) +#define FILEDESC_LOCK(fdp) (&(fdp)->fd_sx) +#define FILEDESC_XLOCK(fdp) sx_xlock(&(fdp)->fd_sx) +#define FILEDESC_XUNLOCK(fdp) sx_xunlock(&(fdp)->fd_sx) +#define FILEDESC_SLOCK(fdp) sx_slock(&(fdp)->fd_sx) +#define FILEDESC_SUNLOCK(fdp) sx_sunlock(&(fdp)->fd_sx) -#define FILEDESC_UNLOCK(fd) \ - do { \ - mtx_lock(&(fd)->fd_mtx); \ - KASSERT((fd)->fd_locked == 2, \ - ("fdesc locking mistake %d should be %d", (fd)->fd_locked, 2)); \ - (fd)->fd_locked = 0; \ - if ((fd)->fd_wanted) \ - wakeup(&(fd)->fd_locked); \ - mtx_unlock(&(fd)->fd_mtx); \ - } while (0) - -#define FILEDESC_LOCK_FAST(fd) \ - do { \ - mtx_lock(&(fd)->fd_mtx); \ - (fd)->fd_wanted++; \ - while ((fd)->fd_locked) \ - msleep(&(fd)->fd_locked, &(fd)->fd_mtx, PLOCK, "fdesc", 0); \ - (fd)->fd_locked = 1; \ - (fd)->fd_wanted--; \ - } while (0) - -#define FILEDESC_UNLOCK_FAST(fd) \ - do { \ - KASSERT((fd)->fd_locked == 1, \ - ("fdesc locking mistake %d should be %d", (fd)->fd_locked, 1)); \ - (fd)->fd_locked = 0; \ - if ((fd)->fd_wanted) \ - wakeup(&(fd)->fd_locked); \ - mtx_unlock(&(fd)->fd_mtx); \ - } while (0) - -#ifdef INVARIANT_SUPPORT -#define FILEDESC_LOCK_ASSERT(fd, arg) \ - do { \ - if ((arg) == MA_OWNED) \ - KASSERT((fd)->fd_locked != 0, ("fdesc locking mistake")); \ - else \ - KASSERT((fd)->fd_locked == 0, ("fdesc locking mistake")); \ - } while (0) -#else -#define FILEDESC_LOCK_ASSERT(fd, arg) -#endif - -#define FILEDESC_LOCK_DESC "filedesc structure" +#define FILEDESC_LOCK_ASSERT(fdp) sx_assert(&(fdp)->fd_sx, SX_LOCKED | \ + SX_NOTRECURSED) +#define FILEDESC_XLOCK_ASSERT(fdp) 
sx_assert(&(fdp)->fd_sx, SX_XLOCKED | \ + SX_NOTRECURSED) struct thread;
Received on Sun Apr 01 2007 - 18:03:04 UTC
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:39:07 UTC