On Tue, Mar 04, 2008 at 12:27:04PM +0100, Pietro Cerutti wrote: > -----BEGIN PGP SIGNED MESSAGE----- > Hash: SHA512 > > Dear all, > > FreeBSD gahrtop.localhost 8.0-CURRENT FreeBSD 8.0-CURRENT #8: Mon Mar 3 > 14:07:47 CET 2008 > root_at_gahrtop.localhost:/usr/obj/usr/src/sys/MSI1034 i386 > > vmcore and kernel.symbols available at: > http://www.gahr.ch/FreeBSD/misc/crash17/vmcore.17 > http://www.gahr.ch/FreeBSD/misc/crash17/kernel.symbols > > (if you get a page not found, 's|www.gahr.ch|213.142.182.66|', DNS are > still refreshing after relocation...) > > Fatal trap 12: page fault while in kernel mode > cpuid = 1; apic id = 01 > fault virtual address = 0x60 > fault code = supervisor read, page not present > instruction pointer = 0x20:0xc055d43b > stack pointer = 0x28:0xe757ca68 > frame pointer = 0x28:0xe757ca84 > code segment = base 0x0, limit 0xfffff, type 0x1b > = DPL 0, pres 1, def32 1, gran 1 > processor eflags = interrupt enabled, resume, IOPL = 0 > current process = 399 (moused) > trap number = 12 > panic: page fault > cpuid = 1 > Uptime: 54m21s > Physical memory: 2031 MB > Dumping 83 MB: 68 52 36 20 4 > > (kgdb) bt > #0 doadump () at pcpu.h:195 > #1 0xc0592a86 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:417 > #2 0xc0592ec8 in panic (fmt=0x104 <Address 0x104 out of bounds>) at > /usr/src/sys/kern/kern_shutdown.c:571 > #3 0xc07f5b8a in trap_fatal (frame=0xe757ca28, eva=40) at > /usr/src/sys/i386/i386/trap.c:898 > #4 0xc07f5ef9 in trap_pfault (frame=0xe757ca28, usermode=0, eva=96) at > /usr/src/sys/i386/i386/trap.c:811 > #5 0xc07f68db in trap (frame=0xe757ca28) at > /usr/src/sys/i386/i386/trap.c:489 > #6 0xc07dd1fb in calltrap () at /usr/src/sys/i386/i386/exception.s:146 > #7 0xc055d43b in giant_poll (dev=0xc4c09e00, events=64, td=0xc4e6b220) > at /usr/src/sys/kern/kern_conf.c:385 > #8 0xc05161d5 in devfs_poll_f (fp=0xc4e715e4, events=64, > cred=0xc4ace500, td=0xc4e6b220) at /usr/src/sys/fs/devfs/devfs_vnops.c:842 > #9 0xc05c9fca in kern_select (td=0xc4e6b220, nd=20, fd_in=0xbfbfea70, > fd_ou=0x0, fd_ex=0x0, tvp=0xe757cc6c) at file.h:265 > #10 0xc05ca41c in select (td=0xc4e6b220, uap=0xe757ccf8) at > /usr/src/sys/kern/sys_generic.c:758 > #11 0xc07f6142 in syscall (frame=0xe757cd38) at > /usr/src/sys/i386/i386/trap.c:1034 > #12 0xc07dd260 in Xint0x80_syscall () at > /usr/src/sys/i386/i386/exception.s:203 > #13 0x00000033 in ?? () > (kgdb) up 7 > #7 0xc055d43b in giant_poll (dev=0xc4c09e00, events=64, td=0xc4e6b220) > at /usr/src/sys/kern/kern_conf.c:385 > 385 retval = dev->si_devsw->d_gianttrick-> > (kgdb) list > 380 giant_poll(struct cdev *dev, int events, struct thread *td) > 381 { > 382 int retval; > 383 > 384 mtx_lock(&Giant); > 385 retval = dev->si_devsw->d_gianttrick-> > 386 d_poll(dev, events, td); > 387 mtx_unlock(&Giant); > 388 return (retval); > 389 } > (kgdb) set print pretty > (kgdb) print *dev > $2 = { > si_priv = 0xc4c09e00, > si_flags = 0, > si_atime = { > tv_sec = 1204568256, > tv_nsec = 0 > }, > si_ctime = { > tv_sec = 0, > tv_nsec = 0 > }, > si_mtime = { > tv_sec = 0, > tv_nsec = 0 > }, > si_uid = 0, > si_gid = 5, > si_mode = 420, > si_cred = 0x0, > si_drv0 = 0, > si_refcount = 3, > si_list = { > le_next = 0x0, > le_prev = 0xc0871a58 > }, > si_clone = { > le_next = 0x0, > le_prev = 0x0 > }, > si_children = { > lh_first = 0x0 > }, > si_siblings = { > le_next = 0x0, > le_prev = 0x0 > }, > si_parent = 0x0, > si_name = 0xc4c09e78 "ums0", > si_drv1 = 0x0, > si_drv2 = 0x0, > si_devsw = 0x0, > si_iosize_max = 65536, > si_usecount = 1, > si_threadcount = 1, > __si_u = { > - ---Type <return> to continue, or q <return> to quit--- > __sit_tty = 0x0, > __sid_snapdata = 0x0 > }, > __si_namebuf = "ums0", '\0' <repeats 59 times> > } > > Please note that dev->si_devsw is NULL, and kern_conf.c:385 tries to > dereference it. I remember this issue was already reported by M. Warner Losh. The reason for the problem is that si_refcount does not protect struct cdev, it only keeps the cdev structure from going away (as well as protecting against finalizing the cdevsw). The callers of devvn_refthread() are assumed to only use the returned dsw instead of dereferencing dev again. Giant_trick explicitely breaks the rule. Besides the problem I described above, there is another race in make_dev_credv(), where cdevsw may be finalized just before new device is created, since dev_mtx is dropped between prep_cdevsw and newdev() and later. This requires rapid device creation/destruction, and was discovered by Peter Holm. You may try the following patch. Besides removing giant_trick and directly checking the Giant, it puts the prep_cdevsw() into the common dev_mtx-protected region. diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index d9565ee..58e943a 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c _at__at_ -369,7 +369,9 _at__at_ devfs_close(struct vop_close_args *ap) error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td); PICKUP_GIANT(); } else { + mtx_lock(&Giant); error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td); + mtx_unlock(&Giant); } dev_relthread(dev); vn_lock(vp, vp_locked | LK_RETRY); _at__at_ -491,7 +493,11 _at__at_ devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struc dev_relthread(dev); return (error); } + if (dsw->d_flags & D_NEEDGIANT) + mtx_lock(&Giant); error = dsw->d_ioctl(dev, com, data, fp->f_flag, td); + if (dsw->d_flags & D_NEEDGIANT) + mtx_unlock(&Giant); dev_relthread(dev); if (error == ENOIOCTL) error = ENOTTY; _at__at_ -534,7 +540,11 _at__at_ devfs_kqfilter_f(struct file *fp, struct knote *kn) error = devfs_fp_check(fp, &dev, &dsw); if (error) return (error); + if (dsw->d_flags & D_NEEDGIANT) + mtx_lock(&Giant); error = dsw->d_kqfilter(dev, kn); + if (dsw->d_flags & D_NEEDGIANT) + mtx_unlock(&Giant); dev_relthread(dev); return (error); } _at__at_ -780,10 +790,12 _at__at_ devfs_open(struct vop_open_args *ap) error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td); PICKUP_GIANT(); } else { + mtx_lock(&Giant); if (dsw->d_fdopen != NULL) error = dsw->d_fdopen(dev, ap->a_mode, td, fp); else error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td); + mtx_unlock(&Giant); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); _at__at_ -839,7 +851,11 _at__at_ devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td) error = devfs_fp_check(fp, &dev, &dsw); if (error) return (error); + if (dsw->d_flags & D_NEEDGIANT) + mtx_lock(&Giant); error = dsw->d_poll(dev, events, td); + if (dsw->d_flags & D_NEEDGIANT) + mtx_unlock(&Giant); dev_relthread(dev); return(error); } _at__at_ -874,7 +890,11 _at__at_ devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, st if ((flags & FOF_OFFSET) == 0) uio->uio_offset = fp->f_offset; + if (dsw->d_flags & D_NEEDGIANT) + mtx_lock(&Giant); error = dsw->d_read(dev, uio, ioflag); + if (dsw->d_flags & D_NEEDGIANT) + mtx_unlock(&Giant); if (uio->uio_resid != resid || (error == 0 && resid != 0)) vfs_timestamp(&dev->si_atime); dev_relthread(dev); _at__at_ -1308,7 +1328,11 _at__at_ devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, s resid = uio->uio_resid; + if (dsw->d_flags & D_NEEDGIANT) + mtx_lock(&Giant); error = dsw->d_write(dev, uio, ioflag); + if (dsw->d_flags & D_NEEDGIANT) + mtx_unlock(&Giant); if (uio->uio_resid != resid || (error == 0 && resid != 0)) { vfs_timestamp(&dev->si_ctime); dev->si_mtime = dev->si_ctime; diff --git a/sys/kern/kern_conf.c b/sys/kern/kern_conf.c index 33285b4..910a41a 100644 --- a/sys/kern/kern_conf.c +++ b/sys/kern/kern_conf.c _at__at_ -294,125 +294,6 _at__at_ no_poll(struct cdev *dev __unused, int events, struct thread *td __unused) #define no_dump (dumper_t *)enodev -static int -giant_open(struct cdev *dev, int oflags, int devtype, struct thread *td) -{ - int retval; - - mtx_lock(&Giant); - retval = dev->si_devsw->d_gianttrick-> - d_open(dev, oflags, devtype, td); - mtx_unlock(&Giant); - return (retval); -} - -static int -giant_fdopen(struct cdev *dev, int oflags, struct thread *td, struct file *fp) -{ - int retval; - - mtx_lock(&Giant); - retval = dev->si_devsw->d_gianttrick-> - d_fdopen(dev, oflags, td, fp); - mtx_unlock(&Giant); - return (retval); -} - -static int -giant_close(struct cdev *dev, int fflag, int devtype, struct thread *td) -{ - int retval; - - mtx_lock(&Giant); - retval = dev->si_devsw->d_gianttrick-> - d_close(dev, fflag, devtype, td); - mtx_unlock(&Giant); - return (retval); -} - -static void -giant_strategy(struct bio *bp) -{ - - mtx_lock(&Giant); - bp->bio_dev->si_devsw->d_gianttrick-> - d_strategy(bp); - mtx_unlock(&Giant); -} - -static int -giant_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) -{ - int retval; - - mtx_lock(&Giant); - retval = dev->si_devsw->d_gianttrick-> - d_ioctl(dev, cmd, data, fflag, td); - mtx_unlock(&Giant); - return (retval); -} - -static int -giant_read(struct cdev *dev, struct uio *uio, int ioflag) -{ - int retval; - - mtx_lock(&Giant); - retval = dev->si_devsw->d_gianttrick-> - d_read(dev, uio, ioflag); - mtx_unlock(&Giant); - return (retval); -} - -static int -giant_write(struct cdev *dev, struct uio *uio, int ioflag) -{ - int retval; - - mtx_lock(&Giant); - retval = dev->si_devsw->d_gianttrick-> - d_write(dev, uio, ioflag); - mtx_unlock(&Giant); - return (retval); -} - -static int -giant_poll(struct cdev *dev, int events, struct thread *td) -{ - int retval; - - mtx_lock(&Giant); - retval = dev->si_devsw->d_gianttrick-> - d_poll(dev, events, td); - mtx_unlock(&Giant); - return (retval); -} - -static int -giant_kqfilter(struct cdev *dev, struct knote *kn) -{ - int retval; - - mtx_lock(&Giant); - retval = dev->si_devsw->d_gianttrick-> - d_kqfilter(dev, kn); - mtx_unlock(&Giant); - return (retval); -} - -static int -giant_mmap(struct cdev *dev, vm_offset_t offset, vm_paddr_t *paddr, int nprot) -{ - int retval; - - mtx_lock(&Giant); - retval = dev->si_devsw->d_gianttrick-> - d_mmap(dev, offset, paddr, nprot); - mtx_unlock(&Giant); - return (retval); -} - - /* * struct cdev * and u_dev_t primitives */ _at__at_ -485,27 +366,16 _at__at_ umajor(dev_t dev) static void fini_cdevsw(struct cdevsw *devsw) { - struct cdevsw *gt; + mtx_assert(&devmtx, MA_OWNED); - if (devsw->d_gianttrick != NULL) { - gt = devsw->d_gianttrick; - memcpy(devsw, gt, sizeof *devsw); - free(gt, M_DEVT); - devsw->d_gianttrick = NULL; - } devsw->d_flags &= ~D_INIT; } static void prep_cdevsw(struct cdevsw *devsw) { - struct cdevsw *dsw2; - if (devsw->d_flags & D_NEEDGIANT) - dsw2 = malloc(sizeof *dsw2, M_DEVT, M_WAITOK); - else - dsw2 = NULL; - dev_lock(); + mtx_assert(&devmtx, MA_OWNED); if (devsw->d_version != D_VERSION_01) { printf( _at__at_ -532,41 +402,29 _at__at_ prep_cdevsw(struct cdevsw *devsw) if (devsw->d_poll == NULL) devsw->d_poll = ttypoll; } - if (devsw->d_flags & D_NEEDGIANT) { - if (devsw->d_gianttrick == NULL) { - memcpy(dsw2, devsw, sizeof *dsw2); - devsw->d_gianttrick = dsw2; - } else - free(dsw2, M_DEVT); - } - -#define FIXUP(member, noop, giant) \ +#define FIXUP(member, noop) \ do { \ if (devsw->member == NULL) { \ devsw->member = noop; \ - } else if (devsw->d_flags & D_NEEDGIANT) \ - devsw->member = giant; \ } \ - while (0) - - FIXUP(d_open, null_open, giant_open); - FIXUP(d_fdopen, NULL, giant_fdopen); - FIXUP(d_close, null_close, giant_close); - FIXUP(d_read, no_read, giant_read); - FIXUP(d_write, no_write, giant_write); - FIXUP(d_ioctl, no_ioctl, giant_ioctl); - FIXUP(d_poll, no_poll, giant_poll); - FIXUP(d_mmap, no_mmap, giant_mmap); - FIXUP(d_strategy, no_strategy, giant_strategy); - FIXUP(d_kqfilter, no_kqfilter, giant_kqfilter); + } while (0) + + FIXUP(d_open, null_open); + FIXUP(d_fdopen, NULL); + FIXUP(d_close, null_close); + FIXUP(d_read, no_read); + FIXUP(d_write, no_write); + FIXUP(d_ioctl, no_ioctl); + FIXUP(d_poll, no_poll); + FIXUP(d_mmap, no_mmap); + FIXUP(d_strategy, no_strategy); + FIXUP(d_kqfilter, no_kqfilter); if (devsw->d_dump == NULL) devsw->d_dump = no_dump; LIST_INIT(&devsw->d_devs); devsw->d_flags |= D_INIT; - - dev_unlock(); } struct cdev * _at__at_ -580,10 +438,10 _at__at_ make_dev_credv(int flags, struct cdevsw *devsw, int minornr, KASSERT((minornr & ~MAXMINOR) == 0, ("Invalid minor (0x%x) in make_dev", minornr)); - if (!(devsw->d_flags & D_INIT)) - prep_cdevsw(devsw); dev = devfs_alloc(); dev_lock(); + if (!(devsw->d_flags & D_INIT)) + prep_cdevsw(devsw); dev = newdev(devsw, minornr, dev); if (flags & MAKEDEV_REF) dev_refl(dev); _at__at_ -884,9 +742,6 _at__at_ clone_create(struct clonedevs **cdp, struct cdevsw *csw, int *up, struct cdev ** KASSERT(*up <= CLONE_UNITMASK, ("Too high unit (0x%x) in clone_create", *up)); - if (!(csw->d_flags & D_INIT)) - prep_cdevsw(csw); - /* * Search the list for a lot of things in one go: * A preexisting match is returned immediately. _at__at_ -898,6 +753,8 _at__at_ clone_create(struct clonedevs **cdp, struct cdevsw *csw, int *up, struct cdev ** unit = *up; ndev = devfs_alloc(); dev_lock(); + if (!(csw->d_flags & D_INIT)) + prep_cdevsw(csw); low = extra; de = dl = NULL; cd = *cdp; diff --git a/sys/kern/tty_cons.c b/sys/kern/tty_cons.c index 35638c4..758d225 100644 --- a/sys/kern/tty_cons.c +++ b/sys/kern/tty_cons.c _at__at_ -393,7 +393,11 _at__at_ cn_devopen(struct cn_device *cnd, struct thread *td, int forceopen) csw = dev_refthread(dev); if (csw == NULL) return (ENXIO); + if (csw->d_flags & D_NEEDGIANT) + mtx_lock(&Giant); error = (*csw->d_open)(dev, openflag, 0, td); + if (csw->d_flags & D_NEEDGIANT) + mtx_unlock(&Giant); dev_relthread(dev); return (error); } _at__at_ -458,7 +462,11 _at__at_ cnread(struct cdev *dev, struct uio *uio, int flag) csw = dev_refthread(dev); if (csw == NULL) return (ENXIO); + if (csw->d_flags & D_NEEDGIANT) + mtx_lock(&Giant); error = (csw->d_read)(dev, uio, flag); + if (csw->d_flags & D_NEEDGIANT) + mtx_unlock(&Giant); dev_relthread(dev); return (error); } _at__at_ -482,7 +490,11 _at__at_ cnwrite(struct cdev *dev, struct uio *uio, int flag) csw = dev_refthread(dev); if (csw == NULL) return (ENXIO); + if (csw->d_flags & D_NEEDGIANT) + mtx_lock(&Giant); error = (csw->d_write)(dev, uio, flag); + if (csw->d_flags & D_NEEDGIANT) + mtx_unlock(&Giant); dev_relthread(dev); return (error); } _at__at_ -518,7 +530,11 _at__at_ cnioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) csw = dev_refthread(dev); if (csw == NULL) return (ENXIO); + if (csw->d_flags & D_NEEDGIANT) + mtx_lock(&Giant); error = (csw->d_ioctl)(dev, cmd, data, flag, td); + if (csw->d_flags & D_NEEDGIANT) + mtx_unlock(&Giant); dev_relthread(dev); return (error); } _at__at_ -543,7 +559,11 _at__at_ cnpoll(struct cdev *dev, int events, struct thread *td) csw = dev_refthread(dev); if (csw == NULL) return (ENXIO); + if (csw->d_flags & D_NEEDGIANT) + mtx_lock(&Giant); error = (csw->d_poll)(dev, events, td); + if (csw->d_flags & D_NEEDGIANT) + mtx_unlock(&Giant); dev_relthread(dev); return (error); } _at__at_ -564,7 +584,11 _at__at_ cnkqfilter(struct cdev *dev, struct knote *kn) csw = dev_refthread(dev); if (csw == NULL) return (ENXIO); + if (csw->d_flags & D_NEEDGIANT) + mtx_lock(&Giant); error = (csw->d_kqfilter)(dev, kn); + if (csw->d_flags & D_NEEDGIANT) + mtx_unlock(&Giant); dev_relthread(dev); return (error); } diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 00ff023..94f5986 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c _at__at_ -3118,7 +3118,11 _at__at_ dev_strategy(struct cdev *dev, struct buf *bp) bufdone(bp); return; } + if (csw->d_flags & D_NEEDGIANT) + mtx_lock(&Giant); (*csw->d_strategy)(bip); + if (csw->d_flags & D_NEEDGIANT) + mtx_unlock(&Giant); dev_relthread(dev); } diff --git a/sys/sys/conf.h b/sys/sys/conf.h index c36ea8c..fdf9d4f 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h _at__at_ -213,7 +213,7 _at__at_ struct cdevsw { LIST_ENTRY(cdevsw) d_list; LIST_HEAD(, cdev) d_devs; int d_spare3; - struct cdevsw *d_gianttrick; + void *d_spare4; }; #define NUMCDEVSW 256 diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index f0d661f..4f67112 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c _at__at_ -131,11 +131,17 _at__at_ dev_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t fo * XXX assumes VM_PROT_* == PROT_* */ npages = OFF_TO_IDX(size); + if (csw->d_flags & D_NEEDGIANT) + mtx_lock(&Giant); for (off = foff; npages--; off += PAGE_SIZE) if ((*csw->d_mmap)(dev, off, &paddr, (int)prot) != 0) { + if (csw->d_flags & D_NEEDGIANT) + mtx_unlock(&Giant); dev_relthread(dev); return (NULL); } + if (csw->d_flags & D_NEEDGIANT) + mtx_unlock(&Giant); mtx_lock(&dev_pager_mtx); _at__at_ -220,7 +226,11 _at__at_ dev_pager_getpages(object, m, count, reqpage) panic("dev_pager_getpage: no cdevsw"); prot = PROT_READ; /* XXX should pass in? */ + if (csw->d_flags & D_NEEDGIANT) + mtx_lock(&Giant); ret = (*csw->d_mmap)(dev, (vm_offset_t)offset << PAGE_SHIFT, &paddr, prot); + if (csw->d_flags & D_NEEDGIANT) + mtx_unlock(&Giant); KASSERT(ret == 0, ("dev_pager_getpage: map function returns error")); dev_relthread(dev);
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:39:28 UTC