Re: panic: vm_fault: fault on nofault entry

From: Glen Barber <gjb_at_FreeBSD.org>
Date: Sun, 9 Mar 2014 14:16:57 -0400
On Sun, Mar 09, 2014 at 08:01:32PM +0200, Konstantin Belousov wrote:
> On Sun, Mar 09, 2014 at 12:56:48PM -0400, Glen Barber wrote:
> > We are having regular panics on several machines in the cluster.
> > 
> > Below follows the script from the kgdb(1) session, hopefully providing
> > enough information.  This machine runs 11.0-CURRENT #2 r262892, from
> > 2 days ago.
> > 
> > It uses tmpfs(5) for the port build workspace.  I have an unconfirmed
> > suspicion that use of sysutils/lsof is involved somehow, but cannot be
> > sure.  (In my experience with panics with port building, removing lsof
> > from the system did have an effect, but I may be going down the wrong
> > rabbit hole.)
> > 
> 
> This is very similar to an issue reported some time ago.
> Try this patch.  I never got any feedback.
> 
> diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c
> index abbbb21..fd9c5df 100644
> --- a/sys/amd64/amd64/mem.c
> +++ b/sys/amd64/amd64/mem.c
> @@ -98,7 +98,13 @@ memrw(struct cdev *dev, struct uio *uio, int flags)
>  kmemphys:
>  			o = v & PAGE_MASK;
>  			c = min(uio->uio_resid, (u_int)(PAGE_SIZE - o));
> -			error = uiomove((void *)PHYS_TO_DMAP(v), (int)c, uio);
> +			v = PHYS_TO_DMAP(v);
> +			if (v < DMAP_MIN_ADDRESS ||
> +			    (v > DMAP_MIN_ADDRESS + dmaplimit &&
> +			    v <= DMAP_MAX_ADDRESS) ||
> +			    pmap_kextract(v) == 0)
> +				return (EFAULT);
> +			error = uiomove((void *)v, (int)c, uio);
>  			continue;
>  		}
>  		else if (dev2unit(dev) == CDEV_MINOR_KMEM) {

There is a very similar patch on one of these machines.

  Index: sys/amd64/amd64/mem.c
  ===================================================================
  --- sys/amd64/amd64/mem.c	(revision 262298)
  +++ sys/amd64/amd64/mem.c	(working copy)
  @@ -98,6 +98,12 @@
   kmemphys:
   			o = v & PAGE_MASK;
   			c = min(uio->uio_resid, (u_int)(PAGE_SIZE - o));
  +			v = PHYS_TO_DMAP(v);
  +			if (v < DMAP_MIN_ADDRESS ||
  +			    (v > DMAP_MIN_ADDRESS + dmaplimit &&
  +			    v <= DMAP_MAX_ADDRESS) ||
  +			    pmap_kextract(v) == 0)
  +				return (EFAULT);
   			error = uiomove((void *)PHYS_TO_DMAP(v), (int)c, uio);
   			continue;
   		}
  Index: sys/amd64/amd64/pmap.c
  ===================================================================
  --- sys/amd64/amd64/pmap.c	(revision 262298)
  +++ sys/amd64/amd64/pmap.c	(working copy)
  @@ -321,7 +321,7 @@
       "Number of kernel page table pages allocated on bootup");
   
   static int ndmpdp;
  -static vm_paddr_t dmaplimit;
  +vm_paddr_t dmaplimit;
   vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
   pt_entry_t pg_nx;
   
  Index: sys/amd64/include/pmap.h
  ===================================================================
  --- sys/amd64/include/pmap.h	(revision 262298)
  +++ sys/amd64/include/pmap.h	(working copy)
  @@ -369,6 +369,7 @@
   extern vm_paddr_t dump_avail[];
   extern vm_offset_t virtual_avail;
   extern vm_offset_t virtual_end;
  +extern vm_paddr_t dmaplimit;
   
   #define	pmap_page_get_memattr(m)	((vm_memattr_t)(m)->md.pat_mode)
   #define	pmap_page_is_write_mapped(m)	(((m)->aflags & PGA_WRITEABLE) != 0)

The machine this change is on panicked today as well.  That machine runs
r262298M, and I have a vmcore from Feb 24 (there was not enough
available space to get a crash dump today.)

The backtrace from Feb 24 follows.

Script started on Sun Mar  9 18:14:41 2014
root@redbuild04.nyi:/usr/obj/usr/src/sys/REDBUILD # sh
# kgdb ./kernel.debug /var/crash/vmcore.3
GNU gdb 6.1.1 [FreeBSD]
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "amd64-marcel-freebsd"...

Unread portion of the kernel message buffer:
panic: vm_fault: fault on nofault entry, addr: fffffe03becbc000
cpuid = 23
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe1838ec1180
kdb_backtrace() at kdb_backtrace+0x39/frame 0xfffffe1838ec1230
panic() at panic+0x155/frame 0xfffffe1838ec12b0
vm_fault_hold() at vm_fault_hold+0x1e7a/frame 0xfffffe1838ec1500
vm_fault() at vm_fault+0x77/frame 0xfffffe1838ec1540
trap_pfault() at trap_pfault+0x199/frame 0xfffffe1838ec15e0
trap() at trap+0x4a0/frame 0xfffffe1838ec17f0
calltrap() at calltrap+0x8/frame 0xfffffe1838ec17f0
--- trap 0xc, rip = 0xffffffff80d971fb, rsp = 0xfffffe1838ec18b0, rbp = 0xfffffe1838ec1910 ---
copyout() at copyout+0x3b/frame 0xfffffe1838ec1910
memrw() at memrw+0x1ef/frame 0xfffffe1838ec1950
giant_read() at giant_read+0xa4/frame 0xfffffe1838ec1990
devfs_read_f() at devfs_read_f+0xeb/frame 0xfffffe1838ec19f0
dofileread() at dofileread+0x95/frame 0xfffffe1838ec1a40
kern_readv() at kern_readv+0x68/frame 0xfffffe1838ec1a90
sys_read() at sys_read+0x63/frame 0xfffffe1838ec1ae0
amd64_syscall() at amd64_syscall+0x3fb/frame 0xfffffe1838ec1bf0
Xfast_syscall() at Xfast_syscall+0xfb/frame 0xfffffe1838ec1bf0
--- syscall (3, FreeBSD ELF64, sys_read), rip = 0x800b8343a, rsp = 0x7fffffffcfe8, rbp = 0x7fffffffd030 ---
KDB: enter: panic

Reading symbols from /boot/kernel/zfs.ko.symbols...done.
Loaded symbols for /boot/kernel/zfs.ko.symbols
Reading symbols from /boot/kernel/opensolaris.ko.symbols...done.
Loaded symbols for /boot/kernel/opensolaris.ko.symbols
Reading symbols from /boot/kernel/ums.ko.symbols...done.
Loaded symbols for /boot/kernel/ums.ko.symbols
Reading symbols from /boot/kernel/tmpfs.ko.symbols...done.
Loaded symbols for /boot/kernel/tmpfs.ko.symbols
Reading symbols from /boot/kernel/nullfs.ko.symbols...done.
Loaded symbols for /boot/kernel/nullfs.ko.symbols
Reading symbols from /boot/kernel/linprocfs.ko.symbols...done.
Loaded symbols for /boot/kernel/linprocfs.ko.symbols
Reading symbols from /boot/kernel/linux.ko.symbols...done.
Loaded symbols for /boot/kernel/linux.ko.symbols
#0  doadump (textdump=-954994000) at pcpu.h:219
219		__asm("movq %%gs:%1,%0" : "=r" (td)
(kgdb) bt
#0  doadump (textdump=-954994000) at pcpu.h:219
#1  0xffffffff8034a175 in db_fncall (dummy1=<value optimized out>, 
    dummy2=<value optimized out>, dummy3=<value optimized out>, dummy4=<value optimized out>)
    at /usr/src/sys/ddb/db_command.c:578
#2  0xffffffff80349e5d in db_command (cmd_table=0x0) at /usr/src/sys/ddb/db_command.c:449
#3  0xffffffff80349bd4 in db_command_loop () at /usr/src/sys/ddb/db_command.c:502
#4  0xffffffff8034c630 in db_trap (type=<value optimized out>, code=0)
    at /usr/src/sys/ddb/db_main.c:231
#5  0xffffffff80987329 in kdb_trap (type=3, code=0, tf=<value optimized out>)
    at /usr/src/sys/kern/subr_kdb.c:656
#6  0xffffffff80d99009 in trap (frame=0xfffffe1838ec1160)
    at /usr/src/sys/amd64/amd64/trap.c:571
#7  0xffffffff80d7dd12 in calltrap () at /usr/src/sys/amd64/amd64/exception.S:231
#8  0xffffffff80986a8e in kdb_enter (why=0xffffffff8100ed4f "panic", msg=<value optimized out>)
    at cpufunc.h:63
#9  0xffffffff809462b5 in panic (fmt=<value optimized out>)
    at /usr/src/sys/kern/kern_shutdown.c:752
#10 0xffffffff80c0981a in vm_fault_hold (map=<value optimized out>, 
    vaddr=<value optimized out>, fault_type=<value optimized out>, 
    fault_flags=<value optimized out>, m_hold=<value optimized out>)
    at /usr/src/sys/vm/vm_fault.c:272
#11 0xffffffff80c07957 in vm_fault (map=0xfffff80002000000, vaddr=<value optimized out>, 
    fault_type=1 '\001', fault_flags=128) at /usr/src/sys/vm/vm_fault.c:217
#12 0xffffffff80d997f9 in trap_pfault (frame=0xfffffe1838ec1800, usermode=0)
    at /usr/src/sys/amd64/amd64/trap.c:767
#13 0xffffffff80d99020 in trap (frame=0xfffffe1838ec1800)
    at /usr/src/sys/amd64/amd64/trap.c:455
#14 0xffffffff80d7dd12 in calltrap () at /usr/src/sys/amd64/amd64/exception.S:231
#15 0xffffffff80d971fb in copyout () at /usr/src/sys/amd64/amd64/support.S:246
#16 0xffffffff8099bb35 in uiomove_faultflag (cp=<value optimized out>, 
    n=<value optimized out>, uio=0xfffffe1838ec1ab0, nofault=<value optimized out>)
    at /usr/src/sys/kern/subr_uio.c:192
#17 0xffffffff80d8576f in memrw (dev=<value optimized out>, uio=<value optimized out>, 
    flags=<value optimized out>) at /usr/src/sys/amd64/amd64/mem.c:107
---Type <return> to continue, or q <return> to quit---
#18 0xffffffff808ec764 in giant_read (dev=0xfffff80011347c00, uio=0xfffffe1838ec1ab0, ioflag=0)
    at /usr/src/sys/kern/kern_conf.c:442
#19 0xffffffff80817e2b in devfs_read_f (fp=0xfffff80854be3140, uio=0xfffffe1838ec1ab0, 
    cred=<value optimized out>, flags=0, td=0xfffff801f52c5490)
    at /usr/src/sys/fs/devfs/devfs_vnops.c:1193
#20 0xffffffff809a0e25 in dofileread (td=0xfffff801f52c5490, fd=4, fp=0xfffff80854be3140, 
    auio=0xfffffe1838ec1ab0, offset=<value optimized out>, flags=1172307968) at file.h:299
#21 0xffffffff809a0b48 in kern_readv (td=0xfffff801f52c5490, fd=4, auio=0xfffffe1838ec1ab0)
    at /usr/src/sys/kern/sys_generic.c:256
#22 0xffffffff809a0ad3 in sys_read (td=<value optimized out>, uap=<value optimized out>)
    at /usr/src/sys/kern/sys_generic.c:171
#23 0xffffffff80d9a04b in amd64_syscall (td=0xfffff801f52c5490, traced=0) at subr_syscall.c:133
#24 0xffffffff80d7dffb in Xfast_syscall () at /usr/src/sys/amd64/amd64/exception.S:390
#25 0x0000000800b8343a in ?? ()
Previous frame inner to this frame (corrupt stack?)
Current language:  auto; currently minimal
(kgdb) quit

Script done on Sun Mar  9 18:14:59 2014

Glen


Received on Sun Mar 09 2014 - 17:17:00 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:47 UTC