Re: panic: vm_fault: fault on nofault entry

From: Konstantin Belousov <kostikbel_at_gmail.com>
Date: Mon, 10 Mar 2014 21:01:12 +0200
On Mon, Mar 10, 2014 at 02:05:08PM -0400, Glen Barber wrote:
> Unread portion of the kernel message buffer:
> Sleeping thread (tid 100702, pid 24712) owns a non-sleepable lock

Would be nice to see the full message before and panic from the console.
From what I see, this is a lock leak, I forgot to unlock the map.
It is nice that it is so simple to reproduce the issue in your setup.

Try this update.

diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c
index abbbb21..5a4d8a9 100644
--- a/sys/amd64/amd64/mem.c
+++ b/sys/amd64/amd64/mem.c
_at__at_ -76,14 +76,16 _at__at_ MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors");
 int
 memrw(struct cdev *dev, struct uio *uio, int flags)
 {
-	int o;
-	u_long c = 0, v;
 	struct iovec *iov;
-	int error = 0;
+	u_long c, v;
+	int error, o, sflags;
 	vm_offset_t addr, eaddr;
 
 	GIANT_REQUIRED;
 
+	error = 0;
+	c = 0;
+	sflags = curthread_pflags_set(TDP_DEVMEMIO);
 	while (uio->uio_resid > 0 && error == 0) {
 		iov = uio->uio_iov;
 		if (iov->iov_len == 0) {
_at__at_ -98,7 +100,15 _at__at_ memrw(struct cdev *dev, struct uio *uio, int flags)
 kmemphys:
 			o = v & PAGE_MASK;
 			c = min(uio->uio_resid, (u_int)(PAGE_SIZE - o));
-			error = uiomove((void *)PHYS_TO_DMAP(v), (int)c, uio);
+			v = PHYS_TO_DMAP(v);
+			if (v < DMAP_MIN_ADDRESS ||
+			    (v > DMAP_MIN_ADDRESS + dmaplimit &&
+			    v <= DMAP_MAX_ADDRESS) ||
+			    pmap_kextract(v) == 0) {
+				error = EFAULT;
+				goto ret;
+			}
+			error = uiomove((void *)v, (int)c, uio);
 			continue;
 		}
 		else if (dev2unit(dev) == CDEV_MINOR_KMEM) {
_at__at_ -119,22 +129,30 _at__at_ kmemphys:
 			addr = trunc_page(v);
 			eaddr = round_page(v + c);
 
-			if (addr < VM_MIN_KERNEL_ADDRESS)
-				return (EFAULT);
-			for (; addr < eaddr; addr += PAGE_SIZE) 
-				if (pmap_extract(kernel_pmap, addr) == 0)
-					return (EFAULT);
-
+			if (addr < VM_MIN_KERNEL_ADDRESS) {
+				error = EFAULT;
+				goto ret;
+			}
+			for (; addr < eaddr; addr += PAGE_SIZE) {
+				if (pmap_extract(kernel_pmap, addr) == 0) {
+					error = EFAULT;
+					goto ret;
+				}
+			}
 			if (!kernacc((caddr_t)(long)v, c,
 			    uio->uio_rw == UIO_READ ? 
-			    VM_PROT_READ : VM_PROT_WRITE))
-				return (EFAULT);
+			    VM_PROT_READ : VM_PROT_WRITE)) {
+				error = EFAULT;
+				goto ret;
+			}
 
 			error = uiomove((caddr_t)(long)v, (int)c, uio);
 			continue;
 		}
 		/* else panic! */
 	}
+ret:
+	curthread_pflags_restore(sflags);
 	return (error);
 }
 
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index f7d0afd..b1cbdbc 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
_at__at_ -787,6 +787,12 _at__at_ nogo:
 			frame->tf_rip = (long)curpcb->pcb_onfault;
 			return (0);
 		}
+		if ((td->td_pflags & TDP_DEVMEMIO) != 0) {
+			KASSERT(curpcb->pcb_onfault != NULL,
+			    ("/dev/mem without pcb_onfault"));
+			frame->tf_rip = (long)curpcb->pcb_onfault;
+			return (0);
+		}
 		trap_fatal(frame, eva);
 		return (-1);
 	}
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 07d63f8..9633e34 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
_at__at_ -157,6 +157,8 _at__at_ userret(struct thread *td, struct trapframe *frame)
 	    td->td_rw_rlocks));
 	KASSERT((td->td_pflags & TDP_NOFAULTING) == 0,
 	    ("userret: Returning with pagefaults disabled"));
+	KASSERT((td->td_pflags & TDP_DEVMEMIO) == 0,
+	    ("userret: Returning with /dev/mem i/o leaked"));
 	KASSERT(td->td_no_sleeping == 0,
 	    ("userret: Returning with sleep disabled"));
 	KASSERT(td->td_pinned == 0 || (td->td_pflags & TDP_CALLCHAIN) != 0,
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index fce1f8a..e7cd022 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
_at__at_ -424,6 +424,7 _at__at_ do {									\
 #define	TDP_RESETSPUR	0x04000000 /* Reset spurious page fault history. */
 #define	TDP_NERRNO	0x08000000 /* Last errno is already in td_errno */
 #define	TDP_UIOHELD	0x10000000 /* Current uio has pages held in td_ma */
+#define	TDP_DEVMEMIO	0x20000000 /* Accessing memory for /dev/mem */
 
 /*
  * Reasons that the current thread can not be run yet.
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 4a6495f..ab48462 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
_at__at_ -269,6 +269,10 _at__at_ RetryFault:;
 	map_generation = fs.map->timestamp;
 
 	if (fs.entry->eflags & MAP_ENTRY_NOFAULT) {
+		if ((curthread->td_pflags & TDP_DEVMEMIO) != 0) {
+			vm_map_unlock_read(fs.map);
+			return (KERN_FAILURE);
+		}
 		panic("vm_fault: fault on nofault entry, addr: %lx",
 		    (u_long)vaddr);
 	}

Received on Mon Mar 10 2014 - 18:01:25 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:47 UTC