Re: Your locking and rman changes to pci/if_*

From: Bruce Evans <bde_at_zeta.org.au>
Date: Fri, 18 Apr 2003 06:40:02 +1000 (EST)
On Thu, 17 Apr 2003, Ian Dowse wrote:

> It looks BTW, as if we convert some kernel page faults into witness
> panics, which is not so good... I think it is limited to cases where
> we page fault without Giant, but with a spin lock held (in this
> case callout_lock).

The i386 pagefault handler attempts to handle pagefaults on user address
in kernel mode.  This is dubious in RELENG_4 and just broken in -current
since there is a PROC_LOCK() is always attempted and is sometimes fatal.
The broken cases that I saw involved accesses to the stack gap.  The page
fault could be handled in this case, but it was fatal if enough error
checking was enabled because the process didn't know what it was doing
and held PROC_LOCK() (which is non-recursive).  Your cleanup of some
stack gaps fixed the main offenders.  A page fault while a spin lock is
held is more broken.

My version of trap_pfault() doesn't permit these accesses.  See near
"goto maygo" in the patch.  It is also supposed to not permit page
faults in critical regions, but this is a little broken (it's missing
at least a td_critnest check).  See near the start of the patch.  The
error handling for both cases works much better than panics deep in
witness, at least if DDB is configured.  ddb gets control at the
faulting instruction just like it does for null pointer accesses.

%%%
Index: trap.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/trap.c,v
retrieving revision 1.237
diff -u -2 -r1.237 trap.c
--- trap.c	7 Nov 2002 01:34:23 -0000	1.237
+++ trap.c	22 Nov 2002 17:14:31 -0000
_at__at_ -677,6 +724,6 _at__at_
 {
 	vm_offset_t va;
-	struct vmspace *vm = NULL;
-	vm_map_t map = 0;
+	struct vmspace *vm;
+	vm_map_t map;
 	int rv = 0;
 	vm_prot_t ftype;
_at__at_ -684,8 +731,18 _at__at_
 	struct proc *p = td->td_proc;

+	if (/* XXX intr proc || */ td == PCPU_GET(idlethread) ||
+	    td->td_intr_nesting_level != 0 || (frame->tf_eflags & PSL_I) == 0) {
+		Debugger("impossible pfault");
+		trap_fatal(frame, eva);
+		return (-1);
+	}
+
 	va = trunc_page(eva);
 	if (va >= KERNBASE) {
 		/*
 		 * Don't allow user-mode faults in kernel address space.
+		 */
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+		/*
 		 * An exception:  if the faulting address is the invalid
 		 * instruction entry in the IDT, then the Intel Pentium
_at__at_ -694,7 +751,6 _at__at_
 		 * fault.
 		 */
-#if defined(I586_CPU) && !defined(NO_F00F_HACK)
-		if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
-			return -2;
+		if (eva == (vm_offset_t)&idt[6] && has_f00f_bug)
+			return (-2);
 #endif
 		if (usermode)
_at__at_ -705,10 +761,26 _at__at_
 		/*
 		 * This is a fault on non-kernel virtual memory.
-		 * vm is initialized above to NULL. If curproc is NULL
-		 * or curproc->p_vmspace is NULL the fault is fatal.
+		 * Do not allow it in kernel mode unless it is for a
+		 * a recognized copying function.
 		 */
-		if (p != NULL)
-			vm = p->p_vmspace;
+		if (!usermode &&
+		    frame->tf_eip != (int)fubyte_access &&
+		    frame->tf_eip != (int)fuword16_access &&
+		    frame->tf_eip != (int)fuword_access &&
+		    frame->tf_eip != (int)subyte_access &&
+		    frame->tf_eip != (int)suword16_access &&
+		    frame->tf_eip != (int)suword_access &&
+		    PCPU_GET(curpcb)->pcb_onfault == NULL) {
+			Debugger(
+			"pagefault for kernel access to unmapped user memory");
+			goto maygo;
+			goto nogo;
+		}
+maygo:

+		/*
+		 * If curproc->p_vmspace is NULL the fault is fatal.
+		 */
+		vm = p->p_vmspace;
 		if (vm == NULL)
 			goto nogo;
_at__at_ -733,6 +805,5 _at__at_
 		/* Fault in the user page: */
 		rv = vm_fault(map, va, ftype,
-			      (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
-						      : VM_FAULT_NORMAL);
+		    (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : VM_FAULT_NORMAL);

 		PROC_LOCK(p);
_at__at_ -750,7 +821,19 _at__at_
 nogo:
 	if (!usermode) {
-		if (td->td_intr_nesting_level == 0 &&
-		    PCPU_GET(curpcb) != NULL &&
-		    PCPU_GET(curpcb)->pcb_onfault != NULL) {
+#undef MAYBE_FUSU_FAULT
+#define	MAYBE_FUSU_FAULT(where, whereto) do {	\
+	if (frame->tf_eip == (int)where) {	\
+		breakpoint();			\
+		frame->tf_eip = (int)whereto;	\
+		return (0);			\
+	}					\
+} while (0)
+		MAYBE_FUSU_FAULT(fubyte_access, fusufault);
+		MAYBE_FUSU_FAULT(fuword16_access, fusufault);
+		MAYBE_FUSU_FAULT(fuword_access, fusufault);
+		MAYBE_FUSU_FAULT(subyte_access, fusufault);
+		MAYBE_FUSU_FAULT(suword16_access, fusufault);
+		MAYBE_FUSU_FAULT(suword_access, fusufault);
+		if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
 			frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
 			return (0);
_at__at_ -760,8 +843,8 _at__at_
 	}

-	/* kludge to pass faulting virtual address to sendsig */
+	/* Kludge to pass faulting virtual address to sendsig(). */
 	frame->tf_err = eva;

-	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
+	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
 }

%%%
Received on Thu Apr 17 2003 - 11:40:19 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:37:03 UTC