Re: amd64: panic on -CURRENT @r330539 for certain UEFI hosts

From: Konstantin Belousov <kostikbel_at_gmail.com>
Date: Fri, 16 Mar 2018 11:56:27 +0200
On Thu, Mar 15, 2018 at 09:38:56PM -0500, Peter Lei wrote:
> Some recent UEFI implementations have begun to leave the CPU with page
> write protection enabled in CR0.
> 
> With r330539 which enables kernel page protections, interesting things
> happen during boot (aka panic) when protection is already enabled,
> including a write protection fault from an explicit .text fixup write
> from xsave->xsaveopt by fpuinit().
> 
> I see this so far booting -CURRENT under virtual environments:
> 
> - QEMU with recent OVMF EDK2 builds: this is certainly due to UEFI
> enabling paging and page protections.
> 
> - VMWare Fusion 10.1.x on Mac: no specific insight on what's going
> inside the implementation, but CR0_WP is definitely left enabled before
> the kernel is booted.
> 
> I have patched my kernel build to explicitly clear CR0_WP (e.g. in
> initializecpu) prior to creating the page tables to get around this, but
> someone might have a cleaner/better solution...

Try this.

diff --git a/sys/amd64/amd64/db_interface.c b/sys/amd64/amd64/db_interface.c
index 9dfd44cf82c..1ecec02835c 100644
--- a/sys/amd64/amd64/db_interface.c
+++ b/sys/amd64/amd64/db_interface.c
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/pcpu.h>
 
 #include <machine/cpufunc.h>
+#include <machine/md_var.h>
 #include <machine/specialreg.h>
 
 #include <ddb/ddb.h>
@@ -75,19 +76,19 @@ db_write_bytes(vm_offset_t addr, size_t size, char *data)
 	jmp_buf jb;
 	void *prev_jb;
 	char *dst;
-	u_long cr0save;
+	bool old_wp;
 	int ret;
 
-	cr0save = rcr0();
+	old_wp = false;
 	prev_jb = kdb_jmpbuf(jb);
 	ret = setjmp(jb);
 	if (ret == 0) {
-		load_cr0(cr0save & ~CR0_WP);
+		old_wp = disable_wp();
 		dst = (char *)addr;
 		while (size-- > 0)
 			*dst++ = *data++;
 	}
-	load_cr0(cr0save);
+	restore_wp(old_wp);
 	(void)kdb_jmpbuf(prev_jb);
 	return (ret);
 }
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index 72b10396341..39367fa6ffb 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -205,6 +205,7 @@ fpuinit_bsp1(void)
 {
 	u_int cp[4];
 	uint64_t xsave_mask_user;
+	bool old_wp;
 
 	if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
 		use_xsave = 1;
@@ -233,8 +234,14 @@ fpuinit_bsp1(void)
 		 * Patch the XSAVE instruction in the cpu_switch code
 		 * to XSAVEOPT.  We assume that XSAVE encoding used
 		 * REX byte, and set the bit 4 of the r/m byte.
+		 *
+		 * It seems that some BIOSes give control to the OS
+		 * with CR0.WP already set, making the kernel text
+		 * read-only before cpu_startup().
 		 */
+		old_wp = disable_wp();
 		ctx_switch_xsave[3] |= 0x10;
+		restore_wp(old_wp);
 	}
 }
 
diff --git a/sys/amd64/amd64/gdb_machdep.c b/sys/amd64/amd64/gdb_machdep.c
index 68eb6002593..f7ca3c07ea3 100644
--- a/sys/amd64/amd64/gdb_machdep.c
+++ b/sys/amd64/amd64/gdb_machdep.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/cpufunc.h>
 #include <machine/frame.h>
 #include <machine/gdb_machdep.h>
+#include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/psl.h>
 #include <machine/reg.h>
@@ -127,17 +128,14 @@ gdb_cpu_signal(int type, int code)
 void *
 gdb_begin_write(void)
 {
-	u_long cr0save;
 
-	cr0save = rcr0();
-	load_cr0(cr0save & ~CR0_WP);
-	return ((void *)cr0save);
+	return (disable_wp() ? &gdb_begin_write : NULL);
 }
 
 void
 gdb_end_write(void *arg)
 {
 
-	load_cr0((u_long)arg);
+	restore_wp(arg != NULL);
 }
 
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index e340c6cd14d..fcc45eca57d 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -2597,6 +2597,31 @@ clear_pcb_flags(struct pcb *pcb, const u_int flags)
 	    : "cc", "memory");
 }
 
+/*
+ * Toggle CR0.WP around writes to read-only kernel text: disable_wp()
+ * clears it and returns whether it was set; restore_wp(true) sets it
+ * again.  The pair must run on one core with no rescheduling between.
+ */
+bool
+disable_wp(void)
+{
+	u_int cr0;
+
+	cr0 = rcr0();
+	if ((cr0 & CR0_WP) == 0)
+		return (false);
+	load_cr0(cr0 & ~CR0_WP);
+	return (true);
+}
+
+void
+restore_wp(bool old_wp)
+{
+
+	if (old_wp)
+		load_cr0(rcr0() | CR0_WP);
+}
+
 #ifdef KDB
 
 /*
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index 63dabaf4047..abcc273b6c6 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -53,6 +53,8 @@ void	amd64_conf_fast_syscall(void);
 void	amd64_db_resume_dbreg(void);
 void	amd64_lower_shared_page(struct sysentvec *);
 void	amd64_syscall(struct thread *td, int traced);
+bool	disable_wp(void);
+void	restore_wp(bool old_wp);
 void	doreti_iret(void) __asm(__STRING(doreti_iret));
 void	doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
 void	ld_ds(void) __asm(__STRING(ld_ds));
Received on Fri Mar 16 2018 - 08:56:45 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:15 UTC